npm - pullfrog - Versions diffs - 0.1.5 → 0.1.7 - Mend

pullfrog 0.1.5 → 0.1.7

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (16)

package/dist/agents/postRun.d.ts +21 -0
package/dist/agents/sessionLabeler.d.ts +38 -18
package/dist/agents/subagentModels.d.ts +19 -0
package/dist/cli.mjs +678 -278
package/dist/index.js +662 -264
package/dist/internal.js +151 -59
package/dist/models.d.ts +63 -3
package/dist/utils/agent.d.ts +5 -2
package/dist/utils/apiKeys.d.ts +18 -0
package/dist/utils/instructions.d.ts +19 -0
package/dist/utils/learnings.d.ts +20 -9
package/dist/utils/normalizeEnv.d.ts +21 -1
package/dist/utils/runContext.d.ts +16 -0
package/dist/utils/subprocess.d.ts +40 -0
package/dist/utils/timer.d.ts +11 -0
package/package.json +1 -1

package/dist/index.js (changed)

@@ -19718,10 +19718,10 @@ var require_core = __commonJS({
       (0, command_1.issueCommand)("set-env", { name }, convertedVal);
     }
     exports.exportVariable = exportVariable;
-    function setSecret3(secret) {
+    function setSecret4(secret) {
       (0, command_1.issueCommand)("add-mask", {}, secret);
     }
-    exports.setSecret = setSecret3;
+    exports.setSecret = setSecret4;
     function addPath(inputPath) {
       const filePath = process.env["GITHUB_PATH"] || "";
       if (filePath) {
@@ -47737,7 +47737,7 @@ var require_core3 = __commonJS({
     Object.defineProperty(exports, "__esModule", { value: true });
     var id_1 = require_id();
     var ref_1 = require_ref();
-    var core7 = [
+    var core8 = [
       "$schema",
       "$id",
       "$defs",
@@ -47747,7 +47747,7 @@ var require_core3 = __commonJS({
       id_1.default,
       ref_1.default
     ];
-    exports.default = core7;
+    exports.default = core8;
   }
 });
@@ -98924,7 +98924,7 @@ var require_fast_content_type_parse = __commonJS({
 });
 // main.ts
-var core6 = __toESM(require_core(), 1);
+var core7 = __toESM(require_core(), 1);
 import { existsSync as existsSync7, readdirSync } from "node:fs";
 import { readFile as readFile4 } from "node:fs/promises";
 import { join as join17 } from "node:path";
@@ -107744,7 +107744,8 @@ var providers = {
         displayName: "Claude Opus",
         resolve: "anthropic/claude-opus-4-7",
         openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
-        preferred: true
+        preferred: true,
+        subagentModel: "claude-sonnet"
       },
       "claude-sonnet": {
         displayName: "Claude Sonnet",
@@ -107766,12 +107767,23 @@ var providers = {
         displayName: "GPT",
         resolve: "openai/gpt-5.5",
         openRouterResolve: "openrouter/openai/gpt-5.5",
-        preferred: true
+        preferred: true,
+        subagentModel: "gpt-5.4"
       },
       "gpt-pro": {
         displayName: "GPT Pro",
         resolve: "openai/gpt-5.5-pro",
-        openRouterResolve: "openrouter/openai/gpt-5.5-pro"
+        openRouterResolve: "openrouter/openai/gpt-5.5-pro",
+        subagentModel: "gpt"
+      },
+      // hidden subagent target — `gpt` lenses run against this. surfacing
+      // it in the picker would just confuse users (it's the prior-flagship,
+      // and they already have `gpt` and `gpt-mini` to choose from).
+      "gpt-5.4": {
+        displayName: "GPT 5.4",
+        resolve: "openai/gpt-5.4",
+        openRouterResolve: "openrouter/openai/gpt-5.4",
+        hidden: true
       },
       "gpt-mini": {
         displayName: "GPT Mini",
@@ -107809,7 +107821,8 @@ var providers = {
         displayName: "Gemini Pro",
         resolve: "google/gemini-3.1-pro-preview",
         openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
-        preferred: true
+        preferred: true,
+        subagentModel: "gemini-flash"
       },
       "gemini-flash": {
         displayName: "Gemini Flash",
@@ -107897,7 +107910,8 @@ var providers = {
       "claude-opus": {
         displayName: "Claude Opus",
         resolve: "opencode/claude-opus-4-7",
-        openRouterResolve: "openrouter/anthropic/claude-opus-4.7"
+        openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
+        subagentModel: "claude-sonnet"
       },
       "claude-sonnet": {
         displayName: "Claude Sonnet",
@@ -107912,12 +107926,21 @@ var providers = {
       gpt: {
         displayName: "GPT",
         resolve: "opencode/gpt-5.5",
-        openRouterResolve: "openrouter/openai/gpt-5.5"
+        openRouterResolve: "openrouter/openai/gpt-5.5",
+        subagentModel: "gpt-5.4"
       },
       "gpt-pro": {
         displayName: "GPT Pro",
         resolve: "opencode/gpt-5.5-pro",
-        openRouterResolve: "openrouter/openai/gpt-5.5-pro"
+        openRouterResolve: "openrouter/openai/gpt-5.5-pro",
+        subagentModel: "gpt"
+      },
+      // hidden subagent target — see openai provider above for context.
+      "gpt-5.4": {
+        displayName: "GPT 5.4",
+        resolve: "opencode/gpt-5.4",
+        openRouterResolve: "openrouter/openai/gpt-5.4",
+        hidden: true
       },
       "gpt-mini": {
         displayName: "GPT Mini",
@@ -107940,7 +107963,8 @@ var providers = {
       "gemini-pro": {
         displayName: "Gemini Pro",
         resolve: "opencode/gemini-3.1-pro",
-        openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
+        openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
+        subagentModel: "gemini-flash"
       },
       "gemini-flash": {
         displayName: "Gemini Flash",
@@ -107972,6 +107996,20 @@ var providers = {
       }
     }
   }),
+  bedrock: provider({
+    displayName: "Amazon Bedrock",
+    envVars: ["AWS_BEARER_TOKEN_BEDROCK", "AWS_REGION", "BEDROCK_MODEL_ID"],
+    models: {
+      // single routing entry — the actual Bedrock model ID is read from
+      // BEDROCK_MODEL_ID at run time. see ModelRouting docs for why we
+      // don't catalog individual Bedrock models.
+      byok: {
+        displayName: "Amazon Bedrock",
+        resolve: "bedrock",
+        routing: "bedrock"
+      }
+    }
+  }),
   openrouter: provider({
     displayName: "OpenRouter",
     envVars: ["OPENROUTER_API_KEY"],
@@ -107980,7 +108018,8 @@ var providers = {
         displayName: "Claude Opus",
         resolve: "openrouter/anthropic/claude-opus-4.7",
         openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
-        preferred: true
+        preferred: true,
+        subagentModel: "claude-sonnet"
       },
       "claude-sonnet": {
         displayName: "Claude Sonnet",
@@ -107995,12 +108034,21 @@ var providers = {
       gpt: {
         displayName: "GPT",
         resolve: "openrouter/openai/gpt-5.5",
-        openRouterResolve: "openrouter/openai/gpt-5.5"
+        openRouterResolve: "openrouter/openai/gpt-5.5",
+        subagentModel: "gpt-5.4"
       },
       "gpt-pro": {
         displayName: "GPT Pro",
         resolve: "openrouter/openai/gpt-5.5-pro",
-        openRouterResolve: "openrouter/openai/gpt-5.5-pro"
+        openRouterResolve: "openrouter/openai/gpt-5.5-pro",
+        subagentModel: "gpt"
+      },
+      // hidden subagent target — see openai provider above for context.
+      "gpt-5.4": {
+        displayName: "GPT 5.4",
+        resolve: "openrouter/openai/gpt-5.4",
+        openRouterResolve: "openrouter/openai/gpt-5.4",
+        hidden: true
       },
       "gpt-mini": {
         displayName: "GPT Mini",
@@ -108028,7 +108076,8 @@ var providers = {
       "gemini-pro": {
         displayName: "Gemini Pro",
         resolve: "openrouter/google/gemini-3.1-pro-preview",
-        openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
+        openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
+        subagentModel: "gemini-flash"
       },
       "gemini-flash": {
         displayName: "Gemini Flash",
@@ -108097,7 +108146,13 @@ var modelAliases = Object.entries(providers).flatMap(
     openRouterResolve: def.openRouterResolve,
     preferred: def.preferred ?? false,
     isFree: def.isFree ?? false,
-    fallback: def.fallback
+    fallback: def.fallback,
+    routing: def.routing,
+    // subagentModel is stored as an alias key local to the provider; expand
+    // here to a fully-qualified slug so callers can look up the target alias
+    // directly without re-deriving the provider.
+    subagentModel: def.subagentModel ? `${providerKey}/${def.subagentModel}` : void 0,
+    hidden: def.hidden ?? false
   }))
 );
 var MAX_FALLBACK_DEPTH = 10;
@@ -108117,6 +108172,10 @@ function resolveDisplayAlias(slug2) {
 function resolveCliModel(slug2) {
   return resolveDisplayAlias(slug2)?.resolve;
 }
+var BEDROCK_MODEL_ID_ENV = "BEDROCK_MODEL_ID";
+function isBedrockAnthropicId(bedrockModelId) {
+  return bedrockModelId.toLowerCase().split(/[./:]/).includes("anthropic");
+}
 // utils/buildPullfrogFooter.ts
 var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
@@ -108964,7 +109023,7 @@ var Comment = type({
 function CreateCommentTool(ctx) {
   return tool({
     name: "create_issue_comment",
-    description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments.",
+    description: "Create a comment on a GitHub issue or PR. Example: `create_issue_comment({ issueNumber: 1234, body: \"Thanks for the report.\" })`. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments.",
     parameters: Comment,
     execute: execute(async ({ issueNumber, body, type: commentType }) => {
       const bodyWithFooter = addFooter(ctx, body);
@@ -109132,7 +109191,7 @@ async function reportProgress(ctx, params) {
 function ReportProgressTool(ctx) {
   return tool({
     name: "report_progress",
-    description: "Share progress on the associated GitHub issue/PR. The first call creates a comment; subsequent calls update it in place. Call this at the end of every run with a brief final summary (1-3 sentences) unless the mode guidance instructs otherwise. The current task list is automatically appended in a collapsible section \u2014 do not restate individual steps.",
+    description: 'Share progress on the associated GitHub issue/PR. The first call creates a comment; subsequent calls update it in place. Example: `report_progress({ body: "Implemented the auth check and added tests." })`. Call this at the end of every run with a brief final summary (1-3 sentences) unless the mode guidance instructs otherwise. The current task list is automatically appended in a collapsible section \u2014 do not restate individual steps.',
     parameters: ReportProgress,
     execute: execute(async (params) => {
       let body = params.body;
@@ -109212,7 +109271,7 @@ function duplicateReplyDecision(params) {
 function ReplyToReviewCommentTool(ctx) {
   return tool({
     name: "reply_to_review_comment",
-    description: "Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Call exactly ONCE per parent comment you address in AddressReviews mode \u2014 duplicate calls with the same body are a no-op. Keep replies extremely brief (1 sentence max).",
+    description: 'Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Example: `reply_to_review_comment({ pull_number: 1234, comment_id: 567890, body: "Fixed by adding a null check." })`. Call exactly ONCE per parent comment you address in AddressReviews mode \u2014 duplicate calls with the same body are a no-op. Keep replies extremely brief (1 sentence max).',
     parameters: ReplyToReviewComment,
     execute: execute(async ({ pull_number, comment_id, body }) => {
       const bodyWithFooter = addFooter(ctx, body);
@@ -109742,12 +109801,41 @@ function installSignalHandler() {
     killTrackedChildren();
   });
 }
+var DEFAULT_MAX_RETAINED_BYTES = 8 * 1024 * 1024;
+var TailBuffer = class {
+  // explicit field declarations rather than constructor parameter properties:
+  // node's strip-only TS loader (used by action/test/run.ts in CI) rejects
+  // `constructor(private readonly cap: number)` with ERR_UNSUPPORTED_TYPESCRIPT_SYNTAX.
+  cap;
+  buffer = "";
+  truncatedBytes = 0;
+  constructor(cap) {
+    this.cap = cap;
+  }
+  append(chunk) {
+    if (this.cap <= 0) return;
+    this.buffer += chunk;
+    if (this.buffer.length > this.cap) {
+      const drop = this.buffer.length - this.cap;
+      this.truncatedBytes += drop;
+      this.buffer = this.buffer.slice(drop);
+    }
+  }
+  toString() {
+    if (this.truncatedBytes === 0) return this.buffer;
+    const mib = (this.truncatedBytes / 1024 / 1024).toFixed(1);
+    return `... [${mib} MiB truncated by retain:tail cap] ...
+${this.buffer}`;
+  }
+};
 async function spawn(options) {
   const activityTimeoutMs = options.activityTimeout ?? DEFAULT_ACTIVITY_TIMEOUT_MS;
   installSignalHandler();
   const startTime = performance3.now();
-  let stdoutBuffer = "";
-  let stderrBuffer = "";
+  const retain = options.retain ?? "tail";
+  const cap = options.maxRetainedBytes ?? DEFAULT_MAX_RETAINED_BYTES;
+  const stdoutBuffer = retain === "none" ? null : new TailBuffer(cap);
+  const stderrBuffer = retain === "none" ? null : new TailBuffer(cap);
   const killGroup = options.killGroup ?? false;
   return new Promise((resolve3, reject) => {
     const child = nodeSpawn(options.cmd, options.args, {
@@ -109821,17 +109909,29 @@ async function spawn(options) {
     }
     if (child.stdout) {
       child.stdout.on("data", (data) => {
-        updateActivity();
-        const chunk = data.toString();
-        stdoutBuffer += chunk;
-        options.onStdout?.(chunk);
+        try {
+          updateActivity();
+          const chunk = data.toString();
+          stdoutBuffer?.append(chunk);
+          options.onStdout?.(chunk);
+        } catch (err) {
+          log.debug(
+            `spawn stdout handler threw: ${err instanceof Error ? err.message : String(err)}`
+          );
+        }
       });
     }
     if (child.stderr) {
       child.stderr.on("data", (data) => {
-        const chunk = data.toString();
-        stderrBuffer += chunk;
-        options.onStderr?.(chunk);
+        try {
+          const chunk = data.toString();
+          stderrBuffer?.append(chunk);
+          options.onStderr?.(chunk);
+        } catch (err) {
+          log.debug(
+            `spawn stderr handler threw: ${err instanceof Error ? err.message : String(err)}`
+          );
+        }
       });
     }
     child.on("close", (exitCode, signal) => {
@@ -109858,7 +109958,7 @@ async function spawn(options) {
         return;
       }
       let resolvedExitCode = exitCode ?? 0;
-      let resolvedStderr = stderrBuffer;
+      let resolvedStderr = stderrBuffer?.toString() ?? "";
       if (exitCode === null && signal) {
         const killMsg = `[spawn] ${options.cmd}: killed by signal ${signal}`;
         resolvedStderr = resolvedStderr ? `${resolvedStderr}
@@ -109866,7 +109966,7 @@ ${killMsg}` : killMsg;
         resolvedExitCode = 1;
       }
       resolve3({
-        stdout: stdoutBuffer,
+        stdout: stdoutBuffer?.toString() ?? "",
         stderr: resolvedStderr,
         exitCode: resolvedExitCode,
         durationMs
@@ -109880,11 +109980,12 @@ ${killMsg}` : killMsg;
       if (activityCheckIntervalId) clearInterval(activityCheckIntervalId);
       const errMsg = `[spawn] ${options.cmd}: ${error49.message}`;
       console.error(errMsg);
-      stderrBuffer = stderrBuffer ? `${stderrBuffer}
+      const existingStderr = stderrBuffer?.toString() ?? "";
+      const finalStderr = existingStderr ? `${existingStderr}
 ${errMsg}` : errMsg;
       resolve3({
-        stdout: stdoutBuffer,
-        stderr: stderrBuffer,
+        stdout: stdoutBuffer?.toString() ?? "",
+        stderr: finalStderr,
         exitCode: 1,
         durationMs
       });
@@ -137793,7 +137894,7 @@ var require_core4 = /* @__PURE__ */ __commonJSMin(((exports) => {
   Object.defineProperty(exports, "__esModule", { value: true });
   const id_1 = require_id2();
   const ref_1 = require_ref2();
-  const core7 = [
+  const core8 = [
     "$schema",
     "$id",
     "$defs",
@@ -137803,7 +137904,7 @@ var require_core4 = /* @__PURE__ */ __commonJSMin(((exports) => {
     id_1.default,
     ref_1.default
   ];
-  exports.default = core7;
+  exports.default = core8;
 }));
 var require_limitNumber2 = /* @__PURE__ */ __commonJSMin(((exports) => {
   Object.defineProperty(exports, "__esModule", { value: true });
@@ -142313,7 +142414,7 @@ var import_semver = __toESM(require_semver2(), 1);
 // package.json
 var package_default = {
   name: "pullfrog",
-  version: "0.1.5",
+  version: "0.1.7",
   type: "module",
   bin: {
     pullfrog: "dist/cli.mjs",
@@ -143169,7 +143270,7 @@ function PushBranchTool(ctx) {
   const pushPermission = ctx.payload.push;
   return tool({
     name: "push_branch",
-    description: "Push the current branch to the remote repository. Omit branchName to push the current branch (recommended). If specifying branchName, use the LOCAL branch name (e.g., 'pr-1'), not the remote branch name. The correct remote and remote branch are determined automatically from branch config set by checkout_pr. Requires a clean working tree. Runs the repository prepush hook (if configured) before the network push \u2014 hook failure means tests/lint or similar in that script failed, not necessarily a Pullfrog timeout. Never force push unless explicitly requested. Pushes to the default branch are blocked in restricted mode.",
+    description: "Push the current branch to the remote repository. Omit branchName to push the current branch (recommended). Example: `push_branch({})` to push the current branch. Example: `push_branch({ branchName: \"pr-1\" })` to push a specific local branch. If specifying branchName, use the LOCAL branch name (e.g., 'pr-1'), not the remote branch name. The correct remote and remote branch are determined automatically from branch config set by checkout_pr. Requires a clean working tree. Runs the repository prepush hook (if configured) before the network push \u2014 hook failure means tests/lint or similar in that script failed, not necessarily a Pullfrog timeout. Never force push unless explicitly requested. Pushes to the default branch are blocked in restricted mode. If the response reports a timeout, the underlying push may have actually succeeded \u2014 verify with `git log origin/<branch>` (or this tool with command 'log') before retrying, otherwise you'll push a duplicate.",
     parameters: PushBranch,
     execute: execute(async ({ branchName, force }) => {
       if (pushPermission === "disabled") {
@@ -143308,7 +143409,7 @@ var Git = type({
 function GitTool(ctx) {
   return tool({
     name: "git",
-    description: "Run git commands. For push/fetch, use the dedicated MCP tools (push_branch, git_fetch). git pull is not available \u2014 use git_fetch then this tool with command 'merge'.",
+    description: 'Run a git subcommand. `command` is a single subcommand; flags and positional args go in `args`. Example: `git({ command: "log", args: ["--oneline", "-n", "20"] })`. Example: `git({ command: "diff", args: ["origin/main..HEAD"] })`. For push/fetch, use the dedicated MCP tools (push_branch, git_fetch). git pull is not available \u2014 use git_fetch then this tool with command \'merge\'.',
     parameters: Git,
     execute: execute(async (params) => {
       const command = params.command;
@@ -143358,7 +143459,7 @@ var DEEPEN_RETRY_DEPTH = 1e3;
 function GitFetchTool(ctx) {
   return tool({
     name: "git_fetch",
-    description: "Fetch refs from remote repository. Use this instead of git fetch directly.",
+    description: 'Fetch refs from remote repository. Use this instead of git fetch directly. Example: `git_fetch({ ref: "main" })`. With depth: `git_fetch({ ref: "pull/1234/head", depth: 1 })`.',
     parameters: GitFetch,
     execute: execute(async (params) => {
       rejectIfLeadingDash(params.ref, "ref");
@@ -143592,13 +143693,15 @@ var CreatePullRequestReview = type({
   approved: type.boolean.describe(
     "Set to true to submit as an approval. Use for both 'no issues found' and informational `> [!NOTE]` reviews where the PR is mergeable as-is and nothing in the body warrants code changes \u2014 approving also suppresses the Fix-button footer affordance so users don't dispatch a fix run on non-actionable feedback. Reserve approved: false for `> [!IMPORTANT]` (recommended changes) and `> [!CAUTION]` (critical) reviews. Defaults to false (comment-only review). Rejections are not supported."
   ).optional(),
-  commit_id: type.string.describe("Optional SHA of the commit being reviewed. Defaults to latest.").optional(),
+  commit_id: type.string.describe(
+    "Optional SHA of the commit being reviewed. Defaults to latest. Must be the FULL 40-character SHA \u2014 abbreviated SHAs are rejected by GitHub with `422 Unprocessable Entity`. The PR-synchronize event payload's `head_sha` is already full-length."
+  ).optional(),
   comments: type({
     path: type.string.describe(
       "The file path to comment on (relative to repo root). Must be a file that appears in the PR diff."
     ),
     line: type.number.describe(
-      "Line number to comment on. For multi-line ranges, this is the end line. Use NEW column from diff format."
+      "Line number to comment on. For multi-line ranges, this is the end line. Use NEW column from diff format. Must sit inside a `@@` hunk in the PR diff \u2014 anchors on context-only or untouched lines are dropped silently (the rest of the review still posts; dropped entries are reported under `droppedComments` in the response)."
     ),
     side: type.enumerated("LEFT", "RIGHT").describe(
       "Side of the diff: LEFT (old code, lines starting with -) or RIGHT (new code, lines starting with + or unchanged). Defaults to RIGHT."
@@ -143608,7 +143711,7 @@ var CreatePullRequestReview = type({
       "Full replacement code for the line range [start_line, line]. MUST preserve the exact indentation of the original code."
     ).optional(),
     start_line: type.number.describe(
-      "Start line for multi-line comment ranges. Omit for single-line comments. The range [start_line, line] defines which lines a suggestion replaces."
+      "Start line for multi-line comment ranges. Omit for single-line comments. The range [start_line, line] defines which lines a suggestion replaces. Both `start_line` and `line` must sit inside the same `@@` hunk \u2014 a `start_line` outside the hunk causes the whole comment to be dropped even when `line` is valid. If you need to comment on context just above/below a hunk, shrink the range to a single line that is provably modified."
     ).optional()
   }).array().describe(
     "Inline comments on lines within diff hunks. Feedback about code outside the diff goes in 'body' instead."
@@ -143617,7 +143720,7 @@ var CreatePullRequestReview = type({
 function CreatePullRequestReviewTool(ctx) {
   return tool({
     name: "create_pull_request_review",
-    description: `Submit a review for an existing pull request. Each call creates a permanent, visible review on the PR \u2014 NEVER submit test or diagnostic reviews. Reviews with no body AND no comments are silently skipped (nothing to post). IMPORTANT: 95%+ of feedback should be in 'comments' array with file paths and line numbers. Only use 'body' for a 1-2 sentence summary with urgency and critical callouts. Use 'suggestion' to propose replacement code - MUST preserve exact indentation of original code. The first submission may error once with a one-time diff-coverage nudge listing unread TOC regions \u2014 retry with the same arguments and the pre-flight will not block again. Example replacing lines 42-44 (3 lines) with 5 lines: { path: 'src/api.ts', start_line: 42, line: 44, suggestion: '    const result = await fetch(url);\\n    if (!result.ok) {\\n      log.error(result.status);\\n      throw new Error("request failed");\\n    }' } CONSTRAINT: Inline comments can ONLY target files and lines that appear in the PR diff. Comments anchored outside a diff hunk are dropped automatically (with a note appended to the review body) \u2014 the rest of the review still posts.`,
+    description: `Submit a review for an existing pull request. Example: \`create_pull_request_review({ pull_number: 1234, body: "LGTM", approved: true, comments: [{ path: "src/api.ts", line: 42, body: "nit: rename" }] })\`. Each call creates a permanent, visible review on the PR \u2014 NEVER submit test or diagnostic reviews. Reviews with no body AND no comments are silently skipped (nothing to post). IMPORTANT: 95%+ of feedback should be in 'comments' array with file paths and line numbers. Only use 'body' for a 1-2 sentence summary with urgency and critical callouts. Use 'suggestion' to propose replacement code - MUST preserve exact indentation of original code. The first submission may error once with a one-time diff-coverage nudge listing unread TOC regions \u2014 retry with the same arguments and the pre-flight will not block again. Example replacing lines 42-44 (3 lines) with 5 lines: { path: 'src/api.ts', start_line: 42, line: 44, suggestion: '    const result = await fetch(url);\\n    if (!result.ok) {\\n      log.error(result.status);\\n      throw new Error("request failed");\\n    }' } CONSTRAINT: Inline comments can ONLY target files and lines that appear in the PR diff. Comments anchored outside a diff hunk are dropped automatically (with a note appended to the review body) \u2014 the rest of the review still posts.`,
     parameters: CreatePullRequestReview,
     execute: execute(async ({ pull_number, body, approved, commit_id, comments = [] }) => {
       if (body) body = fixDoubleEscapedString(body);
@@ -143846,7 +143949,7 @@ function runDiffCoveragePreflight(params) {
   );
   const unreadText = unread.map((entry) => `- ${entry.path} (${entry.unreadLines} lines, ${entry.ranges})`).join("\n");
   throw new Error(
-    `diff coverage pre-flight: some TOC regions were not read before review submission. this is a one-time nudge \u2014 optionally read the ranges below from ${coverageState.diffPath}, then call create_pull_request_review again with the same arguments. this pre-flight will not block again in this review session.
+    `diff coverage pre-flight: some TOC regions were not read before review submission. this is a one-time nudge \u2014 read the ranges below from ${coverageState.diffPath} on a best-effort basis, then call create_pull_request_review again. you are NOT obligated to read generated artifacts (lockfiles like pnpm-lock.yaml / package-lock.json / yarn.lock / Cargo.lock; codegen output like *.gen.*, *.pb.go, *.generated.*; snapshot/fixture dirs like __snapshots__/; migration metadata like drizzle/meta/, prisma migration SQL). if every unread region is generated, retry immediately without reading. this pre-flight will not block again in this review session.
 unread TOC regions:
 ${unreadText}
@@ -144293,7 +144396,7 @@ async function checkoutPrBranch(pr, params) {
 function CheckoutPrTool(ctx) {
   return tool({
     name: "checkout_pr",
-    description: "Checkout a pull request branch locally. This fetches the PR branch and sets up push configuration for fork PRs. Returns diffPath pointing to the formatted diff file.",
+    description: "Checkout a pull request branch locally. This fetches the PR branch and sets up push configuration for fork PRs. Returns diffPath pointing to the formatted diff file. Example: `checkout_pr({ pull_number: 1234 })`. Transient fetch timeouts are common \u2014 retry the same call up to a few times before treating the failure as terminal. If the error mentions `.git/shallow.lock: File exists` or `.git/index.lock: File exists`, that's a stale lock from a prior timed-out fetch \u2014 remove it via the shell tool (`rm -f .git/shallow.lock .git/index.lock`) and retry.",
     parameters: CheckoutPr,
     execute: execute(async ({ pull_number }) => {
       const prResponse = await ctx.octokit.rest.pulls.get({
@@ -144604,7 +144707,7 @@ var CommitInfo = type({
 function CommitInfoTool(ctx) {
   return tool({
     name: "get_commit_info",
-    description: "Retrieve commit metadata and diff via GitHub API. Use this instead of git show for reviewing commits - it works with shallow clones and shows the actual changes in the commit. Returns diffPath pointing to formatted diff file.",
+    description: 'Retrieve commit metadata and diff via GitHub API. Use this instead of git show for reviewing commits - it works with shallow clones and shows the actual changes in the commit. Returns diffPath pointing to formatted diff file. Example: `get_commit_info({ sha: "2a6ab5d" })`.',
     parameters: CommitInfo,
     execute: execute(async ({ sha }) => {
       const response = await ctx.octokit.rest.repos.getCommit({
@@ -144695,7 +144798,7 @@ var GetIssueComments = type({
 function GetIssueCommentsTool(ctx) {
   return tool({
     name: "get_issue_comments",
-    description: "Get all comments for a GitHub issue. Returns all comments including the issue body and all subsequent discussion comments.",
+    description: "Get all comments for a GitHub issue. Returns all comments including the issue body and all subsequent discussion comments. Example: `get_issue_comments({ issue_number: 1234 })`.",
     parameters: GetIssueComments,
     execute: execute(async ({ issue_number }) => {
       ctx.toolState.issueNumber = issue_number;
@@ -144796,7 +144899,7 @@ var IssueInfo = type({
 function IssueInfoTool(ctx) {
   return tool({
     name: "get_issue",
-    description: "Retrieve GitHub issue information by issue number",
+    description: "Retrieve GitHub issue information by issue number. Example: `get_issue({ issue_number: 1234 })`.",
     parameters: IssueInfo,
     execute: execute(async ({ issue_number }) => {
       const issue3 = await ctx.octokit.rest.issues.get({
@@ -145038,7 +145141,7 @@ var PullRequestInfo = type({
 function PullRequestInfoTool(ctx) {
   return tool({
     name: "get_pull_request",
-    description: "Retrieve PR metadata (title, body, state, branches, author, labels, linked issues). To checkout a PR branch locally, use checkout_pr instead.",
+    description: "Retrieve PR metadata (title, body, state, branches, author, labels, linked issues). Example: `get_pull_request({ pull_number: 1234 })`. To checkout a PR branch locally, use checkout_pr instead.",
     parameters: PullRequestInfo,
     execute: execute(async ({ pull_number }) => {
       const [restResponse, graphqlResponse] = await Promise.all([
@@ -145442,7 +145545,7 @@ async function getReviewData(input) {
 function GetReviewCommentsTool(ctx) {
   return tool({
     name: "get_review_comments",
-    description: "Get review comments for a pull request review with full thread context. Automatically filters to approved comments when applicable. Returns a TOC and commentsPath pointing to a markdown file with full comment details.",
+    description: "Get review comments for a pull request review with full thread context. Example: `get_review_comments({ pull_number: 1234, review_id: 567890 })`. Automatically filters to approved comments when applicable. Returns a TOC and commentsPath pointing to a markdown file with full comment details.",
     parameters: GetReviewComments,
     execute: execute(async (params) => {
       const approvedBy = ctx.payload.event.trigger === "fix_review" && ctx.payload.event.approved_only ? ctx.payload.triggerer : void 0;
@@ -145492,7 +145595,7 @@ var ListPullRequestReviews = type({
 function ListPullRequestReviewsTool(ctx) {
   return tool({
     name: "list_pull_request_reviews",
-    description: "List all reviews for a pull request. Returns all reviews including approvals, request changes, and comments.",
+    description: "List all reviews for a pull request. Returns all reviews including approvals, request changes, and comments. Example: `list_pull_request_reviews({ pull_number: 1234 })`.",
     parameters: ListPullRequestReviews,
     execute: execute(async (params) => {
       const reviews = await ctx.octokit.paginate(ctx.octokit.rest.pulls.listReviews, {
@@ -145642,7 +145745,7 @@ function SelectModeTool(ctx) {
   const overrides = buildModeOverrides(t);
   return tool({
     name: "select_mode",
-    description: "Select a mode and receive step-by-step guidance on how to handle the task. Call this to understand the best workflow for the current mode.",
+    description: 'Select a mode and receive step-by-step guidance on how to handle the task. Call this to understand the best workflow for the current mode. Example: `select_mode({ mode: "Review" })` or `select_mode({ mode: "Plan", issue_number: 1234 })`.',
     parameters: SelectModeParams,
     execute: execute(async (params) => {
       if (ctx.toolState.selectedMode) {
@@ -145703,7 +145806,9 @@ import { setTimeout as sleep2 } from "node:timers/promises";
 var ShellParams = type({
   command: "string",
   description: "string",
-  "timeout?": "number",
+  "timeout?": type.number.describe(
+    "Timeout in MILLISECONDS (not seconds). Default 30000 (30s), max 120000 (2m). e.g. timeout: 120000 for 2 minutes; timeout: 120 means 120ms and will kill the process almost immediately."
+  ),
   "working_directory?": "string",
   "background?": "boolean"
 });
@@ -145822,6 +145927,8 @@ function ShellTool(ctx) {
     name: "shell",
     description: `Execute shell commands securely. Environment is filtered to remove API keys and secrets.
+Example: \`shell({ command: "pnpm test", description: "run the test suite" })\`.
 Use this tool to:
 - Run shell commands (ls, cat, grep, find, etc.)
 - Execute build tools (npm, pnpm, cargo, make, etc.)
@@ -146339,18 +146446,24 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
    - resolve addressed threads via \`${t("resolve_review_thread")}\`
    - call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)`
     },
-    // Review and IncrementalReview use the multi-lens orchestrator pattern
-    // (canonical source: .claude/commands/anneal.md). The orchestrator does
-    // triage → parallel read-only subagent fan-out → aggregate → draft comments
-    // → submit. For someone else's PR, parallel lenses (correctness, security,
-    // research-validated claims, user-journey, etc.) provide breadth across
-    // angles that a single subagent can't carry coherently. Build mode keeps
-    // a single fresh-eyes subagent (different problem shape — orchestrator
-    // wrote the code and bias-mitigation comes from delegating to one
-    // subagent that doesn't share the implementation context).
-    // Deliberate omission vs canonical /anneal: severity categorization in the
-    // final message (the review body has its own CAUTION/IMPORTANT framing
-    // instead of a severity table).
+    // Review and IncrementalReview use a 0-or-2+ lens pattern. The default is
+    // 0 lenses (orchestrator handles the review solo). Multi-lens (2+
+    // reviewfrog subagents in parallel) only fires for substantive PRs or
+    // high-stakes-subsystem touches — and when it fires, ALL lenses must
+    // dispatch in a single assistant turn or the parallelism win disappears.
+    // We never dispatch exactly one lens: a single lens is just a worse,
+    // slower version of doing the work yourself.
+    //
+    // Build mode self-review is a different problem shape: the orchestrator
+    // wrote the code, so bias-mitigation comes from delegating to one
+    // fresh-eyes subagent that doesn't share the implementation context. A
+    // single subagent there is appropriate; the 0-or-2+ rule applies only to
+    // the Review/IncrementalReview lens fan-out where independence between
+    // perspectives is what's being purchased.
+    //
+    // Deliberate omission vs canonical /anneal (.claude/commands/anneal.md):
+    // severity categorization in the final message (the review body has its
+    // own CAUTION/IMPORTANT framing instead of a severity table).
     {
       name: "Review",
       description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
@@ -146360,9 +146473,9 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
 2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
-3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
+3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). pull as much context as you need to render a confident, well-grounded review: read related files, grep for callers of changed symbols, check tests that exercise the touched paths, fetch related GitHub state. **you are the synthesizer** \u2014 never delegate understanding to subagents.
-   if the PR is **genuinely trivial**, skip steps 4\u20135 entirely and submit a \`No new issues found.\` review per step 6. there's no value in dispatching even one lens for a typo.
+   if the PR is **genuinely trivial**, skip the fan-out entirely and submit a \`No new issues found.\` review per step 7.
    "Genuinely trivial" (skip):
    - single-word doc typo, whitespace/format-only, comment-only across any number of files
@@ -146381,23 +146494,25 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
    - any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
    - mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
-   When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
+4. **lens decision \u2014 0 or 2+, NEVER 1**.
-   otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
+   The default is **0 lenses**: handle the review yourself end-to-end. Most PRs land here.
-   - **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
-   - **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
-   - **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
-   - **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
+   Dispatch **2+ \`${REVIEWER_AGENT_NAME}\` lenses in parallel** ONLY when ALL of the following are true:
+   - the PR is substantive (>5 files changed AND >200 net lines), OR touches a high-stakes subsystem (auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling)
+   - you can name 2+ distinct concrete failure modes that warrant independent lenses (one lens per failure mode; orthogonal, not overlapping)
+   - parallel-orchestrated independent perspectives meaningfully outperform what you'd find solo
-   lenses come in two flavors, and you can mix them:
+   **NEVER dispatch exactly one lens.** A single lens is just a more expensive version of doing the work yourself with a worse model \u2014 it adds wall time and a context-handoff for no orthogonality benefit. Either you have at least two genuinely independent failure-mode hypotheses (dispatch all in one turn), or you don't (do the review yourself).
+   When you do go multi-lens, lens framings come in two flavors:
    - **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
-   - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
+   - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
    starter menu (combine, omit, or invent your own):
    - **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
-   - **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
-   - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
+   - **impact** \u2014 stale references in code/tests/docs/configs/UI after rename/remove
+   - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. **only pick when the PR's correctness depends on the contract behaving a specific way** \u2014 not when the API is merely used. The bar is "if the third-party contract differs from what the diff assumes, the PR is incorrect." When dispatched, the subagent must verify load-bearing claims via web search and quote source URLs.
    - **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
    - **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
    - **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
@@ -146407,26 +146522,36 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
    - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
    - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
-4. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
+   The only subagent type is \`${REVIEWER_AGENT_NAME}\` \u2014 used for lens judgment work ("is this safe / correct / well-tested?"), runs on a mid-tier model.
+5. **fan out (only if step 4 said 2+ lenses)**: dispatch every \`${REVIEWER_AGENT_NAME}\` subagent for this run **IN A SINGLE ASSISTANT TURN, AS MULTIPLE PARALLEL TASK TOOL_USE BLOCKS IN ONE MESSAGE.**
+   \u26A0\uFE0F  CRITICAL \u2014 PARALLELISM IS THE ONLY REASON LENSES EXIST. \u26A0\uFE0F
+   The default tool-call behavior of Claude Code (and most agent runtimes) is **serial dispatch**: emit one Task call, await result, emit next, await, etc. This collapses your fan-out into a sequential review where each lens adds N \xD7 (orchestrator-think-time + lens-execution-time) to wall time. **YOU MUST OVERRIDE THIS DEFAULT.** Emit ALL of your Task tool_use blocks in the SAME assistant message, BEFORE you read ANY result from ANY of them. If you find yourself emitting one Task call, then thinking about the result, then emitting another \u2014 STOP and re-issue them all together. The whole point of going multi-lens is the wall-clock speedup from parallel execution; serial dispatch defeats it entirely.
+   \u2705 Right pattern: one assistant turn with N Task tool_use blocks \u2192 wait \u2192 N results arrive together \u2192 aggregate.
+   \u274C Wrong pattern: turn 1 = Task(lens A) \u2192 turn 2 (after A's result) = Task(lens B) \u2192 turn 3 (after B's result) = Task(lens C). This is the failure mode. Do not do this.
+   You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches \u2014 concurrent context-pulling on the orchestrator side runs in parallel with the lens fan-out and costs zero extra wall time.
+   if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip the fan-out entirely on a single subagent failure. each subagent gets:
    - the diff path / target \u2014 reading the diff and the codebase is its job
    - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
    - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
-   - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
    - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
    - ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
    delegation discipline:
-   - do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
    - do NOT summarize the PR for them (biases toward a validation frame)
    - do NOT hand them a curated reading list (let them discover scope)
    - do NOT pre-shape their output with a finding schema
    - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
-5. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
+6. **aggregate & draft**: when the fan-out lands, merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
    for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
-6. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
+7. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
    note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
@@ -146454,10 +146579,10 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
 ${PR_SUMMARY_FORMAT}`
     },
-    // IncrementalReview shares Review's multi-lens orchestrator pattern but
-    // scopes the target to the incremental diff. The "issues must be NEW
-    // since the last Pullfrog review" filter lives at aggregation time
-    // (step 6), NOT in the subagent prompt — pushing the filter into
+    // IncrementalReview shares Review's 0-or-2+ lens pattern but scopes the
+    // target to the incremental diff. The "issues must be NEW since the last
+    // Pullfrog review" filter lives at aggregation time (step 8), NOT in the
+    // subagent prompt — pushing the filter into
     // subagents matches the canonical anneal anti-pattern of "list known
     // pre-existing failures — don't flag these" and suppresses signal on
     // regressions the new commits amplified. The review body is just
@@ -146476,38 +146601,57 @@ ${PR_SUMMARY_FORMAT}`
 3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
-4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 6 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
+4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 8 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
-5. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
+5. **triage**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces. pull as much context as you need to render a confident review: read related files, grep for callers of changed symbols, check tests that exercise the touched paths. **you are the synthesizer.**
-   if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 8's non-substantive path (do NOT submit a review).
+   if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 10's non-substantive path (do NOT submit a review).
    "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
    "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
    When unsure, treat as non-trivial.
-   otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
+6. **lens decision \u2014 0 or 2+, NEVER 1**.
-   dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 5 entirely on a single subagent failure. each subagent gets:
-   - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 6), not in the subagent prompt
+   The default is **0 lenses**: handle the re-review yourself end-to-end. Most incremental reviews land here \u2014 especially thread-reply re-reviews where the user is asking "did you address X?" rather than "review the diff again."
+   Dispatch **2+ \`${REVIEWER_AGENT_NAME}\` lenses in parallel** ONLY when ALL of the following are true:
+   - the incremental changes are substantive (>5 files changed AND >200 net new lines), OR touch a high-stakes subsystem (auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling)
+   - you can name 2+ distinct concrete failure modes the new commits plausibly introduce that warrant independent lenses
+   - parallel-orchestrated independent perspectives meaningfully outperform what you'd find solo
+   **NEVER dispatch exactly one lens.** Single-lens dispatch adds wall time and cost for no orthogonality benefit. Either go multi-lens (\u22652 in parallel) or do the re-review yourself.
+   Lens framing follows Review mode: themed lenses (correctness, security, etc.) and subsystem lenses (auth, billing, schema-migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens.
+7. **fan out (only if step 6 said 2+ lenses)**: dispatch every \`${REVIEWER_AGENT_NAME}\` subagent for this run **IN A SINGLE ASSISTANT TURN, AS MULTIPLE PARALLEL TASK TOOL_USE BLOCKS IN ONE MESSAGE.**
+   \u26A0\uFE0F  CRITICAL \u2014 PARALLELISM IS THE ONLY REASON LENSES EXIST. \u26A0\uFE0F
+   Default tool-call behavior is **serial dispatch**: emit one Task call, await result, emit next, await, etc. This collapses your fan-out into a sequential review where each lens adds N \xD7 (orchestrator-think-time + lens-execution-time) to wall time. **YOU MUST OVERRIDE THIS DEFAULT.** Emit ALL of your Task tool_use blocks in the SAME assistant message, BEFORE you read ANY result from ANY of them.
+   \u2705 Right pattern: one assistant turn with N Task tool_use blocks \u2192 wait \u2192 N results arrive together \u2192 aggregate.
+   \u274C Wrong pattern: turn 1 = Task(lens A) \u2192 turn 2 (after A's result) = Task(lens B). This is the failure mode.
+   You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches.
+   if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body. each subagent gets:
+   - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 8), not in the subagent prompt
    - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
-   - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
-   - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
-   - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
+   - **a Task \`description\` set to the lens name** \u2014 the harness reads this field to label log lines so parallel runs can be told apart.
+   - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs.
    - ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
    delegation discipline:
-   - do NOT lens-review the diff yourself in parallel with the subagents
    - do NOT summarize the changes for them (biases toward validation frame)
    - do NOT hand them a curated reading list (let them discover scope)
    - do NOT pre-shape their output with a finding schema
    - do NOT mention the other lenses (independence is the point)
-6. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
+8. **aggregate, draft, self-critique**: merge findings (yours + any subagent output if you went multi-lens); de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
-7. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
+9. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
-8. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
+10. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
    Same callout-intensity ladder as Review mode \u2014 \`[!CAUTION]\` (large red, "will break") \u2192 \`[!IMPORTANT]\` (large purple, "must address before merging") \u2192 \`[!NOTE]\` (small blue, "FYI") \u2192 no callout (plain text). And the same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
@@ -146856,20 +147000,30 @@ var ThinkingTimer = class {
     maximumFractionDigits: 1
   });
   lastToolResultTimestamp = null;
+  formatLine;
+  // node's native TS strip-only mode does not support parameter properties,
+  // so the formatter is declared as a field and assigned in the body.
+  constructor(formatLine = (l) => l) {
+    this.formatLine = formatLine;
+  }
   markToolResult() {
     this.lastToolResultTimestamp = performance5.now();
-    log.debug(`\xBB thinking timer: markToolResult at ${this.lastToolResultTimestamp}`);
+    log.debug(
+      this.formatLine(`\xBB thinking timer: markToolResult at ${this.lastToolResultTimestamp}`)
+    );
   }
   markToolCall() {
     const now = performance5.now();
     log.debug(
-      `\xBB thinking timer: markToolCall at ${now}, lastToolResult=${this.lastToolResultTimestamp}`
+      this.formatLine(
+        `\xBB thinking timer: markToolCall at ${now}, lastToolResult=${this.lastToolResultTimestamp}`
+      )
     );
     if (this.lastToolResultTimestamp === null) return;
     const elapsed = now - this.lastToolResultTimestamp;
     if (elapsed < THINKING_THRESHOLD) return;
     const seconds = elapsed / 1e3;
-    log.info(`\xBB thought for ${this.durationFormatter.format(seconds)}`);
+    log.info(this.formatLine(`\xBB thought for ${this.durationFormatter.format(seconds)}`));
   }
 };
@@ -146877,45 +147031,12 @@ var ThinkingTimer = class {
 import { readFile } from "node:fs/promises";
 function getUnsubmittedReview(toolState) {
   const mode = toolState.selectedMode;
-  if (mode !== "Review" && mode !== "IncrementalReview") return null;
-  if (toolState.review || toolState.finalSummaryWritten) return null;
   if (!toolState.hadProgressComment) return null;
-  return mode;
-}
-var MAX_HOOK_OUTPUT_CHARS = 4096;
-function truncateHookOutput(raw2) {
-  if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
-  return `...(truncated, showing last ${MAX_HOOK_OUTPUT_CHARS} chars)
-${raw2.slice(-MAX_HOOK_OUTPUT_CHARS)}`;
-}
-async function executeStopHook(script) {
-  log.info("\xBB executing stop hook...");
-  try {
-    const result = await spawn({
-      cmd: "bash",
-      args: ["-c", script],
-      env: process.env,
-      timeout: LIFECYCLE_HOOK_TIMEOUT_MS,
-      activityTimeout: 0,
-      onStdout: (chunk) => process.stdout.write(chunk),
-      onStderr: (chunk) => process.stderr.write(chunk)
-    });
-    if (result.exitCode === 0) {
-      log.info("\xBB stop hook passed");
-      return null;
-    }
-    const combined = [result.stderr.trim(), result.stdout.trim()].filter(Boolean).join("\n");
-    const output = truncateHookOutput(combined);
-    log.info(`\xBB stop hook failed with exit code ${result.exitCode}`);
-    return { exitCode: result.exitCode, output };
-  } catch (err) {
-    const isTimeout = err instanceof SpawnTimeoutError && (err.code === SPAWN_TIMEOUT_CODE || err.code === SPAWN_ACTIVITY_TIMEOUT_CODE);
-    const msg = err instanceof Error ? err.message : String(err);
-    log.warning(
-      `stop hook ${isTimeout ? "timed out" : "failed to spawn"}: ${msg} \u2014 skipping retry`
-    );
-    return null;
+  if (mode === "Review") return toolState.review ? null : "Review";
+  if (mode === "IncrementalReview") {
+    return toolState.review || toolState.finalSummaryWritten ? null : "IncrementalReview";
   }
+  return null;
 }
 function buildStopHookPrompt(failure) {
   return [
@@ -146965,10 +147086,6 @@ function buildUnsubmittedReviewPrompt(mode) {
 }
 async function collectPostRunIssues(ctx, options = {}) {
   const issues = {};
-  if (ctx.stopScript) {
-    const failure = await executeStopHook(ctx.stopScript);
-    if (failure) issues.stopHook = failure;
-  }
   const status = getGitStatus();
   const mode = ctx.toolState.selectedMode;
   if (status) {
@@ -147004,11 +147121,25 @@ function buildLearningsReflectionPrompt(filePath) {
     "",
     `the rolling learnings file is at \`${filePath}\`. read it first if you haven't already, then edit it in place using your native file tools. the server reads this file at end-of-run and persists any changes \u2014 there is no tool to call.`,
     "",
-    `keep the file healthy:`,
-    `- only add bullets when the finding is high-confidence AND broadly useful. skip speculative, one-off, or "maybe" findings.`,
-    `- prune bullets that are clearly wrong, no longer relevant, or low-signal (rarely useful). a focused, accurate file beats a long stale one.`,
-    `- format: flat bullet list, one fact per line starting with \`- \`. deduplicate against existing entries \u2014 if a bullet covers the same fact, update it in place instead of adding a duplicate.`,
-    `- leave the file alone if you have nothing substantively new to add and the existing entries still look healthy. silence is a valid outcome \u2014 just reply "done" and stop.`
+    `structure:`,
+    `- markdown hierarchy: \`## \` for top-level themes, \`### \` and deeper for sub-themes when a section grows. there is no fixed taxonomy \u2014 choose headings that fit THIS repo (e.g. for one repo \`## Migrations\` / \`## Local dev\` may make sense; for another, \`## API quirks\` / \`## Failure modes\`).`,
+    `- **no section over ~300 lines.** when a section is approaching that, split it: introduce \`### \` subsections grouping related bullets, or hoist a coherent group into a new top-level \`## \` section. granular sections mean future runs read targeted line ranges instead of slurping the whole file. this is the most important hygiene rule on long-lived repos.`,
+    `- if you find a flat unstructured list (legacy content from before this format), restructure it: read it, group related bullets, rewrite the file with \`## \` / \`### \` headings around them. don't preserve bad structure \u2014 fix it.`,
+    "",
+    `bullet hygiene:`,
+    `- one fact per line starting with \`- \`. each bullet is ONE specific durable fact, not a paragraph or essay.`,
+    `- aim for \u2264 240 chars per bullet. longer bullets are almost always mixing multiple facts that should be split, or burying the durable claim under PR-specific context that should be cut.`,
+    `- only add bullets when the finding is high-confidence AND broadly useful AND will still be true in 3+ months. skip speculative, one-off, or "maybe" findings.`,
+    `- prune bullets that are clearly wrong, no longer relevant, or low-signal. a focused, accurate file beats a long stale one. compressing two overlapping bullets into one tighter bullet counts as progress.`,
+    `- deduplicate against existing entries (in any section) \u2014 if a bullet covers the same fact, update it in place instead of adding a duplicate.`,
+    "",
+    `do NOT add bullets for:`,
+    `- pullfrog tool quirks (e.g. "\`shell\` timeout is in milliseconds", "\`git\` args must be a JSON array", "\`create_pull_request_review\` drops out-of-hunk comments", "\`push_branch\` may report timeout when push succeeded"). these are universal across repos and belong in the tool descriptions \u2014 flag the gap rather than hoarding the workaround per-repo.`,
+    `- references to specific PR numbers, review IDs, commit SHAs, branch names, or person handles ("PR #595 introduced X", "flagged in review 12345", "as of commit abc123"). repo state changes; these decay into noise within weeks.`,
+    `- dated assertions ("as of May 2026", "currently...", "for now..."). if a fact needs a date to be true, it isn't durable enough to belong here.`,
+    `- play-by-play of what THIS run did. learnings are for the NEXT run, not a retrospective.`,
+    "",
+    `if you have nothing substantively new to add AND the existing entries still look healthy and well-structured, leave the file alone \u2014 just reply "done" and stop. silence is a valid outcome.`
   ].join("\n");
 }
 async function runPostRunRetryLoop(params) {
@@ -147118,19 +147249,39 @@ function deriveLabelFromTaskInput(input) {
 }
 var SessionLabeler = class {
   labels = /* @__PURE__ */ new Map();
+  labelsByToolUseId = /* @__PURE__ */ new Map();
   pendingLabels = [];
   fallbackCounter = 0;
-  recordTaskDispatch(input) {
+  /**
+   * Record a Task/Agent tool dispatch.
+   *
+   * @param input  Task tool input — used to derive the lens label.
+   * @param toolUseId  Optional Agent tool_use id. When provided, future events
+   *                   carrying `parent_tool_use_id === toolUseId` resolve
+   *                   directly to this label without consuming the FIFO queue
+   *                   (Claude path). Always also pushed to the FIFO queue so
+   *                   the OpenCode path still works when toolUseId is absent.
+   */
+  recordTaskDispatch(input, toolUseId) {
     const label = deriveLabelFromTaskInput(input);
     this.pendingLabels.push(label);
+    if (toolUseId) this.labelsByToolUseId.set(toolUseId, label);
     return label;
   }
   /**
-   * Return a label for the given sessionID. Binds on first call.
-   * Pass undefined/empty for events that lack a session id — the caller
-   * gets ORCHESTRATOR_LABEL so the line is still attributable.
+   * Return a label for the given event.
+   *
+   * @param sessionID         Session id from the event (OpenCode: per-session;
+   *                          Claude: shared across orchestrator + subagents).
+   * @param parentToolUseId   Claude's `parent_tool_use_id` — non-null on
+   *                          subagent messages. When set and known, takes
+   *                          priority over the FIFO/sessionID path.
    */
-  labelFor(sessionID) {
+  labelFor(sessionID, parentToolUseId) {
+    if (parentToolUseId) {
+      const direct = this.labelsByToolUseId.get(parentToolUseId);
+      if (direct) return direct;
+    }
     if (!sessionID) return ORCHESTRATOR_LABEL;
     const existing = this.labels.get(sessionID);
     if (existing) return existing;
@@ -147192,8 +147343,9 @@ function writeMcpConfig(ctx) {
 function buildAgentsJson() {
   const agents2 = {
     [REVIEWER_AGENT_NAME]: {
-      description: "Read-only review subagent for self-review and lens-based code review. Reads only \u2014 no writes, no state-changing shell or MCP calls, no nested subagent dispatch.",
-      prompt: REVIEWER_SYSTEM_PROMPT
+      description: "Read-only review subagent for lens-based code review (correctness, security, billing-subsystem, etc.). Reads only \u2014 no writes, no state-changing shell or MCP calls, no nested subagent dispatch.",
+      prompt: REVIEWER_SYSTEM_PROMPT,
+      model: "claude-sonnet-4-6"
     }
   };
   return JSON.stringify(agents2);
@@ -147214,7 +147366,23 @@ function tailLines(text, maxCodeUnits) {
 async function runClaude(params) {
   const startTime = performance6.now();
   let eventCount = 0;
-  const thinkingTimer = new ThinkingTimer();
+  const labeler = new SessionLabeler();
+  function eventLabel(event) {
+    return labeler.labelFor(event.session_id ?? null, event.parent_tool_use_id ?? null);
+  }
+  function withLabel(label, message) {
+    return label === ORCHESTRATOR_LABEL ? message : formatWithLabel(label, message);
+  }
+  const thinkingTimers = /* @__PURE__ */ new Map();
+  function timerFor(label) {
+    let t = thinkingTimers.get(label);
+    if (!t) {
+      const formatLine = (line) => label === ORCHESTRATOR_LABEL ? line : formatWithLabel(label, line);
+      t = new ThinkingTimer(formatLine);
+      thinkingTimers.set(label, t);
+    }
+    return t;
+  }
   let finalOutput = "";
   let sessionId;
   let resultErrorSubtype = null;
@@ -147235,17 +147403,22 @@ async function runClaude(params) {
     } : void 0;
   }
   const handlers2 = {
-    system: (_event) => {
-      log.debug(`\xBB ${params.label} system event`);
+    system: (event) => {
+      const label = eventLabel(event);
+      log.debug(withLabel(label, `\xBB ${params.label} system event`));
     },
     assistant: (event) => {
       const content = event.message?.content;
       if (!content) return;
+      const label = eventLabel(event);
+      const boxTitle = label === ORCHESTRATOR_LABEL ? params.label : `${params.label} [${label}]`;
       for (const block of content) {
         if (block.type === "text" && block.text?.trim()) {
           const message = block.text.trim();
-          log.box(message, { title: params.label });
-          finalOutput = message;
+          log.box(message, { title: boxTitle });
+          if (label === ORCHESTRATOR_LABEL) {
+            finalOutput = message;
+          }
         } else if (block.type === "tool_use") {
           const toolName = block.name || "unknown";
           if (params.onToolUse) {
@@ -147254,20 +147427,25 @@ async function runClaude(params) {
               input: block.input
             });
           }
-          thinkingTimer.markToolCall();
-          log.toolCall({ toolName, input: block.input || {} });
-          if (toolName === "Task" && block.input && typeof block.input === "object") {
+          timerFor(label).markToolCall();
+          const inputFormatted = formatJsonValue(block.input || {});
+          const toolCallLine = inputFormatted !== "{}" ? `\xBB ${toolName}(${inputFormatted})` : `\xBB ${toolName}()`;
+          log.info(withLabel(label, toolCallLine));
+          if ((toolName === "Task" || toolName === "Agent") && block.input && typeof block.input === "object") {
             const taskInput = block.input;
-            const label = deriveLabelFromTaskInput(taskInput);
+            const dispatchedLabel = labeler.recordTaskDispatch(taskInput, block.id ?? null);
             log.info(
-              `\xBB dispatching subagent: ${label}` + (taskInput.subagent_type ? ` (subagent_type=${taskInput.subagent_type})` : "")
+              withLabel(
+                label,
+                `\xBB dispatching subagent: ${dispatchedLabel}` + (taskInput.subagent_type ? ` (subagent_type=${taskInput.subagent_type})` : "")
+              )
             );
           }
           if (toolName.includes("report_progress") && params.todoTracker) {
             log.debug("\xBB report_progress detected, disabling todo tracking");
             params.todoTracker.cancel();
           }
-          if (toolName === "TodoWrite" && params.todoTracker?.enabled) {
+          if (toolName === "TodoWrite" && params.todoTracker?.enabled && label === ORCHESTRATOR_LABEL) {
             params.todoTracker.update(block.input);
           }
         }
@@ -147283,17 +147461,18 @@ async function runClaude(params) {
     user: (event) => {
       const content = event.message?.content;
       if (!content) return;
+      const label = eventLabel(event);
       for (const block of content) {
         if (typeof block === "string") continue;
         if (block.type === "tool_result") {
-          thinkingTimer.markToolResult();
+          timerFor(label).markToolResult();
           const outputContent = typeof block.content === "string" ? block.content : Array.isArray(block.content) ? block.content.map(
             (entry) => typeof entry === "string" ? entry : typeof entry === "object" && entry !== null && "text" in entry ? String(entry.text) : JSON.stringify(entry)
           ).join("\n") : String(block.content);
           if (block.is_error) {
-            log.info(`\xBB tool error: ${outputContent}`);
+            log.info(withLabel(label, `\xBB tool error: ${outputContent}`));
           } else {
-            log.debug(`\xBB tool output: ${outputContent}`);
+            log.debug(withLabel(label, `\xBB tool output: ${outputContent}`));
           }
         }
       }
@@ -147362,8 +147541,9 @@ async function runClaude(params) {
     }
   };
   const recentStderr = [];
+  const recentNonJsonStdout = [];
   let lastProviderError = null;
-  let output = "";
+  const output = new TailBuffer(DEFAULT_MAX_RETAINED_BYTES);
   let stdoutBuffer = "";
   try {
     const result = await spawn({
@@ -147380,9 +147560,14 @@ async function runClaude(params) {
       // there's no shim-orphan issue like opencode-ai/bin/opencode, but
       // detached + killGroup is the right default for any agent runtime.
       killGroup: true,
+      // claude already drains every chunk via onStdout (NDJSON parsing) and
+      // onStderr (recentStderr ring buffer). retaining a second copy in the
+      // spawn wrapper would grow unbounded for long sessions and previously
+      // crashed the wrapper with RangeError. see issue #680.
+      retain: "none",
       onStdout: async (chunk) => {
         const text = chunk.toString();
-        output += text;
+        output.append(text);
         markActivity();
         stdoutBuffer += text;
         const lines = stdoutBuffer.split("\n");
@@ -147395,6 +147580,8 @@ async function runClaude(params) {
             event = JSON.parse(trimmed);
           } catch {
             log.debug(`\xBB non-JSON stdout line: ${trimmed.substring(0, 200)}`);
+            recentNonJsonStdout.push(trimmed);
+            if (recentNonJsonStdout.length > MAX_STDERR_LINES) recentNonJsonStdout.shift();
             continue;
           }
           eventCount++;
@@ -147457,16 +147644,19 @@ ${stderrContext}`);
     const usage = buildUsage();
     if (result.exitCode !== 0) {
       const errorContext = lastProviderError ? ` (${lastProviderError})` : "";
-      const truncatedStdout = result.stdout ? tailLines(result.stdout, 2048) : "";
-      const errorMessage = lastResultError || result.stderr || truncatedStdout || `unknown error - no output from Claude CLI${errorContext}`;
+      const stdoutSnapshot = output.toString();
+      const stderrSnapshot = recentStderr.join("\n");
+      const truncatedStdout = stdoutSnapshot ? tailLines(stdoutSnapshot, 2048) : "";
+      const nonJsonStdoutSnapshot = recentNonJsonStdout.join("\n");
+      const errorMessage = lastResultError || stderrSnapshot || nonJsonStdoutSnapshot || truncatedStdout || `unknown error - no output from Claude CLI${errorContext}`;
       log.error(
         `${params.label} exited with code ${result.exitCode}${errorContext}: ${errorMessage}`
       );
-      log.debug(`stdout: ${result.stdout?.substring(0, 500)}`);
-      log.debug(`stderr: ${result.stderr?.substring(0, 500)}`);
+      log.debug(`stdout: ${stdoutSnapshot.substring(0, 500)}`);
+      log.debug(`stderr: ${stderrSnapshot.substring(0, 500)}`);
       return {
         success: false,
-        output: finalOutput || output,
+        output: finalOutput || stdoutSnapshot,
         error: errorMessage,
         usage,
         sessionId
@@ -147475,7 +147665,7 @@ ${stderrContext}`);
     if (eventCount === 0 && lastProviderError) {
       return {
         success: false,
-        output: finalOutput || output,
+        output: finalOutput || output.toString(),
         error: `provider error: ${lastProviderError}`,
         usage,
         sessionId
@@ -147484,13 +147674,13 @@ ${stderrContext}`);
     if (resultErrorSubtype) {
       return {
         success: false,
-        output: finalOutput || output,
+        output: finalOutput || output.toString(),
         error: lastResultError || `result subtype: ${resultErrorSubtype}`,
         usage,
         sessionId
       };
     }
-    return { success: true, output: finalOutput || output, usage, sessionId };
+    return { success: true, output: finalOutput || output.toString(), usage, sessionId };
   } catch (error49) {
     params.todoTracker?.cancel();
     const duration4 = performance6.now() - startTime;
@@ -147509,7 +147699,7 @@ ${stderrContext}`
       );
     return {
       success: false,
-      output: finalOutput || output,
+      output: finalOutput || output.toString(),
       error: `${errorMessage} [${diagnosis}]`,
       usage: buildUsage(),
       sessionId
@@ -147559,7 +147749,9 @@ var claude = agent({
   run: async (ctx) => {
     const cliPath = await installClaudeCli();
     const specifier = ctx.payload.proxyModel ?? ctx.resolvedModel;
-    const model = specifier ? stripProviderPrefix(specifier) : void 0;
+    const bedrockModelId = process.env[BEDROCK_MODEL_ID_ENV]?.trim();
+    const isBedrockRoute = specifier !== void 0 && bedrockModelId !== void 0 && bedrockModelId === specifier && isBedrockAnthropicId(specifier);
+    const model = !specifier ? void 0 : isBedrockRoute ? specifier : stripProviderPrefix(specifier);
     const homeEnv = {
       HOME: ctx.tmpdir,
       XDG_CONFIG_HOME: join10(ctx.tmpdir, ".config")
@@ -147598,6 +147790,9 @@ var claude = agent({
       ...process.env,
       ...homeEnv
     };
+    if (isBedrockRoute) {
+      env2.CLAUDE_CODE_USE_BEDROCK = "1";
+    }
     const repoDir = process.cwd();
     log.info(`\xBB effort: ${effort}`);
     log.debug(`\xBB starting Pullfrog (Claude Code): node ${baseArgs.join(" ")}`);
@@ -147719,6 +147914,22 @@ export default async function pullfrogEventsPlugin() {
 }
 `;
+// agents/subagentModels.ts
+function deriveSubagentModels(orchestratorSpec) {
+  if (!orchestratorSpec) return { reviewer: void 0 };
+  for (const source of modelAliases) {
+    const matchedDirect = source.resolve === orchestratorSpec;
+    const matchedOR = source.openRouterResolve === orchestratorSpec;
+    if (!matchedDirect && !matchedOR) continue;
+    if (!source.subagentModel) return { reviewer: void 0 };
+    const target = modelAliases.find((a) => a.slug === source.subagentModel);
+    if (!target) return { reviewer: void 0 };
+    const reviewer = matchedOR ? target.openRouterResolve : target.resolve;
+    return { reviewer };
+  }
+  return { reviewer: void 0 };
+}
 // agents/opencode.ts
 async function installOpencodeCli() {
   return await installFromNpmTarball({
@@ -147728,7 +147939,6 @@ async function installOpencodeCli() {
     installDependencies: true
   });
 }
-var PULLFROG_OPENCODE_OUTPUT_LIMIT = 5e3;
 var GEMINI_3_DIRECT_THINKING_LEVEL = "medium";
 var GEMINI_3_DIRECT_API_IDS = ["gemini-3.1-pro-preview", "gemini-3-flash-preview"];
 function buildSecurityConfig(ctx, model) {
@@ -147744,7 +147954,21 @@ function buildSecurityConfig(ctx, model) {
     mcp: {
       [pullfrogMcpName]: { type: "remote", url: ctx.mcpServerUrl }
     },
-    agent: buildReviewerAgentConfig(),
+    agent: (() => {
+      const cfg = buildReviewerAgentConfig(model);
+      const reviewerModel = cfg[REVIEWER_AGENT_NAME]?.model ?? "(inherit)";
+      log.info(`\xBB subagent models: reviewfrog=${reviewerModel}`);
+      return cfg;
+    })(),
+    // opt into opencode's experimental `batch` tool (added in
+    // anomalyco/opencode PR #2983, opt-in via `experimental.batch_tool`). it
+    // exposes a single `batch` tool that runs 1-25 independent tool calls
+    // (read/grep/glob/bash/etc.) concurrently in one assistant turn, which
+    // collapses the dominant grep→20×read pattern into a single round trip.
+    // edits are explicitly disallowed inside the batch upstream. paired with
+    // the "Parallel tool execution" guidance in utils/instructions.ts so the
+    // model actually reaches for it. see wiki/prompt.md.
+    experimental: { batch_tool: true },
     provider: {
       google: {
         models: Object.fromEntries(
@@ -147769,12 +147993,14 @@ function buildSecurityConfig(ctx, model) {
   }
   return JSON.stringify(config3);
 }
-function buildReviewerAgentConfig() {
+function buildReviewerAgentConfig(orchestratorModel) {
+  const overrides = deriveSubagentModels(orchestratorModel);
   return {
     [REVIEWER_AGENT_NAME]: {
-      description: "Read-only review subagent for self-review and lens-based code review. Reads only \u2014 no writes, no state-changing shell or MCP calls, no nested subagent dispatch.",
+      description: "Read-only review subagent for lens-based code review (correctness, security, billing-subsystem, etc.). Reads only \u2014 no writes, no state-changing shell or MCP calls, no nested subagent dispatch.",
       mode: "subagent",
-      prompt: REVIEWER_SYSTEM_PROMPT
+      prompt: REVIEWER_SYSTEM_PROMPT,
+      ...overrides.reviewer !== void 0 ? { model: overrides.reviewer } : {}
     }
   };
 }
@@ -147799,7 +148025,7 @@ function autoSelectModel(cliPath) {
   const availableSet = new Set(availableModels);
   if (availableSet.size > 0) {
     log.debug(`\xBB opencode models (${availableSet.size}): ${availableModels.join(", ")}`);
-    const match3 = modelAliases.find((a) => a.preferred && availableSet.has(a.resolve)) ?? modelAliases.find((a) => availableSet.has(a.resolve));
+    const match3 = modelAliases.find((a) => !a.hidden && a.preferred && availableSet.has(a.resolve)) ?? modelAliases.find((a) => !a.hidden && availableSet.has(a.resolve));
     if (match3) {
       log.info(
         `\xBB model: ${match3.resolve} (auto-selected${match3.preferred ? " \u2014 preferred" : ""} curated match)`
@@ -147817,7 +148043,6 @@ function autoSelectModel(cliPath) {
 async function runOpenCode(params) {
   const startTime = performance7.now();
   let eventCount = 0;
-  const thinkingTimer = new ThinkingTimer();
   let finalOutput = "";
   let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
   let accumulatedCostUsd = 0;
@@ -147834,6 +148059,16 @@ async function runOpenCode(params) {
   function withLabel(label, message) {
     return label === ORCHESTRATOR_LABEL ? message : formatWithLabel(label, message);
   }
+  const thinkingTimers = /* @__PURE__ */ new Map();
+  function timerFor(label) {
+    let t = thinkingTimers.get(label);
+    if (!t) {
+      const formatLine = (line) => label === ORCHESTRATOR_LABEL ? line : formatWithLabel(label, line);
+      t = new ThinkingTimer(formatLine);
+      thinkingTimers.set(label, t);
+    }
+    return t;
+  }
   const taskDispatchByCallID = /* @__PURE__ */ new Map();
   const pendingTaskDispatches = [];
   const knownNonTaskCallIDs = /* @__PURE__ */ new Set();
@@ -147982,7 +148217,7 @@ async function runOpenCode(params) {
           input: event.part?.state?.input
         });
       }
-      thinkingTimer.markToolCall();
+      timerFor(label).markToolCall();
       const inputFormatted = formatJsonValue(event.part?.state?.input || {});
       const toolCallLine = inputFormatted !== "{}" ? `\xBB ${toolName}(${inputFormatted})` : `\xBB ${toolName}()`;
       log.info(withLabel(label, toolCallLine));
@@ -148006,7 +148241,7 @@ async function runOpenCode(params) {
       const status = event.part?.state?.status || event.status || "unknown";
       const output2 = event.part?.state?.output || event.output;
       const label = eventLabel(event);
-      thinkingTimer.markToolResult();
+      timerFor(label).markToolResult();
       if (taskDispatchByCallID.size > 0 || pendingTaskDispatches.length > 0) {
         if (toolId && taskDispatchByCallID.has(toolId)) {
           const dispatch = taskDispatchByCallID.get(toolId);
@@ -148131,7 +148366,7 @@ async function runOpenCode(params) {
   const recentStderr = [];
   let lastProviderError = null;
   let agentErrorEvent = null;
-  let output = "";
+  const output = new TailBuffer(DEFAULT_MAX_RETAINED_BYTES);
   let stdoutBuffer = "";
   try {
     const result = await spawn({
@@ -148149,6 +148384,11 @@ async function runOpenCode(params) {
       // never fires — producing zombie runs. detached + killGroup nukes the
       // whole tree.
       killGroup: true,
+      // we already drain every chunk via onStdout/onStderr (NDJSON parsing
+      // + recentStderr ring buffer). retaining a second copy in the spawn
+      // wrapper would grow unbounded for multi-lens Reviews and previously
+      // crashed the wrapper with RangeError at ~1 GiB. see issue #680.
+      retain: "none",
       // NB: we used to pass `isPausedExternally: isSubagentInFlight` to suspend
       // the activity timer during subagent dispatches. unnecessary now that
       // our injected plugin (action/agents/opencodePlugin.ts) re-emits
@@ -148158,7 +148398,7 @@ async function runOpenCode(params) {
       // (~3.3 plugin events/sec during a typical subagent run).
       onStdout: async (chunk) => {
         const text = chunk.toString();
-        output += text;
+        output.append(text);
         markActivity();
         stdoutBuffer += text;
         const lines = stdoutBuffer.split("\n");
@@ -148247,18 +148487,25 @@ ${stderrContext}`);
     const usage = buildUsage();
     if (result.exitCode !== 0) {
       const errorContext = lastProviderError ? ` (${lastProviderError})` : "";
-      const errorMessage = result.stderr || result.stdout || `unknown error - no output from OpenCode CLI${errorContext}`;
+      const stdoutSnapshot = output.toString();
+      const stderrSnapshot = recentStderr.join("\n");
+      const errorMessage = stderrSnapshot || stdoutSnapshot || `unknown error - no output from OpenCode CLI${errorContext}`;
       log.error(
         `${params.label} exited with code ${result.exitCode}${errorContext}: ${errorMessage}`
       );
-      log.debug(`stdout: ${result.stdout?.substring(0, 500)}`);
-      log.debug(`stderr: ${result.stderr?.substring(0, 500)}`);
-      return { success: false, output: finalOutput || output, error: errorMessage, usage };
+      log.debug(`stdout: ${stdoutSnapshot.substring(0, 500)}`);
+      log.debug(`stderr: ${stderrSnapshot.substring(0, 500)}`);
+      return {
+        success: false,
+        output: finalOutput || stdoutSnapshot,
+        error: errorMessage,
+        usage
+      };
     }
     if (eventCount === 0 && lastProviderError) {
       return {
         success: false,
-        output: finalOutput || output,
+        output: finalOutput || output.toString(),
         error: `provider error: ${lastProviderError}`,
         usage
       };
@@ -148269,12 +148516,12 @@ ${stderrContext}`);
       const errorMessage = errorEvent.error?.data?.message || errorEvent.error?.name || JSON.stringify(errorEvent);
       return {
         success: false,
-        output: finalOutput || output,
+        output: finalOutput || output.toString(),
         error: `${errorName}: ${errorMessage}`,
         usage
       };
     }
-    return { success: true, output: finalOutput || output, usage };
+    return { success: true, output: finalOutput || output.toString(), usage };
   } catch (error49) {
     params.todoTracker?.cancel();
     const duration4 = performance7.now() - startTime;
@@ -148293,7 +148540,7 @@ ${stderrContext}`
       );
     return {
       success: false,
-      output: finalOutput || output,
+      output: finalOutput || output.toString(),
       error: `${errorMessage} [${diagnosis}]`,
       usage: buildUsage()
     };
@@ -148304,7 +148551,10 @@ var opencode = agent({
   install: installOpencodeCli,
   run: async (ctx) => {
     const cliPath = await installOpencodeCli();
-    const model = ctx.payload.proxyModel ?? ctx.resolvedModel ?? autoSelectModel(cliPath);
+    const rawModel = ctx.payload.proxyModel ?? ctx.resolvedModel ?? autoSelectModel(cliPath);
+    const bedrockModelId = process.env[BEDROCK_MODEL_ID_ENV]?.trim();
+    const isBedrockRoute = rawModel !== void 0 && bedrockModelId !== void 0 && bedrockModelId === rawModel;
+    const model = isBedrockRoute ? `amazon-bedrock/${rawModel}` : rawModel;
     const homeEnv = {
       HOME: ctx.tmpdir,
       XDG_CONFIG_HOME: join11(ctx.tmpdir, ".config")
@@ -148333,7 +148583,6 @@ var opencode = agent({
       ...homeEnv,
       OPENCODE_CONFIG_CONTENT: buildSecurityConfig(ctx, model),
       OPENCODE_PERMISSION: permissionOverride,
-      OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX: PULLFROG_OPENCODE_OUTPUT_LIMIT.toString(),
       GOOGLE_GENERATIVE_AI_API_KEY: process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GEMINI_API_KEY
     };
     const repoDir = process.cwd();
@@ -148376,13 +148625,29 @@ function hasEnvVar(name) {
 function hasClaudeCodeAuth() {
   return hasEnvVar("CLAUDE_CODE_OAUTH_TOKEN") || hasEnvVar("ANTHROPIC_API_KEY");
 }
+function hasBedrockAuth() {
+  return hasEnvVar("AWS_BEARER_TOKEN_BEDROCK") || hasEnvVar("AWS_ACCESS_KEY_ID") && hasEnvVar("AWS_SECRET_ACCESS_KEY");
+}
+function resolveSlug(slug2) {
+  const alias = resolveDisplayAlias(slug2);
+  if (alias?.routing === "bedrock") {
+    const bedrockId = process.env[BEDROCK_MODEL_ID_ENV]?.trim();
+    if (!bedrockId) {
+      throw new Error(
+        `${BEDROCK_MODEL_ID_ENV} env var is required when the model is set to "${slug2}". set it to an AWS Bedrock model ID (e.g. "us.anthropic.claude-opus-4-7", "amazon.nova-pro-v1:0"). see https://docs.pullfrog.com/bedrock for setup.`
+      );
+    }
+    return bedrockId;
+  }
+  return resolveCliModel(slug2);
+}
 function resolveModel(ctx) {
   const envModel = process.env.PULLFROG_MODEL?.trim();
   if (envModel) {
-    return resolveCliModel(envModel) ?? envModel;
+    return resolveSlug(envModel) ?? envModel;
   }
   if (ctx.slug) {
-    const resolved = resolveCliModel(ctx.slug);
+    const resolved = resolveSlug(ctx.slug);
     if (resolved) {
       return resolved;
     }
@@ -148398,6 +148663,9 @@ function resolveAgent(ctx) {
     }
     log.warning(`\xBB unknown PULLFROG_AGENT="${envAgent}" \u2014 falling through to auto-select`);
   }
+  if (ctx.model && hasBedrockAuth() && process.env[BEDROCK_MODEL_ID_ENV]?.trim() === ctx.model) {
+    return isBedrockAnthropicId(ctx.model) ? agents.claude : agents.opencode;
+  }
   if (ctx.model) {
     try {
       const provider2 = getModelProvider(ctx.model);
@@ -148412,31 +148680,56 @@ function resolveAgent(ctx) {
 // utils/apiKeys.ts
 var knownApiKeys = new Set(Object.values(providers).flatMap((p) => [...p.envVars]));
+var MISSING_KEY_MARKER = "no API key found";
 function buildMissingApiKeyError(params) {
-  const apiUrl = getApiUrl();
-  const settingsUrl = `${apiUrl}/console/${params.owner}/${params.name}`;
-  const githubRepoUrl = `https://github.com/${params.owner}/${params.name}`;
-  const githubSecretsUrl = `${githubRepoUrl}/settings/secrets/actions`;
-  return `no API key found. Pullfrog requires at least one LLM provider API key.
+  const githubSecretsUrl = `https://github.com/${params.owner}/${params.name}/settings/secrets/actions`;
+  const settingsUrl = `${getApiUrl()}/console/${params.owner}/${params.name}`;
+  return [
+    `**${MISSING_KEY_MARKER}** \u2014 Pullfrog needs at least one LLM provider API key (e.g. \`ANTHROPIC_API_KEY\`, \`OPENAI_API_KEY\`, \`GEMINI_API_KEY\`) configured as a GitHub Actions secret.`,
+    "",
+    `[Open repo secrets \u2192](${githubSecretsUrl}) \xB7 [Configure model \u2192](${settingsUrl}) \xB7 [Setup docs \u2192](https://docs.pullfrog.com/keys) \xB7 [Ask in Discord \u2192](https://discord.gg/8y96raFg8e)`
+  ].join("\n");
+}
+function buildBedrockSetupError(params) {
+  const githubSecretsUrl = `https://github.com/${params.owner}/${params.name}/settings/secrets/actions`;
+  return `Bedrock model selected but required configuration is missing: ${params.missing.join(", ")}.
-to fix this, add the required secret to your GitHub repository:
+add the missing secret(s) to your GitHub repository at ${githubSecretsUrl}, then reference them in your workflow's \`env:\` block:
-1. go to: ${githubSecretsUrl}
-2. click "New repository secret"
-3. set the name to your provider's key (e.g., \`ANTHROPIC_API_KEY\`, \`OPENAI_API_KEY\`, \`GEMINI_API_KEY\`)
-4. set the value to your API key
-5. click "Add secret"
+  AWS_BEARER_TOKEN_BEDROCK: \${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
+  AWS_REGION: \${{ secrets.AWS_REGION }}
+  ${BEDROCK_MODEL_ID_ENV}: \${{ secrets.${BEDROCK_MODEL_ID_ENV} }}
-configure your model at ${settingsUrl}
+\`AWS_BEARER_TOKEN_BEDROCK\` may be substituted with \`AWS_ACCESS_KEY_ID\` + \`AWS_SECRET_ACCESS_KEY\` (and optional \`AWS_SESSION_TOKEN\`) if you prefer access keys.
-for full setup instructions, see https://docs.pullfrog.com/keys`;
+for full setup instructions, see https://docs.pullfrog.com/bedrock`;
 }
 function hasEnvVar2(name) {
   const value2 = process.env[name];
   return typeof value2 === "string" && value2.length > 0;
 }
+function validateBedrockSetup(params) {
+  const hasAuth = hasEnvVar2("AWS_BEARER_TOKEN_BEDROCK") || hasEnvVar2("AWS_ACCESS_KEY_ID") && hasEnvVar2("AWS_SECRET_ACCESS_KEY");
+  const missing = [];
+  if (!hasAuth)
+    missing.push("AWS_BEARER_TOKEN_BEDROCK (or AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY)");
+  if (!hasEnvVar2("AWS_REGION")) missing.push("AWS_REGION");
+  if (!hasEnvVar2(BEDROCK_MODEL_ID_ENV)) missing.push(BEDROCK_MODEL_ID_ENV);
+  if (missing.length > 0) {
+    throw new Error(buildBedrockSetupError({ owner: params.owner, name: params.name, missing }));
+  }
+}
 function validateAgentApiKey(params) {
   if (params.model) {
+    const alias = resolveDisplayAlias(params.model);
+    if (alias?.routing === "bedrock") {
+      validateBedrockSetup({ owner: params.owner, name: params.name });
+      return;
+    }
+    if (!params.model.includes("/")) {
+      validateBedrockSetup({ owner: params.owner, name: params.name });
+      return;
+    }
     const requiredVars = getModelEnvVars(params.model);
     if (requiredVars.length === 0) return;
     if (requiredVars.some((v) => hasEnvVar2(v))) return;
@@ -148447,6 +148740,22 @@ function validateAgentApiKey(params) {
     throw new Error(buildMissingApiKeyError({ owner: params.owner, name: params.name }));
   }
 }
+function isApiKeyAuthError(text) {
+  if (!text) return false;
+  return text.includes(MISSING_KEY_MARKER) || /Invalid API key/i.test(text) || /\bUser not found\b/i.test(text) || /\bInvalid authentication\b/i.test(text);
+}
+function formatApiKeyErrorSummary(params) {
+  if (params.raw.includes(MISSING_KEY_MARKER)) {
+    return buildMissingApiKeyError({ owner: params.owner, name: params.name });
+  }
+  const githubSecretsUrl = `https://github.com/${params.owner}/${params.name}/settings/secrets/actions`;
+  const settingsUrl = `${getApiUrl()}/console/${params.owner}/${params.name}`;
+  return [
+    `**Your LLM provider API key was rejected (401).** Rotate the key in your provider dashboard, then update the matching GitHub Actions secret.`,
+    "",
+    `[Update repo secret \u2192](${githubSecretsUrl}) \xB7 [Model settings \u2192](${settingsUrl}) \xB7 [Setup docs \u2192](https://docs.pullfrog.com/keys) \xB7 [Ask in Discord \u2192](https://discord.gg/8y96raFg8e)`
+  ].join("\n");
+}
 // utils/body.ts
 var import_turndown = __toESM(require_turndown_cjs(), 1);
@@ -152199,6 +152508,14 @@ function isOIDCAvailable() {
     process.env.ACTIONS_ID_TOKEN_REQUEST_URL && process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN
   );
 }
+var TokenExchangeError = class extends Error {
+  status;
+  constructor(status, message) {
+    super(message);
+    this.name = "TokenExchangeError";
+    this.status = status;
+  }
+};
 async function acquireTokenViaOIDC(opts) {
   const oidcToken = await core2.getIDToken("pullfrog-api");
   const repos = [...opts?.repos ?? []];
@@ -152223,7 +152540,16 @@ async function acquireTokenViaOIDC(opts) {
     });
     clearTimeout(timeoutId);
     if (!tokenResponse.ok) {
-      throw new Error(`Token exchange failed: ${tokenResponse.status} ${tokenResponse.statusText}`);
+      let serverMessage;
+      try {
+        const body = await tokenResponse.json();
+        if (typeof body.error === "string") serverMessage = body.error;
+      } catch {
+      }
+      throw new TokenExchangeError(
+        tokenResponse.status,
+        serverMessage ?? `Token exchange failed: ${tokenResponse.status} ${tokenResponse.statusText}`
+      );
     }
     const tokenData = await tokenResponse.json();
     return tokenData.token;
@@ -152344,7 +152670,10 @@ async function acquireNewToken(opts) {
   if (isOIDCAvailable()) {
     return await retry(() => acquireTokenViaOIDC(opts), {
       label: "token exchange",
-      shouldRetry: (error49) => error49 instanceof Error && (error49.name === "AbortError" || error49.message.includes("fetch failed") || error49.message.includes("ECONNRESET") || error49.message.includes("ETIMEDOUT") || error49.message.includes("Token exchange failed"))
+      shouldRetry: (error49) => {
+        if (error49 instanceof TokenExchangeError) return error49.status >= 500 || error49.status === 429;
+        return error49 instanceof Error && (error49.message.includes("timed out") || error49.message.includes("fetch failed") || error49.message.includes("ECONNRESET") || error49.message.includes("ETIMEDOUT"));
+      }
     });
   } else {
     return await acquireTokenViaGitHubApp(opts);
@@ -152891,6 +153220,21 @@ ${getStandaloneModeInstructions(ctx.payload.event.trigger, t, ctx.outputSchema)}
 Trust the tools \u2014 do not repeatedly verify file contents or git status after operations. If a tool reports success, proceed to the next step. Only verify if you encounter an actual error. Exception: right before \`${t("push_branch")}\`, ensure the working tree is clean \u2014 that tool rejects dirty trees, and tests you ran earlier often leave untracked output.
+### Parallel tool execution
+For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously in a single assistant turn rather than sequentially. The dominant failure mode is grep \u2192 read \u2192 read \u2192 read \u2192 read across separate turns when one round trip would do. Always parallelize when calls are independent:
+- reading multiple files (especially after a grep returns candidates)
+- multiple greps with different patterns
+- glob + grep + read combos
+- listing multiple directories
+- inspecting multiple MCP tools or resources
+Do NOT parallelize operations that depend on prior output (e.g. create a file then read it), or ordered stateful mutations. Edits are not parallelizable \u2014 sequence those normally.${ctx.agentId === "opencode" ? `
+On OpenCode you also have a \`batch\` tool that bundles 1-25 independent calls into one wrapper call. Reach for it whenever you have >=2 independent calls. Native parallel tool_use and \`batch\` both achieve one round trip instead of N \u2014 use whichever your provider supports best.` : `
+Emit multiple \`tool_use\` blocks in the same assistant message for independent calls \u2014 the runtime executes them concurrently. Do not wait for one tool result before issuing the next independent call.`}
 ### Command execution
 Never use \`sleep\` to wait for commands to complete. Commands run synchronously \u2014 when the shell tool returns, the command has finished.
@@ -152936,10 +153280,31 @@ function buildPromptContext(ctx) {
     userQuoted: user ? user.split("\n").map((line) => `> ${line}`).join("\n") : ""
   };
 }
-function assembleFullPrompt(ctx) {
-  const learningsSection = ctx.learningsFilePath ? `************* LEARNINGS *************
+function renderLearningsToc(headings) {
+  if (headings.length === 0) return "";
+  const rootDepth = Math.min(...headings.map((h) => h.depth));
+  return headings.map((h) => {
+    const indent2 = " ".repeat((h.depth - rootDepth) * 2);
+    return `${indent2}- ${h.title} (L${h.startLine}-L${h.endLine})`;
+  }).join("\n");
+}
+function buildLearningsSection(ctx) {
+  if (!ctx.filePath) return "";
+  const intro = `Repo-level learnings accumulated by previous agent runs live at \`${ctx.filePath}\`. Use this file as durable context (test commands, conventions, gotchas, architecture notes).`;
+  const tocBody = ctx.headings.length === 0 ? "(no headings yet \u2014 file is empty or a flat list. read the whole file. during the post-run reflection turn, structure it with `## ` / `### ` headings so future runs can read targeted ranges.)" : `Read targeted line ranges via your native file tool \u2014 do NOT slurp the whole file. Each range starts at the section heading line, so reading the range gives you heading + body together.
+${renderLearningsToc(ctx.headings)}`;
+  return `************* LEARNINGS *************
-Repo-level learnings accumulated by previous agent runs live at \`${ctx.learningsFilePath}\`. Read this file early and let the entries inform your approach (test commands, conventions, gotchas, etc.). The file may be empty if no learnings have been collected yet.` : "";
+${intro}
+${tocBody}`;
+}
+function assembleFullPrompt(ctx) {
+  const learningsSection = buildLearningsSection({
+    filePath: ctx.learningsFilePath,
+    headings: ctx.learningsHeadings
+  });
   const runtimeSection = `************* RUNTIME *************
 ${ctx.runtime}`;
@@ -152967,7 +153332,10 @@ function resolveInstructions(ctx) {
     tocEntries.push({ label: "EVENT CONTEXT", description: "related PR/issue data" });
   tocEntries.push({ label: "SYSTEM", description: "persona, security, tools, workflow rules" });
   if (pctx.learningsFilePath)
-    tocEntries.push({ label: "LEARNINGS", description: "repo-specific knowledge file path" });
+    tocEntries.push({
+      label: "LEARNINGS",
+      description: "repo-specific knowledge file path + heading TOC"
+    });
   tocEntries.push({ label: "RUNTIME", description: "environment metadata" });
   const toc = buildToc(tocEntries);
   const full = assembleFullPrompt({
@@ -152977,6 +153345,7 @@ function resolveInstructions(ctx) {
     eventContext,
     system,
     learningsFilePath: pctx.learningsFilePath,
+    learningsHeadings: pctx.learningsHeadings,
     runtime: pctx.runtime
   });
   const event = [pctx.eventTitle, pctx.eventMetadata].filter(Boolean).join("\n\n---\n\n");
@@ -152994,7 +153363,7 @@ function resolveInstructions(ctx) {
 import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
 import { dirname as dirname4, join as join14 } from "node:path";
 var LEARNINGS_FILE_NAME = "pullfrog-learnings.md";
-var MAX_LEARNINGS_LENGTH = 1e4;
+var MAX_LEARNINGS_LENGTH = 1e5;
 function learningsFilePath(tmpdir3) {
   return join14(tmpdir3, LEARNINGS_FILE_NAME);
 }
@@ -153004,6 +153373,15 @@ async function seedLearningsFile(params) {
   await writeFile2(path3, params.current ?? "", "utf8");
   return path3;
 }
+var TRUNCATION_LINE_BOUNDARY_TOLERANCE = 4096;
+function truncateAtLineBoundary(body, cap) {
+  if (body.length <= cap) return body;
+  const head = body.slice(0, cap);
+  const lastNewline = head.lastIndexOf("\n");
+  if (lastNewline <= 0) return head;
+  if (cap - lastNewline > TRUNCATION_LINE_BOUNDARY_TOLERANCE) return head;
+  return head.slice(0, lastNewline);
+}
 async function readLearningsFile(path3) {
   let raw2;
   try {
@@ -153011,16 +153389,26 @@ async function readLearningsFile(path3) {
   } catch {
     return null;
   }
-  const trimmed = raw2.trim();
-  if (trimmed.length > MAX_LEARNINGS_LENGTH) return trimmed.slice(0, MAX_LEARNINGS_LENGTH);
-  return trimmed;
+  return truncateAtLineBoundary(raw2.trim(), MAX_LEARNINGS_LENGTH);
 }
 // utils/normalizeEnv.ts
-function maskValue(value2) {
-  if (value2 && typeof value2 === "string" && value2.trim().length > 0) {
-    console.log(`::add-mask::${value2}`);
+var core4 = __toESM(require_core(), 1);
+function sanitizeSecret(key, value2) {
+  const trimmed = value2.trim();
+  if (trimmed.length === 0) {
+    log.warning(
+      `\xBB ${key} is whitespace-only \u2014 leaving env var unchanged. check your secret value.`
+    );
+    return null;
+  }
+  if (trimmed !== value2) {
+    log.warning(
+      `\xBB stripped whitespace from ${key} (whitespace in secret values breaks GitHub Actions log masking)`
+    );
   }
+  core4.setSecret(trimmed);
+  return trimmed;
 }
 function normalizeEnv() {
   const upperKeys = /* @__PURE__ */ new Map();
@@ -153031,11 +153419,6 @@ function normalizeEnv() {
     upperKeys.set(upper2, existing);
   }
   for (const [upperKey, keys] of upperKeys) {
-    if (isSensitiveEnvName(upperKey)) {
-      for (const key of keys) {
-        maskValue(process.env[key]);
-      }
-    }
     if (keys.length === 1) {
       const key = keys[0];
       if (key !== upperKey) {
@@ -153058,10 +153441,17 @@ function normalizeEnv() {
     }
     process.env[upperKey] = preferredValue;
   }
+  for (const key of Object.keys(process.env)) {
+    if (!isSensitiveEnvName(key)) continue;
+    const value2 = process.env[key];
+    if (typeof value2 !== "string" || value2.length === 0) continue;
+    const sanitized = sanitizeSecret(key, value2);
+    if (sanitized !== null) process.env[key] = sanitized;
+  }
 }
 // utils/payload.ts
-var core4 = __toESM(require_core(), 1);
+var core5 = __toESM(require_core(), 1);
 import { isAbsolute as isAbsolute2, resolve as resolve2 } from "node:path";
 // utils/versioning.ts
@@ -153125,7 +153515,7 @@ function resolveCwd(cwd) {
   return workspace ? resolve2(workspace, cwd) : cwd;
 }
 function resolvePromptInput() {
-  const prompt = core4.getInput("prompt", { required: true });
+  const prompt = core5.getInput("prompt", { required: true });
   let parsed2;
   try {
     parsed2 = JSON.parse(prompt);
@@ -153141,11 +153531,11 @@ function resolvePromptInput() {
 }
 function resolveNonPromptInputs() {
   return Inputs.omit("prompt").assert({
-    model: core4.getInput("model") || void 0,
-    timeout: core4.getInput("timeout") || void 0,
-    cwd: core4.getInput("cwd") || void 0,
-    push: core4.getInput("push") || void 0,
-    shell: core4.getInput("shell") || void 0
+    model: core5.getInput("model") || void 0,
+    timeout: core5.getInput("timeout") || void 0,
+    cwd: core5.getInput("cwd") || void 0,
+    push: core5.getInput("push") || void 0,
+    shell: core5.getInput("shell") || void 0
   });
 }
 var isPullfrog = (actor) => {
@@ -153346,6 +153736,7 @@ var defaultSettings = {
   prApproveEnabled: false,
   modeInstructions: {},
   learnings: null,
+  learningsHeadings: [],
   envAllowlist: null
 };
 var defaultRunContext = {
@@ -153386,7 +153777,8 @@ async function fetchRunContext(params) {
         setupScript: data.settings?.setupScript ?? null,
         postCheckoutScript: data.settings?.postCheckoutScript ?? null,
         prepushScript: data.settings?.prepushScript ?? null,
-        stopScript: data.settings?.stopScript ?? null
+        stopScript: data.settings?.stopScript ?? null,
+        learningsHeadings: data.settings?.learningsHeadings ?? []
       },
       apiToken: data.apiToken,
       oss: data.oss ?? false,
@@ -153401,13 +153793,13 @@ async function fetchRunContext(params) {
 }
 // utils/runContextData.ts
-var core5 = __toESM(require_core(), 1);
+var core6 = __toESM(require_core(), 1);
 async function resolveRunContextData(params) {
   log.info(`\xBB running Pullfrog v${package_default.version}...`);
   const repoContext = parseRepoContext();
   let oidcToken;
   try {
-    oidcToken = await core5.getIDToken("pullfrog-api");
+    oidcToken = await core6.getIDToken("pullfrog-api");
   } catch {
   }
   const [repoResponse, runContext] = await Promise.all([
@@ -153710,7 +154102,7 @@ async function resolveRun(params) {
 // main.ts
 function resolveOutputSchema() {
-  const raw2 = core6.getInput("output_schema");
+  const raw2 = core7.getInput("output_schema");
   if (!raw2) return void 0;
   let parsed2;
   try {
@@ -153878,7 +154270,7 @@ async function buildProxyTokenHeaders(ctx) {
   if (ctx.oidcCredentials) {
     process.env.ACTIONS_ID_TOKEN_REQUEST_URL = ctx.oidcCredentials.requestUrl;
     process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = ctx.oidcCredentials.requestToken;
-    const oidcToken = await core6.getIDToken("pullfrog-api");
+    const oidcToken = await core7.getIDToken("pullfrog-api");
     delete process.env.ACTIONS_ID_TOKEN_REQUEST_URL;
     delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
     return { Authorization: `Bearer ${oidcToken}` };
@@ -153900,7 +154292,7 @@ async function resolveProxyModel(ctx) {
   const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials, repo: ctx.repo });
   if (!key) return;
   process.env.OPENROUTER_API_KEY = key;
-  core6.setSecret(key);
+  core7.setSecret(key);
   ctx.payload.proxyModel = ctx.proxyModel;
   const label = ctx.oss ? "oss" : "router";
   log.info(`\xBB proxy: ${label} \u2192 ${ctx.proxyModel}`);
@@ -154012,8 +154404,8 @@ async function main() {
     if (runContext.dbSecrets) {
       for (const [key, value2] of Object.entries(runContext.dbSecrets)) {
         if (!process.env[key]) {
-          process.env[key] = value2;
-          core6.setSecret(value2);
+          const sanitized = sanitizeSecret(key, value2);
+          if (sanitized !== null) process.env[key] = sanitized;
         }
       }
       const count = Object.keys(runContext.dbSecrets).length;
@@ -154152,10 +154544,7 @@ async function main() {
             current: runContext.repoSettings.learnings
           });
           toolState.learningsFilePath = learningsPath;
-          try {
-            toolState.learningsSeed = await readFile4(learningsPath, "utf8");
-          } catch {
-          }
+          toolState.learningsSeed = (runContext.repoSettings.learnings ?? "").trim();
           log.info(
             `\xBB learnings seeded at ${learningsPath} (existing=${runContext.repoSettings.learnings ? "yes" : "no"})`
           );
@@ -154195,7 +154584,8 @@ async function main() {
           modes: modes2,
           agentId,
           outputSchema,
-          learningsFilePath: toolState.learningsFilePath ?? null
+          learningsFilePath: toolState.learningsFilePath ?? null,
+          learningsHeadings: runContext.repoSettings.learningsHeadings
         });
         const logParts = [
           instructions.eventInstructions ? `EVENT-LEVEL INSTRUCTIONS:
@@ -154332,10 +154722,13 @@ ${instructions.user}` : null,
           await persistLearnings(toolContext);
         }
         if (!result.success && toolContext && toolState.progressComment) {
-          await reportErrorToComment({
-            toolState,
-            error: result.error || "agent run failed"
-          }).catch((error49) => {
+          const rawError = result.error || "agent run failed";
+          const errorBody = isApiKeyAuthError(rawError) ? formatApiKeyErrorSummary({
+            owner: runContext.repo.owner,
+            name: runContext.repo.name,
+            raw: rawError
+          }) : rawError;
+          await reportErrorToComment({ toolState, error: errorBody }).catch((error49) => {
             log.debug(`failure error report failed: ${error49}`);
           });
         }
@@ -154351,7 +154744,7 @@ ${instructions.user}` : null,
         }
         if (toolState.output) {
           log.info(`::pullfrog-output::${Buffer.from(toolState.output).toString("base64")}`);
-          core6.setOutput("result", toolState.output);
+          core7.setOutput("result", toolState.output);
         }
         return await handleAgentResult({
           result,
@@ -154371,8 +154764,13 @@ ${instructions.user}` : null,
       killTrackedChildren();
       log.error(errorMessage);
       const billingError = isRouterKeylimitExhaustedError(errorMessage) ? new BillingError(errorMessage, { code: "router_keylimit_exhausted" }) : null;
+      const apiKeyErrorSummary = !billingError && isApiKeyAuthError(errorMessage) ? formatApiKeyErrorSummary({
+        owner: runContext.repo.owner,
+        name: runContext.repo.name,
+        raw: errorMessage
+      }) : null;
       try {
-        const errorSummary = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : `### \u274C Pullfrog failed
+        const errorSummary = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : apiKeyErrorSummary ?? `### \u274C Pullfrog failed
 \`\`\`
 ${errorMessage}
@@ -154383,7 +154781,7 @@ ${errorMessage}
       } catch {
       }
       try {
-        const commentBody = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : errorMessage;
+        const commentBody = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : apiKeyErrorSummary ?? errorMessage;
         await reportErrorToComment({ toolState, error: commentBody });
       } catch {
       }