pullfrog 0.0.203 → 0.0.205

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -18198,7 +18198,7 @@ var require_summary = __commonJS({
18198
18198
  exports.summary = exports.markdownSummary = exports.SUMMARY_DOCS_URL = exports.SUMMARY_ENV_VAR = void 0;
18199
18199
  var os_1 = __require("os");
18200
18200
  var fs_1 = __require("fs");
18201
- var { access, appendFile, writeFile: writeFile2 } = fs_1.promises;
18201
+ var { access, appendFile, writeFile: writeFile3 } = fs_1.promises;
18202
18202
  exports.SUMMARY_ENV_VAR = "GITHUB_STEP_SUMMARY";
18203
18203
  exports.SUMMARY_DOCS_URL = "https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary";
18204
18204
  var Summary = class {
@@ -18256,7 +18256,7 @@ var require_summary = __commonJS({
18256
18256
  return __awaiter(this, void 0, void 0, function* () {
18257
18257
  const overwrite = !!(options === null || options === void 0 ? void 0 : options.overwrite);
18258
18258
  const filePath = yield this.filePath();
18259
- const writeFunc = overwrite ? writeFile2 : appendFile;
18259
+ const writeFunc = overwrite ? writeFile3 : appendFile;
18260
18260
  yield writeFunc(filePath, this._buffer, { encoding: "utf8" });
18261
18261
  return this.emptyBuffer();
18262
18262
  });
@@ -62662,8 +62662,8 @@ var require_snapshot_utils = __commonJS({
62662
62662
  var require_snapshot_recorder = __commonJS({
62663
62663
  "node_modules/.pnpm/undici@7.22.0/node_modules/undici/lib/mock/snapshot-recorder.js"(exports, module) {
62664
62664
  "use strict";
62665
- var { writeFile: writeFile2, readFile, mkdir } = __require("node:fs/promises");
62666
- var { dirname: dirname4, resolve: resolve3 } = __require("node:path");
62665
+ var { writeFile: writeFile3, readFile: readFile4, mkdir: mkdir2 } = __require("node:fs/promises");
62666
+ var { dirname: dirname5, resolve: resolve3 } = __require("node:path");
62667
62667
  var { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = __require("node:timers");
62668
62668
  var { InvalidArgumentError, UndiciError } = require_errors4();
62669
62669
  var { hashId, isUrlExcludedFactory, normalizeHeaders, createHeaderFilters } = require_snapshot_utils();
@@ -62864,7 +62864,7 @@ var require_snapshot_recorder = __commonJS({
62864
62864
  throw new InvalidArgumentError("Snapshot path is required");
62865
62865
  }
62866
62866
  try {
62867
- const data = await readFile(resolve3(path3), "utf8");
62867
+ const data = await readFile4(resolve3(path3), "utf8");
62868
62868
  const parsed2 = JSON.parse(data);
62869
62869
  if (Array.isArray(parsed2)) {
62870
62870
  this.#snapshots.clear();
@@ -62894,12 +62894,12 @@ var require_snapshot_recorder = __commonJS({
62894
62894
  throw new InvalidArgumentError("Snapshot path is required");
62895
62895
  }
62896
62896
  const resolvedPath = resolve3(path3);
62897
- await mkdir(dirname4(resolvedPath), { recursive: true });
62897
+ await mkdir2(dirname5(resolvedPath), { recursive: true });
62898
62898
  const data = Array.from(this.#snapshots.entries()).map(([hash2, snapshot2]) => ({
62899
62899
  hash: hash2,
62900
62900
  snapshot: snapshot2
62901
62901
  }));
62902
- await writeFile2(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
62902
+ await writeFile3(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
62903
62903
  }
62904
62904
  /**
62905
62905
  * Clears all recorded snapshots
@@ -97475,14 +97475,14 @@ var require_turndown_cjs = __commonJS({
97475
97475
  } else if (node2.nodeType === 1) {
97476
97476
  replacement = replacementForNode.call(self2, node2);
97477
97477
  }
97478
- return join16(output, replacement);
97478
+ return join17(output, replacement);
97479
97479
  }, "");
97480
97480
  }
97481
97481
  function postProcess(output) {
97482
97482
  var self2 = this;
97483
97483
  this.rules.forEach(function(rule) {
97484
97484
  if (typeof rule.append === "function") {
97485
- output = join16(output, rule.append(self2.options));
97485
+ output = join17(output, rule.append(self2.options));
97486
97486
  }
97487
97487
  });
97488
97488
  return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
@@ -97494,7 +97494,7 @@ var require_turndown_cjs = __commonJS({
97494
97494
  if (whitespace.leading || whitespace.trailing) content = content.trim();
97495
97495
  return whitespace.leading + rule.replacement(content, node2, this.options) + whitespace.trailing;
97496
97496
  }
97497
- function join16(output, replacement) {
97497
+ function join17(output, replacement) {
97498
97498
  var s1 = trimTrailingNewlines(output);
97499
97499
  var s2 = trimLeadingNewlines(replacement);
97500
97500
  var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
@@ -98926,7 +98926,8 @@ var require_fast_content_type_parse = __commonJS({
98926
98926
  // main.ts
98927
98927
  var core6 = __toESM(require_core(), 1);
98928
98928
  import { existsSync as existsSync7, readdirSync } from "node:fs";
98929
- import { join as join15 } from "node:path";
98929
+ import { readFile as readFile3 } from "node:fs/promises";
98930
+ import { join as join16 } from "node:path";
98930
98931
 
98931
98932
  // node_modules/.pnpm/@ark+util@0.56.0/node_modules/@ark/util/out/arrays.js
98932
98933
  var liftArray = (data) => Array.isArray(data) ? data : [data];
@@ -107422,7 +107423,7 @@ function buildCommitPrompt(status) {
107422
107423
  ].join("\n");
107423
107424
  }
107424
107425
  function hasPostRunIssues(issues) {
107425
- return issues.stopHook !== void 0 || issues.dirtyTree !== void 0;
107426
+ return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0;
107426
107427
  }
107427
107428
  var agent = (input) => {
107428
107429
  return {
@@ -107816,14 +107817,14 @@ var providers = {
107816
107817
  models: {
107817
107818
  grok: {
107818
107819
  displayName: "Grok",
107819
- resolve: "xai/grok-4",
107820
- openRouterResolve: "openrouter/x-ai/grok-4",
107820
+ resolve: "xai/grok-4.3",
107821
+ openRouterResolve: "openrouter/x-ai/grok-4.3",
107821
107822
  preferred: true
107822
107823
  },
107823
107824
  "grok-fast": {
107824
107825
  displayName: "Grok Fast",
107825
- resolve: "xai/grok-4-fast",
107826
- openRouterResolve: "openrouter/x-ai/grok-4-fast"
107826
+ resolve: "xai/grok-4-1-fast",
107827
+ openRouterResolve: "openrouter/x-ai/grok-4.1-fast"
107827
107828
  },
107828
107829
  "grok-code-fast": {
107829
107830
  displayName: "Grok Code Fast",
@@ -108030,8 +108031,8 @@ var providers = {
108030
108031
  },
108031
108032
  grok: {
108032
108033
  displayName: "Grok",
108033
- resolve: "openrouter/x-ai/grok-4",
108034
- openRouterResolve: "openrouter/x-ai/grok-4"
108034
+ resolve: "openrouter/x-ai/grok-4.3",
108035
+ openRouterResolve: "openrouter/x-ai/grok-4.3"
108035
108036
  },
108036
108037
  "deepseek-pro": {
108037
108038
  displayName: "DeepSeek Pro",
@@ -108217,7 +108218,8 @@ var STRING_KEYS = [
108217
108218
  "issueNodeId",
108218
108219
  "reviewNodeId",
108219
108220
  "planCommentNodeId",
108220
- "summaryCommentNodeId"
108221
+ "summaryCommentNodeId",
108222
+ "summarySnapshot"
108221
108223
  ];
108222
108224
  var NUMBER_KEYS = [
108223
108225
  "inputTokens",
@@ -108300,6 +108302,93 @@ function aggregateUsage(entries) {
108300
108302
  return out;
108301
108303
  }
108302
108304
 
108305
+ // utils/progressComment.ts
108306
+ function parseProgressComment(raw2) {
108307
+ if (!raw2?.id) return void 0;
108308
+ const id = parseInt(raw2.id, 10);
108309
+ if (Number.isNaN(id) || id <= 0) return void 0;
108310
+ return { id, type: raw2.type };
108311
+ }
108312
+ async function updateProgressComment(ctx, comment, body) {
108313
+ const result = await (comment.type === "review" ? ctx.octokit.rest.pulls.updateReviewComment({
108314
+ owner: ctx.owner,
108315
+ repo: ctx.repo,
108316
+ comment_id: comment.id,
108317
+ body
108318
+ }) : ctx.octokit.rest.issues.updateComment({
108319
+ owner: ctx.owner,
108320
+ repo: ctx.repo,
108321
+ comment_id: comment.id,
108322
+ body
108323
+ }));
108324
+ return {
108325
+ id: result.data.id,
108326
+ body: result.data.body ?? void 0,
108327
+ html_url: result.data.html_url,
108328
+ node_id: result.data.node_id
108329
+ };
108330
+ }
108331
+ async function deleteProgressCommentApi(ctx, comment) {
108332
+ if (comment.type === "review") {
108333
+ await ctx.octokit.rest.pulls.deleteReviewComment({
108334
+ owner: ctx.owner,
108335
+ repo: ctx.repo,
108336
+ comment_id: comment.id
108337
+ });
108338
+ return;
108339
+ }
108340
+ await ctx.octokit.rest.issues.deleteComment({
108341
+ owner: ctx.owner,
108342
+ repo: ctx.repo,
108343
+ comment_id: comment.id
108344
+ });
108345
+ }
108346
+ async function createLeapingProgressComment(ctx, target, body) {
108347
+ if (target.kind === "reviewReply") {
108348
+ try {
108349
+ const result2 = await ctx.octokit.rest.pulls.createReplyForReviewComment({
108350
+ owner: ctx.owner,
108351
+ repo: ctx.repo,
108352
+ pull_number: target.pullNumber,
108353
+ comment_id: target.replyToCommentId,
108354
+ body
108355
+ });
108356
+ return {
108357
+ comment: { id: result2.data.id, type: "review" },
108358
+ body: result2.data.body ?? void 0,
108359
+ html_url: result2.data.html_url
108360
+ };
108361
+ } catch (error49) {
108362
+ console.warn(
108363
+ `[progressComment] review reply failed (parent ${target.replyToCommentId} on PR #${target.pullNumber}), falling back to issue comment:`,
108364
+ error49
108365
+ );
108366
+ const fallback = await ctx.octokit.rest.issues.createComment({
108367
+ owner: ctx.owner,
108368
+ repo: ctx.repo,
108369
+ issue_number: target.pullNumber,
108370
+ body
108371
+ });
108372
+ return {
108373
+ comment: { id: fallback.data.id, type: "issue" },
108374
+ body: fallback.data.body ?? void 0,
108375
+ html_url: fallback.data.html_url
108376
+ };
108377
+ }
108378
+ }
108379
+ const result = await ctx.octokit.rest.issues.createComment({
108380
+ owner: ctx.owner,
108381
+ repo: ctx.repo,
108382
+ issue_number: target.issueNumber,
108383
+ body
108384
+ });
108385
+ return {
108386
+ comment: { id: result.data.id, type: "issue" },
108387
+ body: result.data.body ?? void 0,
108388
+ html_url: result.data.html_url
108389
+ };
108390
+ }
108391
+
108303
108392
  // node_modules/.pnpm/@toon-format+toon@1.4.0/node_modules/@toon-format/toon/dist/index.mjs
108304
108393
  var LIST_ITEM_MARKER = "-";
108305
108394
  var LIST_ITEM_PREFIX = "- ";
@@ -108850,43 +108939,22 @@ function addFooter(ctx, body) {
108850
108939
  var Comment = type({
108851
108940
  issueNumber: type.number.describe("the issue number to comment on"),
108852
108941
  body: type.string.describe("the comment body content"),
108853
- type: type.enumerated("Plan", "Summary", "Comment").describe(
108854
- "Plan: record as the plan for this run. Summary: record as the PR summary comment (one per PR, updated in place). Comment: regular comment (default)."
108855
- ).optional()
108942
+ type: type.enumerated("Plan", "Comment").describe("Plan: record as the plan for this run. Comment: regular comment (default).").optional()
108856
108943
  });
108857
108944
  function CreateCommentTool(ctx) {
108858
108945
  return tool({
108859
108946
  name: "create_issue_comment",
108860
- description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments, type: 'Summary' for PR summary comments.",
108947
+ description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments.",
108861
108948
  parameters: Comment,
108862
108949
  execute: execute(async ({ issueNumber, body, type: commentType }) => {
108863
108950
  const bodyWithFooter = addFooter(ctx, body);
108864
- if (commentType === "Summary" && ctx.toolState.existingSummaryCommentId) {
108865
- log.info(
108866
- `\xBB redirecting create_issue_comment(Summary) to update existing comment ${ctx.toolState.existingSummaryCommentId}`
108867
- );
108868
- const result2 = await ctx.octokit.rest.issues.updateComment({
108869
- owner: ctx.repo.owner,
108870
- repo: ctx.repo.name,
108871
- comment_id: ctx.toolState.existingSummaryCommentId,
108872
- body: bodyWithFooter
108873
- });
108874
- if (result2.data.node_id) {
108875
- await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result2.data.node_id });
108876
- }
108877
- return {
108878
- success: true,
108879
- commentId: result2.data.id,
108880
- url: result2.data.html_url,
108881
- body: result2.data.body
108882
- };
108883
- }
108884
108951
  const result = await ctx.octokit.rest.issues.createComment({
108885
108952
  owner: ctx.repo.owner,
108886
108953
  repo: ctx.repo.name,
108887
108954
  issue_number: issueNumber,
108888
108955
  body: bodyWithFooter
108889
108956
  });
108957
+ ctx.toolState.wasUpdated = true;
108890
108958
  if (commentType === "Plan") {
108891
108959
  if (result.data.node_id) {
108892
108960
  await patchWorkflowRunFields(ctx, { planCommentNodeId: result.data.node_id });
@@ -108907,9 +108975,6 @@ function CreateCommentTool(ctx) {
108907
108975
  body: updateResult.data.body
108908
108976
  };
108909
108977
  }
108910
- if (commentType === "Summary" && result.data.node_id) {
108911
- await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result.data.node_id });
108912
- }
108913
108978
  return {
108914
108979
  success: true,
108915
108980
  commentId: result.data.id,
@@ -108960,6 +109025,7 @@ async function reportProgress(ctx, params) {
108960
109025
  }
108961
109026
  const issueNumber = ctx.payload.event.issue_number ?? ctx.toolState.issueNumber;
108962
109027
  const isPlanMode = ctx.toolState.selectedMode === "Plan";
109028
+ const apiCtx = { octokit: ctx.octokit, owner: ctx.repo.owner, repo: ctx.repo.name };
108963
109029
  if (target_plan_comment === true && ctx.toolState.existingPlanCommentId === void 0) {
108964
109030
  log.warning("target_plan_comment requested but no existingPlanCommentId in tool state");
108965
109031
  }
@@ -108969,86 +109035,74 @@ async function reportProgress(ctx, params) {
108969
109035
  const bodyWithoutFooter = stripExistingFooter(body);
108970
109036
  const footer = buildCommentFooter(ctx, customParts);
108971
109037
  const bodyWithFooter = `${bodyWithoutFooter}${footer}`;
108972
- const result2 = await ctx.octokit.rest.issues.updateComment({
108973
- owner: ctx.repo.owner,
108974
- repo: ctx.repo.name,
108975
- comment_id: commentId,
108976
- body: bodyWithFooter
108977
- });
109038
+ const result = await updateProgressComment(
109039
+ apiCtx,
109040
+ { id: commentId, type: "issue" },
109041
+ bodyWithFooter
109042
+ );
108978
109043
  ctx.toolState.wasUpdated = true;
108979
- if (isPlanMode && result2.data.node_id) {
108980
- await patchWorkflowRunFields(ctx, { planCommentNodeId: result2.data.node_id });
109044
+ if (isPlanMode && result.node_id) {
109045
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: result.node_id });
108981
109046
  }
108982
109047
  return {
108983
- commentId: result2.data.id,
108984
- url: result2.data.html_url,
108985
- body: result2.data.body || "",
109048
+ commentId: result.id,
109049
+ url: result.html_url,
109050
+ body: result.body || "",
108986
109051
  action: "updated"
108987
109052
  };
108988
109053
  }
108989
- const existingCommentId = ctx.toolState.progressCommentId;
108990
- if (existingCommentId) {
108991
- const customParts = isPlanMode && issueNumber !== void 0 ? [buildImplementPlanLink(ctx, issueNumber, existingCommentId)] : void 0;
109054
+ const existingComment = ctx.toolState.progressComment;
109055
+ if (existingComment) {
109056
+ const customParts = isPlanMode && issueNumber !== void 0 ? [buildImplementPlanLink(ctx, issueNumber, existingComment.id)] : void 0;
108992
109057
  const bodyWithoutFooter = stripExistingFooter(body);
108993
109058
  const footer = buildCommentFooter(ctx, customParts);
108994
109059
  const bodyWithFooter = `${bodyWithoutFooter}${footer}`;
108995
- const result2 = await ctx.octokit.rest.issues.updateComment({
108996
- owner: ctx.repo.owner,
108997
- repo: ctx.repo.name,
108998
- comment_id: existingCommentId,
108999
- body: bodyWithFooter
109000
- });
109060
+ const result = await updateProgressComment(apiCtx, existingComment, bodyWithFooter);
109001
109061
  ctx.toolState.wasUpdated = true;
109002
- if (isPlanMode && result2.data.node_id) {
109003
- await patchWorkflowRunFields(ctx, { planCommentNodeId: result2.data.node_id });
109062
+ if (isPlanMode && result.node_id) {
109063
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: result.node_id });
109004
109064
  }
109005
109065
  return {
109006
- commentId: result2.data.id,
109007
- url: result2.data.html_url,
109008
- body: result2.data.body || "",
109066
+ commentId: result.id,
109067
+ url: result.html_url,
109068
+ body: result.body || "",
109009
109069
  action: "updated"
109010
109070
  };
109011
109071
  }
109012
- if (existingCommentId === null) {
109072
+ if (existingComment === null) {
109013
109073
  return { body, action: "skipped" };
109014
109074
  }
109015
109075
  if (issueNumber === void 0) {
109016
109076
  return { body, action: "skipped" };
109017
109077
  }
109018
109078
  const initialBody = addFooter(ctx, body);
109019
- const result = await ctx.octokit.rest.issues.createComment({
109020
- owner: ctx.repo.owner,
109021
- repo: ctx.repo.name,
109022
- issue_number: issueNumber,
109023
- body: initialBody
109024
- });
109025
- ctx.toolState.progressCommentId = result.data.id;
109079
+ const created = await createLeapingProgressComment(
109080
+ apiCtx,
109081
+ { kind: "issue", issueNumber },
109082
+ initialBody
109083
+ );
109084
+ ctx.toolState.progressComment = created.comment;
109026
109085
  ctx.toolState.wasUpdated = true;
109027
109086
  if (isPlanMode) {
109028
- const customParts = [buildImplementPlanLink(ctx, issueNumber, result.data.id)];
109087
+ const customParts = [buildImplementPlanLink(ctx, issueNumber, created.comment.id)];
109029
109088
  const bodyWithoutFooter = stripExistingFooter(body);
109030
109089
  const footer = buildCommentFooter(ctx, customParts);
109031
109090
  const bodyWithPlanLink = `${bodyWithoutFooter}${footer}`;
109032
- const updateResult = await ctx.octokit.rest.issues.updateComment({
109033
- owner: ctx.repo.owner,
109034
- repo: ctx.repo.name,
109035
- comment_id: result.data.id,
109036
- body: bodyWithPlanLink
109037
- });
109038
- if (updateResult.data.node_id) {
109039
- await patchWorkflowRunFields(ctx, { planCommentNodeId: updateResult.data.node_id });
109091
+ const updateResult = await updateProgressComment(apiCtx, created.comment, bodyWithPlanLink);
109092
+ if (updateResult.node_id) {
109093
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: updateResult.node_id });
109040
109094
  }
109041
109095
  return {
109042
- commentId: updateResult.data.id,
109043
- url: updateResult.data.html_url,
109044
- body: updateResult.data.body || "",
109096
+ commentId: updateResult.id,
109097
+ url: updateResult.html_url,
109098
+ body: updateResult.body || "",
109045
109099
  action: "created"
109046
109100
  };
109047
109101
  }
109048
109102
  return {
109049
- commentId: result.data.id,
109050
- url: result.data.html_url,
109051
- body: result.data.body || "",
109103
+ commentId: created.comment.id,
109104
+ url: created.html_url,
109105
+ body: created.body || "",
109052
109106
  action: "created"
109053
109107
  };
109054
109108
  }
@@ -109076,15 +109130,15 @@ ${collapsible}`;
109076
109130
  reportParams.target_plan_comment = params.target_plan_comment;
109077
109131
  }
109078
109132
  const result = await reportProgress(ctx, reportParams);
109079
- if (!params.target_plan_comment) {
109080
- ctx.toolState.finalSummaryWritten = true;
109081
- }
109082
109133
  if (result.action === "skipped") {
109083
109134
  return {
109084
109135
  success: true,
109085
109136
  message: "progress recorded (no GitHub comment created - this may occur for workflow_dispatch events or when there is no associated issue/PR)"
109086
109137
  };
109087
109138
  }
109139
+ if (!params.target_plan_comment) {
109140
+ ctx.toolState.finalSummaryWritten = true;
109141
+ }
109088
109142
  return {
109089
109143
  success: true,
109090
109144
  ...result
@@ -109093,23 +109147,22 @@ ${collapsible}`;
109093
109147
  });
109094
109148
  }
109095
109149
  async function deleteProgressComment(ctx) {
109096
- const existingCommentId = ctx.toolState.progressCommentId;
109097
- if (!existingCommentId) {
109150
+ const existing = ctx.toolState.progressComment;
109151
+ if (!existing) {
109098
109152
  return false;
109099
109153
  }
109100
109154
  try {
109101
- await ctx.octokit.rest.issues.deleteComment({
109102
- owner: ctx.repo.owner,
109103
- repo: ctx.repo.name,
109104
- comment_id: existingCommentId
109105
- });
109155
+ await deleteProgressCommentApi(
109156
+ { octokit: ctx.octokit, owner: ctx.repo.owner, repo: ctx.repo.name },
109157
+ existing
109158
+ );
109106
109159
  } catch (error49) {
109107
109160
  if (error49 instanceof Error && error49.message.includes("Not Found")) {
109108
109161
  } else {
109109
109162
  throw error49;
109110
109163
  }
109111
109164
  }
109112
- ctx.toolState.progressCommentId = null;
109165
+ ctx.toolState.progressComment = null;
109113
109166
  return true;
109114
109167
  }
109115
109168
  var ReplyToReviewComment = type({
@@ -142190,7 +142243,7 @@ var import_semver = __toESM(require_semver2(), 1);
142190
142243
  // package.json
142191
142244
  var package_default = {
142192
142245
  name: "pullfrog",
142193
- version: "0.0.203",
142246
+ version: "0.0.205",
142194
142247
  type: "module",
142195
142248
  bin: {
142196
142249
  pullfrog: "dist/cli.mjs",
@@ -142387,7 +142440,7 @@ function closeBrowserDaemon(toolState) {
142387
142440
 
142388
142441
  // mcp/checkout.ts
142389
142442
  import { createHash as createHash2 } from "node:crypto";
142390
- import { writeFileSync } from "node:fs";
142443
+ import { statSync, unlinkSync as unlinkSync2, writeFileSync } from "node:fs";
142391
142444
  import { join as join3 } from "node:path";
142392
142445
 
142393
142446
  // utils/diffCoverage.ts
@@ -142416,7 +142469,10 @@ function createDiffCoverageState(params) {
142416
142469
  totalLines: params.totalLines,
142417
142470
  tocEntries: parseDiffTocEntries({ toc: params.toc }),
142418
142471
  coveredRanges: [],
142419
- coveragePreflightRan: false
142472
+ // carry forward across checkout_pr refreshes so the nudge stays "once per
142473
+ // review session". coveredRanges are intentionally not carried because
142474
+ // line numbers are tied to the previous diff's content.
142475
+ coveragePreflightRan: params.previous?.coveragePreflightRan ?? false
142420
142476
  };
142421
142477
  }
142422
142478
  function recordDiffReadFromToolUse(params) {
@@ -142727,8 +142783,13 @@ async function $git(subcommand, args2, options) {
142727
142783
  }
142728
142784
  if (result.exitCode !== 0) {
142729
142785
  const stderr = result.stderr.trim();
142730
- log.info(`git ${subcommand} failed: ${stderr}`);
142731
- throw new Error(`git ${subcommand} failed: ${stderr}`);
142786
+ const stdout = result.stdout.trim();
142787
+ const detail = stderr && stdout ? `${stderr}
142788
+ --- stdout ---
142789
+ ${stdout}` : stderr || stdout || "(no output)";
142790
+ const message = `git ${subcommand} failed (exit ${result.exitCode}): ${detail}`;
142791
+ log.info(message);
142792
+ throw new Error(message);
142732
142793
  }
142733
142794
  return {
142734
142795
  stdout: result.stdout.trim(),
@@ -143005,6 +143066,34 @@ var PushBranch = type({
143005
143066
  branchName: type.string.describe("The branch name to push (defaults to current branch)").optional(),
143006
143067
  force: type.boolean.describe("Force push (use with caution)").default(false)
143007
143068
  });
143069
+ var CONCURRENT_PUSH_PATTERNS = ["fetch first", "non-fast-forward", "cannot lock ref"];
143070
+ var TRANSIENT_PATTERNS = [
143071
+ /RPC failed/i,
143072
+ /early EOF/,
143073
+ /the remote end hung up unexpectedly/,
143074
+ /Connection reset/i,
143075
+ /Could not resolve host/i,
143076
+ /Operation timed out/i,
143077
+ /HTTP\/2 stream \d+ was not closed cleanly/i,
143078
+ /unexpected disconnect while reading sideband packet/i,
143079
+ // libcurl HTTP 5xx surfaced by git over https. matches both the
143080
+ // libcurl-style "The requested URL returned error: 502" and the more
143081
+ // recent "HTTP 502" wording. most 4xx is intentionally excluded —
143082
+ // 401/403/404 indicate auth/permission problems that are not
143083
+ // retry-safe — but 429 (rate-limited / abuse detection) IS retry-safe
143084
+ // and GitHub occasionally surfaces it on git push, so it's included
143085
+ // explicitly below.
143086
+ /HTTP 5\d\d/,
143087
+ /returned error: 5\d\d/i,
143088
+ /HTTP 429/,
143089
+ /returned error: 429/i
143090
+ ];
143091
+ function classifyPushError(msg) {
143092
+ if (CONCURRENT_PUSH_PATTERNS.some((p) => msg.includes(p))) return "concurrent-push";
143093
+ if (TRANSIENT_PATTERNS.some((p) => p.test(msg))) return "transient";
143094
+ return "unknown";
143095
+ }
143096
+ var TRANSIENT_RETRY_DELAYS_MS = [2e3, 5e3];
143008
143097
  function PushBranchTool(ctx) {
143009
143098
  const defaultBranch = ctx.repo.data.default_branch || "main";
143010
143099
  const pushPermission = ctx.payload.push;
@@ -143055,25 +143144,48 @@ ${postHookStatus}`
143055
143144
  if (force) {
143056
143145
  log.warning(`force pushing - this will overwrite remote history`);
143057
143146
  }
143058
- try {
143059
- await $git("push", pushArgs, {
143060
- token: ctx.gitToken
143061
- });
143062
- } catch (err) {
143063
- const msg = err instanceof Error ? err.message : String(err);
143064
- if (msg.includes("fetch first") || msg.includes("non-fast-forward")) {
143065
- const integrateStep = ctx.payload.shell === "disabled" ? `2. use the git tool to merge the remote branch into yours: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] })` : `2. use the git tool to rebase or merge your changes on top: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] }) (or 'rebase')`;
143066
- throw new Error(
143067
- `push rejected: the remote branch '${pushDest.remoteBranch}' has new commits you don't have locally.
143147
+ let lastErr;
143148
+ let pushed = false;
143149
+ for (let attempt = 0; attempt <= TRANSIENT_RETRY_DELAYS_MS.length; attempt++) {
143150
+ try {
143151
+ await $git("push", pushArgs, {
143152
+ token: ctx.gitToken
143153
+ });
143154
+ if (attempt > 0) {
143155
+ log.info(`push succeeded on attempt ${attempt + 1}`);
143156
+ }
143157
+ pushed = true;
143158
+ break;
143159
+ } catch (err) {
143160
+ lastErr = err;
143161
+ const msg = err instanceof Error ? err.message : String(err);
143162
+ const kind = classifyPushError(msg);
143163
+ if (kind === "concurrent-push") {
143164
+ const integrateStep = ctx.payload.shell === "disabled" ? `2. use the git tool to merge the remote branch into yours: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] })` : `2. use the git tool to rebase or merge your changes on top: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] }) (or 'rebase')`;
143165
+ throw new Error(
143166
+ `push rejected: the remote branch '${pushDest.remoteBranch}' has new commits you don't have locally (often a concurrent push to the same branch).
143068
143167
 
143069
143168
  to resolve this:
143070
143169
  1. use git_fetch to fetch the remote branch: git_fetch({ ref: "${pushDest.remoteBranch}" })
143071
143170
  ${integrateStep}
143072
143171
  3. resolve any merge conflicts if needed
143073
143172
  4. retry push_branch`
143074
- );
143173
+ );
143174
+ }
143175
+ if (kind === "transient" && attempt < TRANSIENT_RETRY_DELAYS_MS.length) {
143176
+ const baseDelay = TRANSIENT_RETRY_DELAYS_MS[attempt] ?? 5e3;
143177
+ const delay2 = Math.round(baseDelay * (0.75 + Math.random() * 0.5));
143178
+ log.info(
143179
+ `push attempt ${attempt + 1} failed (transient), retrying in ${delay2}ms: ${msg.slice(0, 300)}`
143180
+ );
143181
+ await new Promise((r) => setTimeout(r, delay2));
143182
+ continue;
143183
+ }
143184
+ throw err;
143075
143185
  }
143076
- throw err;
143186
+ }
143187
+ if (!pushed) {
143188
+ throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
143077
143189
  }
143078
143190
  return {
143079
143191
  success: true,
@@ -143164,6 +143276,11 @@ var GitFetch = type({
143164
143276
  ref: type.string.describe("Ref to fetch: branch name, tag, or 'pull/N/head' for PRs"),
143165
143277
  depth: type.number.describe("Fetch depth (for shallow clones)").optional()
143166
143278
  });
143279
+ var SHALLOW_UNREACHABLE_PATTERNS = [
143280
+ /Could not read [a-f0-9]{40,64}/,
143281
+ /remote did not send all necessary objects/
143282
+ ];
143283
+ var DEEPEN_RETRY_DEPTH = 1e3;
143167
143284
  function GitFetchTool(ctx) {
143168
143285
  return tool({
143169
143286
  name: "git_fetch",
@@ -143175,9 +143292,20 @@ function GitFetchTool(ctx) {
143175
143292
  if (params.depth !== void 0) {
143176
143293
  fetchArgs.push(`--depth=${params.depth}`);
143177
143294
  }
143178
- await $git("fetch", fetchArgs, {
143179
- token: ctx.gitToken
143180
- });
143295
+ try {
143296
+ await $git("fetch", fetchArgs, { token: ctx.gitToken });
143297
+ } catch (err) {
143298
+ const msg = err instanceof Error ? err.message : String(err);
143299
+ const isShallowUnreachable = SHALLOW_UNREACHABLE_PATTERNS.some((p) => p.test(msg));
143300
+ const isShallow = isShallowUnreachable && $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
143301
+ if (!isShallow) throw err;
143302
+ log.info(
143303
+ `\xBB git_fetch hit shallow-unreachable error, retrying with --deepen=${DEEPEN_RETRY_DEPTH}`
143304
+ );
143305
+ await $git("fetch", [`--deepen=${DEEPEN_RETRY_DEPTH}`, "--no-tags", "origin", params.ref], {
143306
+ token: ctx.gitToken
143307
+ });
143308
+ }
143181
143309
  return { success: true, ref: params.ref };
143182
143310
  })
143183
143311
  });
@@ -143542,6 +143670,7 @@ function CreatePullRequestReviewTool(ctx) {
143542
143670
  nodeId: reviewNodeId,
143543
143671
  reviewedSha: actuallyReviewedSha
143544
143672
  };
143673
+ ctx.toolState.wasUpdated = true;
143545
143674
  await deleteProgressComment(ctx).catch((err) => {
143546
143675
  log.debug(`progress comment cleanup after review failed: ${err}`);
143547
143676
  });
@@ -143893,11 +144022,38 @@ async function ensureBeforeShaReachable(params) {
143893
144022
  return false;
143894
144023
  }
143895
144024
  }
144025
+ var STALE_LOCK_AGE_MS = 3e4;
144026
+ var GIT_LOCK_PATHS = [
144027
+ ".git/shallow.lock",
144028
+ ".git/index.lock",
144029
+ ".git/objects/maintenance.lock"
144030
+ ];
144031
+ function cleanupStaleGitLocks() {
144032
+ const now = Date.now();
144033
+ for (const relPath of GIT_LOCK_PATHS) {
144034
+ let mtimeMs;
144035
+ try {
144036
+ mtimeMs = statSync(relPath).mtimeMs;
144037
+ } catch {
144038
+ continue;
144039
+ }
144040
+ if (now - mtimeMs < STALE_LOCK_AGE_MS) continue;
144041
+ try {
144042
+ unlinkSync2(relPath);
144043
+ log.warning(`\xBB removed stale ${relPath} from prior run`);
144044
+ } catch (e) {
144045
+ log.debug(
144046
+ `\xBB failed to remove stale ${relPath}: ${e instanceof Error ? e.message : String(e)}`
144047
+ );
144048
+ }
144049
+ }
144050
+ }
143896
144051
  async function checkoutPrBranch(pr, params) {
143897
144052
  const { octokit, owner, name, gitToken, toolState, beforeSha } = params;
143898
144053
  log.info(`\xBB checking out PR #${pr.number}...`);
143899
144054
  rejectIfLeadingDash(pr.baseRef, "PR base ref");
143900
144055
  rejectIfLeadingDash(pr.headRef, "PR head ref");
144056
+ cleanupStaleGitLocks();
143901
144057
  const isFork = pr.headRepoFullName !== pr.baseRepoFullName;
143902
144058
  const localBranch = `pr-${pr.number}`;
143903
144059
  const isShallow = $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
@@ -144067,7 +144223,8 @@ ${diffPreview}`);
144067
144223
  ctx.toolState.diffCoverage = createDiffCoverageState({
144068
144224
  diffPath,
144069
144225
  totalLines: countLines({ content: formatResult.content }),
144070
- toc: formatResult.toc
144226
+ toc: formatResult.toc,
144227
+ previous: ctx.toolState.diffCoverage
144071
144228
  });
144072
144229
  log.debug(
144073
144230
  `\xBB diff coverage initialized: diffPath=${diffPath}, totalLines=${ctx.toolState.diffCoverage.totalLines}, tocEntries=${ctx.toolState.diffCoverage.tocEntries.length}`
@@ -144697,6 +144854,7 @@ function UpdatePullRequestBodyTool(ctx) {
144697
144854
  pull_number: params.pull_number,
144698
144855
  body: bodyWithFooter
144699
144856
  });
144857
+ ctx.toolState.wasUpdated = true;
144700
144858
  return {
144701
144859
  success: true,
144702
144860
  number: result.data.number,
@@ -145127,35 +145285,20 @@ async function getReviewThreads(input) {
145127
145285
  const username = input.approvedBy;
145128
145286
  return threadsForReview.filter((thread) => threadHasThumbsUpFrom(thread, username));
145129
145287
  }
145130
- async function getReviewData(input) {
145131
- const [review, threads] = await Promise.all([
145132
- input.octokit.rest.pulls.getReview({
145133
- owner: input.owner,
145134
- repo: input.name,
145135
- pull_number: input.pullNumber,
145136
- review_id: input.reviewId
145137
- }),
145138
- getReviewThreads(input)
145139
- ]);
145140
- const rawReviewBody = review.data.body;
145288
+ function formatReviewData(input) {
145289
+ const rawReviewBody = input.review.body;
145141
145290
  const reviewBody = rawReviewBody ? stripExistingFooter(rawReviewBody) : "";
145142
- const reviewer = review.data.user?.login ?? "unknown";
145143
- if (threads.length === 0 && !reviewBody) return void 0;
145291
+ const reviewer = input.review.user?.login ?? "unknown";
145292
+ if (input.threads.length === 0 && !reviewBody) return void 0;
145144
145293
  let threadBlocks = [];
145145
- if (threads.length > 0) {
145146
- const prFiles = await input.octokit.paginate(input.octokit.rest.pulls.listFiles, {
145147
- owner: input.owner,
145148
- repo: input.name,
145149
- pull_number: input.pullNumber,
145150
- per_page: 100
145151
- });
145294
+ if (input.threads.length > 0) {
145152
145295
  const filePatchMap = /* @__PURE__ */ new Map();
145153
- for (const file2 of prFiles) {
145296
+ for (const file2 of input.prFiles) {
145154
145297
  if (file2.patch) {
145155
145298
  filePatchMap.set(file2.filename, parseFilePatches(file2.patch));
145156
145299
  }
145157
145300
  }
145158
- threadBlocks = buildThreadBlocks(threads, filePatchMap, input.reviewId);
145301
+ threadBlocks = buildThreadBlocks(input.threads, filePatchMap, input.reviewId);
145159
145302
  }
145160
145303
  const formatted = formatReviewThreads(threadBlocks, {
145161
145304
  pullNumber: input.pullNumber,
@@ -145165,6 +145308,30 @@ async function getReviewData(input) {
145165
145308
  });
145166
145309
  return { threadBlocks, reviewer, formatted };
145167
145310
  }
145311
+ async function getReviewData(input) {
145312
+ const [review, threads] = await Promise.all([
145313
+ input.octokit.rest.pulls.getReview({
145314
+ owner: input.owner,
145315
+ repo: input.name,
145316
+ pull_number: input.pullNumber,
145317
+ review_id: input.reviewId
145318
+ }),
145319
+ getReviewThreads(input)
145320
+ ]);
145321
+ const prFiles = threads.length > 0 ? await input.octokit.paginate(input.octokit.rest.pulls.listFiles, {
145322
+ owner: input.owner,
145323
+ repo: input.name,
145324
+ pull_number: input.pullNumber,
145325
+ per_page: 100
145326
+ }) : [];
145327
+ return formatReviewData({
145328
+ review: review.data,
145329
+ threads,
145330
+ prFiles,
145331
+ pullNumber: input.pullNumber,
145332
+ reviewId: input.reviewId
145333
+ });
145334
+ }
145168
145335
  function GetReviewCommentsTool(ctx) {
145169
145336
  return tool({
145170
145337
  name: "get_review_comments",
@@ -145288,425 +145455,10 @@ function ResolveReviewThreadTool(ctx) {
145288
145455
  });
145289
145456
  }
145290
145457
 
145291
- // agents/reviewer.ts
145292
- var REVIEWER_AGENT_NAME = "reviewfrog";
145293
- var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
145294
-
145295
- HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
145296
- - Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
145297
- - Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
145298
- - Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
145299
- - Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
145300
-
145301
- Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`;
145302
-
145303
- // modes.ts
145304
- var PR_SUMMARY_FORMAT = `### Default format
145305
-
145306
- Follow this structure exactly:
145307
-
145308
- <b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
145309
- NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
145310
-
145311
- ### Key changes
145312
-
145313
- - **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
145314
-
145315
- <sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
145316
- NOTE: the metadata line goes AFTER the bullet list, not before it.
145317
-
145318
- Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
145319
-
145320
- <br/>
145321
-
145322
- ## Example readable section title
145323
-
145324
- > **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
145325
- IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
145326
-
145327
- 1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
145328
-
145329
- If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
145330
- > <details><summary>How does X work?</summary>
145331
- > Extended explanation here.
145332
- > </details>
145333
-
145334
- End each section with a file links trail (3-4 key files max):
145335
- [\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
145336
-
145337
- Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
145338
-
145339
- CRITICAL \u2014 GitHub markdown rendering rule:
145340
- GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
145341
-
145342
- Rules:
145343
- - \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
145344
- - ALL variable names, identifiers, and file names in body text must be in backticks
145345
- - ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
145346
- - Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
145347
- - Do NOT include raw diff stats like '+123 / -45' or line counts
145348
- - Do NOT include code blocks or repeat diff contents
145349
- - Do NOT include a changelog section \u2014 the key changes list serves this purpose
145350
- - Focus on *intent*, not *what* \u2014 the diff already shows what changed
145351
- - Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
145352
- function learningsStep(t, n) {
145353
- return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
145354
- }
145355
- function computeModes(agentId) {
145356
- const t = (toolName) => formatMcpToolRef(agentId, toolName);
145357
- return [
145358
- {
145359
- name: "Build",
145360
- description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
145361
- prompt: `### Checklist
145362
-
145363
- 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
145364
-
145365
- 2. **setup**: checkout or create the branch:
145366
- - **PR event, modifying the existing PR**: call \`${t("checkout_pr")}\`
145367
- - **new branch**: use \`${t("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
145368
-
145369
- 3. **build**: implement changes using your native file and shell tools:
145370
- - follow the plan (if you ran a plan phase)
145371
- - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
145372
- - run relevant tests/lints before committing
145373
-
145374
- 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
145375
-
145376
- Skip self-review (commit directly) when the diff is **genuinely trivial**:
145377
- - doc typos, comment-only edits, whitespace/format-only, import reordering
145378
- - lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
145379
- - low-risk dep patch bump from a trusted source
145380
-
145381
- Run self-review when the diff has **any behavioral surface, however small**:
145382
- - 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
145383
- - any change to money / tax / currency / billing / fee / refund / payout calculations or constants
145384
- - any change to auth / permissions / roles / sessions / tokens / signature verification
145385
- - any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
145386
- - new endpoints, new code paths, new error branches \u2014 even small ones
145387
- - mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
145388
- - anything you're uncertain about
145389
-
145390
- Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
145391
-
145392
- Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
145393
-
145394
- Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
145395
-
145396
- Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
145397
- - Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
145398
- - Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
145399
- - Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
145400
- - Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
145401
- - For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
145402
-
145403
- Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is not enough \u2014 a fix that improves correctness while degrading elegance still degrades the codebase. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
145404
-
145405
- 5. **finalize**:
145406
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
145407
- - create a PR via \`${t("create_pull_request")}\`
145408
- - call \`${t("report_progress")}\` with the PR link or the exact error if push/PR failed
145409
-
145410
- ${learningsStep(t, 6)}
145411
-
145412
- ### Notes
145413
-
145414
- For simple, well-defined tasks, skip the plan phase and go straight to build.`
145415
- },
145416
- {
145417
- name: "AddressReviews",
145418
- description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
145419
- prompt: `### Checklist
145420
-
145421
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
145422
-
145423
- 2. Fetch review comments via \`${t("get_review_comments")}\`.
145424
-
145425
- 3. For each comment:
145426
- - understand the feedback
145427
- - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is not enough; improving correctness while degrading elegance still degrades the code.
145428
- - if the request stands, make the code change using your native tools; otherwise reply explaining why
145429
- - record what was done (or why nothing was done)
145430
-
145431
- 4. Quality check:
145432
- - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
145433
- - commit locally via shell (\`git add . && git commit -m "..."\`)
145434
-
145435
- 5. Finalize:
145436
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145437
- - reply to each comment using \`${t("reply_to_review_comment")}\`
145438
- - resolve addressed threads via \`${t("resolve_review_thread")}\`
145439
- - call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)
145440
-
145441
- ${learningsStep(t, 6)}`
145442
- },
145443
- // Review and IncrementalReview use the multi-lens orchestrator pattern
145444
- // (canonical source: .claude/commands/anneal.md). The orchestrator does
145445
- // triage → parallel read-only subagent fan-out → aggregate → draft comments
145446
- // → submit. For someone else's PR, parallel lenses (correctness, security,
145447
- // research-validated claims, user-journey, etc.) provide breadth across
145448
- // angles that a single subagent can't carry coherently. Build mode keeps
145449
- // a single fresh-eyes subagent (different problem shape — orchestrator
145450
- // wrote the code and bias-mitigation comes from delegating to one
145451
- // subagent that doesn't share the implementation context).
145452
- // Deliberate omission vs canonical /anneal: severity categorization in the
145453
- // final message (the review body has its own CAUTION/IMPORTANT framing
145454
- // instead of a severity table).
145455
- {
145456
- name: "Review",
145457
- description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
145458
- prompt: `### Checklist
145459
-
145460
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
145461
-
145462
- 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
145463
-
145464
- if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit \`Reviewed \u2014 no issues found.\` per step 5. there's no value in dispatching even one lens for a typo.
145465
-
145466
- "Genuinely trivial" (skip):
145467
- - single-word doc typo, whitespace/format-only, comment-only across any number of files
145468
- - lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
145469
- - mechanical rename whose only effect is import-path updates
145470
- - low-risk dep patch bump
145471
-
145472
- "Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
145473
- - any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
145474
- - flipping a feature-flag default, default config value, or retry/timeout constant
145475
- - changing a money/tax/currency/fee constant by any amount
145476
- - changing an HTTP method, redirect URL, response code, or status enum
145477
- - tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
145478
- - renaming a public API surface (still trivial in shape, but needs an impact lens)
145479
- - adding a new direct dependency (supply-chain surface)
145480
- - any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
145481
- - mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
145482
-
145483
- When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
145484
-
145485
- otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
145486
-
145487
- - **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
145488
- - **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
145489
- - **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
145490
- - **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
145491
-
145492
- lenses come in two flavors, and you can mix them:
145493
- - **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
145494
- - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
145495
-
145496
- starter menu (combine, omit, or invent your own):
145497
- - **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
145498
- - **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
145499
- - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
145500
- - **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
145501
- - **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
145502
- - **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
145503
- - **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
145504
- - **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
145505
- - **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
145506
- - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
145507
- - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
145508
-
145509
- 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
145510
- - the diff path / target \u2014 reading the diff and the codebase is its job
145511
- - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
145512
- - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
145513
- - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
145514
- - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
145515
- - ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
145516
-
145517
- delegation discipline:
145518
- - do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
145519
- - do NOT summarize the PR for them (biases toward a validation frame)
145520
- - do NOT hand them a curated reading list (let them discover scope)
145521
- - do NOT pre-shape their output with a finding schema
145522
- - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
145523
-
145524
- 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
145525
-
145526
- for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
145527
-
145528
- 5. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
145529
-
145530
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
145531
-
145532
- - **critical issues** (blocks merge \u2014 bugs, security, data loss):
145533
- \`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
145534
- \`> [!CAUTION]\\n> This PR introduces a race condition in ...\`
145535
- Follow with a brief summary if needed. Include all inline comments.
145536
- - **recommended changes** (non-critical):
145537
- \`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
145538
- \`> [!IMPORTANT]\\n> Consider adding input validation for ...\`
145539
- Follow with a brief summary if needed. Include all inline comments.
145540
- - **no actionable issues**:
145541
- \`approved: true\`, body: "Reviewed \u2014 no issues found."`
145542
- },
145543
- // IncrementalReview shares Review's multi-lens orchestrator pattern but
145544
- // scopes the target to the incremental diff and adds prior-review-feedback
145545
- // tracking. The "issues must be NEW since the last Pullfrog review" filter
145546
- // lives at aggregation time (step 5), NOT in the subagent prompt — pushing
145547
- // the filter into subagents matches the canonical anneal anti-pattern of
145548
- // "list known pre-existing failures — don't flag these" and suppresses
145549
- // signal on regressions the new commits amplified. The body-format rules
145550
- // (Reviewed changes / Prior review feedback) are unchanged from the prior
145551
- // version. Same severity-table omission as Review.
145552
- {
145553
- name: "IncrementalReview",
145554
- description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
145555
- prompt: `### Checklist
145556
-
145557
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
145558
-
145559
- 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
145560
-
145561
- 3. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll need this in step 6 to track which prior comments were addressed.
145562
-
145563
- 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
145564
-
145565
- if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
145566
-
145567
- "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
145568
- "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
145569
- When unsure, treat as non-trivial.
145570
-
145571
- otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
145572
-
145573
- dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
145574
- - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
145575
- - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
145576
- - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
145577
- - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
145578
- - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
145579
- - ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
145580
-
145581
- delegation discipline:
145582
- - do NOT lens-review the diff yourself in parallel with the subagents
145583
- - do NOT summarize the changes for them (biases toward validation frame)
145584
- - do NOT hand them a curated reading list (let them discover scope)
145585
- - do NOT pre-shape their output with a finding schema
145586
- - do NOT mention the other lenses (independence is the point)
145587
-
145588
- 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
145589
-
145590
- then check: which prior review comments were addressed by the new commits? track the addressed ones for step 6b.
145591
-
145592
- 6. **build the review body** \u2014 two distinct sections:
145593
- a. **Reviewed changes**: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed.
145594
- b. **Prior review feedback** (only if any were addressed): list only the prior review comments that WERE addressed by the new commits (\`- [x] safeParse instead of parse \u2014 addressed\`). omit unaddressed comments. omit this entire section if nothing was addressed. a change can appear in both sections.
145595
- - no headings, no tables, no prose paragraphs in either section \u2014 just bullets
145596
- - in some cases you may receive a complete diff for the whole pull request instead of an incremental one. when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
145597
-
145598
- 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. the review body always includes the reviewed changes from step 6a. append \`Prior review feedback:\\n\` with the checklist from step 6b only if any prior comments were addressed. Follow these rules:
145599
- - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
145600
- - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
145601
- - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the reviewed changes summary and prior feedback (if any).
145602
- - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the reviewed changes summary and prior feedback (if any).
145603
- - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the reviewed changes summary and prior feedback (if any).`
145604
- },
145605
- {
145606
- name: "Plan",
145607
- description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
145608
- prompt: `### Checklist
145609
-
145610
- 1. Analyze the task and gather context:
145611
- - read AGENTS.md and relevant codebase files
145612
- - understand the architecture and constraints
145613
-
145614
- 2. Produce a structured, actionable plan with clear milestones.
145615
-
145616
- 3. Call \`${t("report_progress")}\` with the plan.
145617
-
145618
- ${learningsStep(t, 4)}`
145619
- },
145620
- {
145621
- name: "Fix",
145622
- description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
145623
- prompt: `### Checklist
145624
-
145625
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
145626
-
145627
- 2. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
145628
-
145629
- 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
145630
-
145631
- 4. Diagnose and fix:
145632
- - read the workflow file, reproduce locally with the EXACT same commands CI runs
145633
- - fix the issue using your native file and shell tools
145634
- - verify the fix by re-running the exact CI command
145635
- - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
145636
- - commit locally via shell (\`git add . && git commit -m "..."\`)
145637
-
145638
- 5. Finalize:
145639
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145640
- - call \`${t("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
145641
-
145642
- ${learningsStep(t, 6)}`
145643
- },
145644
- {
145645
- name: "ResolveConflicts",
145646
- description: "Resolve merge conflicts in a PR branch against the base branch",
145647
- prompt: `### Checklist
145648
-
145649
- 1. **Setup**:
145650
- - Call \`${t("checkout_pr")}\` to get the PR branch.
145651
- - Call \`${t("get_pull_request")}\` to identify the base branch (e.g., 'main').
145652
- - Call \`${t("git_fetch")}\` to fetch the base branch.
145653
-
145654
- 2. **Merge Attempt**:
145655
- - Run \`git merge origin/<base_branch>\` via shell.
145656
- - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
145657
- - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
145658
-
145659
- 3. **Resolve Conflicts**:
145660
- - Run \`git status\` or parse the merge output to find the list of conflicting files.
145661
- - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
145662
- - Verify the file syntax is correct after resolution.
145663
-
145664
- 4. **Finalize**:
145665
- - Run a final verification (build/test) to ensure the resolution works.
145666
- - \`git add . && git commit -m "resolve merge conflicts"\`
145667
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145668
- - Call \`${t("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
145669
- },
145670
- {
145671
- name: "Task",
145672
- description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
145673
- prompt: `### Checklist
145674
-
145675
- 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
145676
-
145677
- 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
145678
- - plan your approach before starting
145679
- - use native file and shell tools for local operations
145680
- - use ${pullfrogMcpName} MCP tools for GitHub/git operations
145681
- - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
145682
-
145683
- 3. Finalize:
145684
- - if code changes were made, push to a pull request (new or existing) using \`${t("push_branch")}\` and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
145685
- - call \`${t("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
145686
- - if the task involved labeling, commenting, or other GitHub operations, perform those directly
145687
-
145688
- ${learningsStep(t, 4)}`
145689
- },
145690
- {
145691
- name: "Summarize",
145692
- description: "Summarize a PR with a structured comment that is updated in place on subsequent pushes",
145693
- prompt: `### Checklist
145694
-
145695
- 1. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
145696
- 2. Read the diff using the TOC to selectively read relevant sections (not the entire file). Produce a structured summary. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
145697
- 3. Call \`${t("create_issue_comment")}\` with \`type: "Summary"\` and the summary body.
145698
- 4. Call \`${t("report_progress")}\` with a brief note (e.g., "Posted PR summary.").
145699
-
145700
- ${PR_SUMMARY_FORMAT}`
145701
- }
145702
- ];
145703
- }
145704
- var modes = computeModes("opencode");
145705
-
145706
145458
  // mcp/selectMode.ts
145707
145459
  var SelectModeParams = type({
145708
145460
  mode: type.string.describe(
145709
- "the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts', 'Summarize')"
145461
+ "the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts')"
145710
145462
  ),
145711
145463
  "issue_number?": type("number").describe(
145712
145464
  "optional issue number; when provided with Plan mode, used to look up an existing plan comment for this issue (edit vs create)"
@@ -145727,18 +145479,7 @@ An existing plan comment was found for this issue. Update that comment with the
145727
145479
  - gather relevant codebase context (file paths, architecture notes from AGENTS.md)
145728
145480
  - produce a structured plan with clear milestones
145729
145481
  3. Call \`${t("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
145730
- 4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t("report_progress")}\` so it is not left as "Leaping...".`,
145731
- SummaryUpdate: `### Checklist (updating existing summary)
145732
-
145733
- An existing summary comment was found for this PR. Update it rather than creating a new one.
145734
-
145735
- 1. Use \`previousSummaryBody\` from this response as the current summary to revise.
145736
- 2. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
145737
- 3. Read the diff using the TOC to selectively read relevant sections. Produce an updated summary reflecting the current state of the PR, using the existing summary (\`previousSummaryBody\`) as a starting point. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
145738
- 4. Call \`${t("edit_issue_comment")}\` with \`commentId: existingSummaryCommentId\` (from this response) and the updated summary body.
145739
- 5. Call \`${t("report_progress")}\` with a brief note (e.g., "Updated PR summary.").
145740
-
145741
- ${PR_SUMMARY_FORMAT}`
145482
+ 4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t("report_progress")}\` so it is not left as "Leaping...".`
145742
145483
  };
145743
145484
  }
145744
145485
  var modeInstructionParent = {
@@ -145771,30 +145512,22 @@ async function fetchExistingPlanComment(ctx, issueNumber) {
145771
145512
  return null;
145772
145513
  }
145773
145514
  }
145774
- async function fetchExistingSummaryComment(ctx, prNumber) {
145775
- if (!ctx.githubInstallationToken) {
145776
- log.warning("fetchExistingSummaryComment: no token, skipping");
145777
- return null;
145778
- }
145779
- const path3 = `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/pr/${prNumber}/summary-comment`;
145780
- try {
145781
- const response = await apiFetch({
145782
- path: path3,
145783
- method: "GET",
145784
- headers: { authorization: `Bearer ${ctx.githubInstallationToken}` },
145785
- signal: AbortSignal.timeout(1e4)
145786
- });
145787
- const data = await response.json();
145788
- if (response.ok && "commentId" in data) {
145789
- return data;
145790
- }
145791
- const errMsg = "error" in data ? data.error : "(no error body)";
145792
- log.warning(`fetchExistingSummaryComment: ${response.status} ${path3} \u2014 ${errMsg}`);
145793
- return null;
145794
- } catch (error49) {
145795
- log.warning("fetchExistingSummaryComment failed:", error49);
145796
- return null;
145797
- }
145515
+ var SUMMARY_MODES = /* @__PURE__ */ new Set(["Review", "IncrementalReview", "Task"]);
145516
+ function buildSummaryAddendum(t, ctx) {
145517
+ const filePath = ctx.toolState.summaryFilePath;
145518
+ if (!filePath) return "";
145519
+ return `### PR summary snapshot \u2014 required step
145520
+
145521
+ A rolling PR summary lives at \`${filePath}\`. It is your durable cross-run agent context \u2014 a functional summary of what this PR does, the subsystems and files it touches, the material behavior of its changes, and any risks or open questions worth carrying forward. It is NOT a chronological log of past review runs; commit-level history can already be reconstructed from \`${t("list_pull_request_reviews")}\`.
145522
+
145523
+ How to use it:
145524
+
145525
+ - read \`${filePath}\` at the START of the run, alongside the diff. it represents what previous agent runs already understood about this PR \u2014 absorb it before picking lenses or crafting subagent dispatch prompts. if it's a fresh seed (file is one or two lines), this is a first review and you'll be filling it in from the diff.
145526
+ - let the snapshot inform triage and dispatch. when it already tracks a risk, your lens prompts to subagents are stronger when they reference that context (e.g. "the JSDoc explicitly scopes to code points \u2014 do not flag grapheme-cluster issues" if the snapshot already documents that contract). when something the snapshot tracks is now resolved by new commits, note that. when new commits introduce something the snapshot doesn't yet describe, that's exactly where your fan-out should focus.
145527
+ - update the file in place to reflect the PR's CURRENT state. revise stale claims, drop resolved risks, add new behavior or risks. accuracy over breadth \u2014 every claim must be grounded in the diff. write for the next agent run, not for a human.
145528
+ - structure however serves THIS PR. there is no required section template. a refactor might organize by renamed export and call-site impact; a feature by capability; a billing change by money path. a compact note of which commit ranges have been reviewed should always be present so future runs scope correctly, but the rest is your call. when the structure works across runs, keep it stable so range-diffs are clean; when the PR's character changes (e.g. scope expands), reshape.
145529
+
145530
+ Do NOT call \`${t("create_issue_comment")}\` for the summary \u2014 the server reads this file at end-of-run and persists it. The file edit is mandatory regardless of whether a review is submitted; the snapshot feeds the next run.`;
145798
145531
  }
145799
145532
  function SelectModeTool(ctx) {
145800
145533
  const t = (name) => formatMcpToolRef(ctx.agentId, name);
@@ -145836,21 +145569,18 @@ function SelectModeTool(ctx) {
145836
145569
  }
145837
145570
  }
145838
145571
  }
145839
- if (selectedMode.name === "Summarize") {
145840
- const prNumber = ctx.payload.event.issue_number;
145841
- if (prNumber !== void 0) {
145842
- const existing = await fetchExistingSummaryComment(ctx, prNumber);
145843
- if (existing !== null) {
145844
- ctx.toolState.existingSummaryCommentId = existing.commentId;
145845
- return {
145846
- ...buildOrchestratorGuidance(ctx, selectedMode, overrides.SummaryUpdate),
145847
- existingSummaryCommentId: existing.commentId,
145848
- previousSummaryBody: existing.body
145849
- };
145850
- }
145851
- }
145572
+ const summaryAddendum = SUMMARY_MODES.has(selectedMode.name) ? buildSummaryAddendum(t, ctx) : "";
145573
+ const base = buildOrchestratorGuidance(ctx, selectedMode);
145574
+ if (summaryAddendum.length > 0) {
145575
+ return {
145576
+ ...base,
145577
+ orchestratorGuidance: `${base.orchestratorGuidance}
145578
+
145579
+ ${summaryAddendum}`,
145580
+ summaryFilePath: ctx.toolState.summaryFilePath
145581
+ };
145852
145582
  }
145853
- return buildOrchestratorGuidance(ctx, selectedMode);
145583
+ return base;
145854
145584
  })
145855
145585
  });
145856
145586
  }
@@ -146181,14 +145911,13 @@ function UploadFileTool(ctx) {
146181
145911
 
146182
145912
  // mcp/server.ts
146183
145913
  function initToolState(params) {
146184
- const parsed2 = params.progressCommentId ? parseInt(params.progressCommentId, 10) : NaN;
146185
- const resolvedId = Number.isNaN(parsed2) || parsed2 <= 0 ? void 0 : parsed2;
146186
- if (resolvedId) {
146187
- log.info(`\xBB using pre-created progress comment: ${resolvedId}`);
145914
+ const resolved = parseProgressComment(params.progressComment);
145915
+ if (resolved) {
145916
+ log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
146188
145917
  }
146189
145918
  return {
146190
- progressCommentId: resolvedId,
146191
- hadProgressComment: !!resolvedId,
145919
+ progressComment: resolved,
145920
+ hadProgressComment: !!resolved,
146192
145921
  backgroundProcesses: /* @__PURE__ */ new Map(),
146193
145922
  usageEntries: []
146194
145923
  };
@@ -146365,6 +146094,405 @@ async function startMcpHttpServer(ctx, options) {
146365
146094
  };
146366
146095
  }
146367
146096
 
146097
+ // agents/reviewer.ts
146098
+ var REVIEWER_AGENT_NAME = "reviewfrog";
146099
+ var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
146100
+
146101
+ HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
146102
+ - Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
146103
+ - Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
146104
+ - Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
146105
+ - Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
146106
+
146107
+ Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`;
146108
+
146109
+ // modes.ts
146110
+ var PR_SUMMARY_FORMAT = `### Default format
146111
+
146112
+ Follow this structure exactly:
146113
+
146114
+ <b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
146115
+ NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
146116
+
146117
+ ### Key changes
146118
+
146119
+ - **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
146120
+
146121
+ <sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
146122
+ NOTE: the metadata line goes AFTER the bullet list, not before it.
146123
+
146124
+ Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
146125
+
146126
+ <br/>
146127
+
146128
+ ## Example readable section title
146129
+
146130
+ > **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
146131
+ IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
146132
+
146133
+ 1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
146134
+
146135
+ If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
146136
+ > <details><summary>How does X work?</summary>
146137
+ > Extended explanation here.
146138
+ > </details>
146139
+
146140
+ End each section with a file links trail (3-4 key files max):
146141
+ [\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
146142
+
146143
+ Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
146144
+
146145
+ CRITICAL \u2014 GitHub markdown rendering rule:
146146
+ GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
146147
+
146148
+ Rules:
146149
+ - \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
146150
+ - ALL variable names, identifiers, and file names in body text must be in backticks
146151
+ - ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
146152
+ - Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
146153
+ - Do NOT include raw diff stats like '+123 / -45' or line counts
146154
+ - Do NOT include code blocks or repeat diff contents
146155
+ - Do NOT include a changelog section \u2014 the key changes list serves this purpose
146156
+ - Focus on *intent*, not *what* \u2014 the diff already shows what changed
146157
+ - Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
146158
+ function learningsStep(t, n) {
146159
+ return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
146160
+ }
146161
+ function computeModes(agentId) {
146162
+ const t = (toolName) => formatMcpToolRef(agentId, toolName);
146163
+ return [
146164
+ {
146165
+ name: "Build",
146166
+ description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
146167
+ prompt: `### Checklist
146168
+
146169
+ 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
146170
+
146171
+ 2. **setup**: checkout or create the branch:
146172
+ - **PR event, modifying the existing PR**: call \`${t("checkout_pr")}\`
146173
+ - **new branch**: use \`${t("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
146174
+
146175
+ 3. **build**: implement changes using your native file and shell tools:
146176
+ - follow the plan (if you ran a plan phase)
146177
+ - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
146178
+ - run relevant tests/lints before committing
146179
+
146180
+ 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
146181
+
146182
+ Skip self-review (commit directly) when the diff is **genuinely trivial**:
146183
+ - doc typos, comment-only edits, whitespace/format-only, import reordering
146184
+ - lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
146185
+ - low-risk dep patch bump from a trusted source
146186
+
146187
+ Run self-review when the diff has **any behavioral surface, however small**:
146188
+ - 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
146189
+ - any change to money / tax / currency / billing / fee / refund / payout calculations or constants
146190
+ - any change to auth / permissions / roles / sessions / tokens / signature verification
146191
+ - any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
146192
+ - new endpoints, new code paths, new error branches \u2014 even small ones
146193
+ - mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
146194
+ - anything you're uncertain about
146195
+
146196
+ Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
146197
+
146198
+ Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
146199
+
146200
+ Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
146201
+
146202
+ Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
146203
+ - Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
146204
+ - Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
146205
+ - Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
146206
+ - Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
146207
+ - For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
146208
+
146209
+ Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
146210
+
146211
+ 5. **finalize**:
146212
+ - confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
146213
+ - create a PR via \`${t("create_pull_request")}\`
146214
+ - call \`${t("report_progress")}\` with the PR link or the exact error if push/PR failed
146215
+
146216
+ ${learningsStep(t, 6)}
146217
+
146218
+ ### Notes
146219
+
146220
+ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146221
+ },
146222
+ {
146223
+ name: "AddressReviews",
146224
+ description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
146225
+ prompt: `### Checklist
146226
+
146227
+ 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
146228
+
146229
+ 2. Fetch review comments via \`${t("get_review_comments")}\`.
146230
+
146231
+ 3. For each comment:
146232
+ - understand the feedback
146233
+ - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
146234
+ - if the request stands, make the code change using your native tools; otherwise reply explaining why
146235
+ - record what was done (or why nothing was done)
146236
+
146237
+ 4. Quality check:
146238
+ - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
146239
+ - commit locally via shell (\`git add . && git commit -m "..."\`)
146240
+
146241
+ 5. Finalize:
146242
+ - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146243
+ - reply to each comment using \`${t("reply_to_review_comment")}\`
146244
+ - resolve addressed threads via \`${t("resolve_review_thread")}\`
146245
+ - call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)
146246
+
146247
+ ${learningsStep(t, 6)}`
146248
+ },
146249
+ // Review and IncrementalReview use the multi-lens orchestrator pattern
146250
+ // (canonical source: .claude/commands/anneal.md). The orchestrator does
146251
+ // triage → parallel read-only subagent fan-out → aggregate → draft comments
146252
+ // → submit. For someone else's PR, parallel lenses (correctness, security,
146253
+ // research-validated claims, user-journey, etc.) provide breadth across
146254
+ // angles that a single subagent can't carry coherently. Build mode keeps
146255
+ // a single fresh-eyes subagent (different problem shape — orchestrator
146256
+ // wrote the code and bias-mitigation comes from delegating to one
146257
+ // subagent that doesn't share the implementation context).
146258
+ // Deliberate omission vs canonical /anneal: severity categorization in the
146259
+ // final message (the review body has its own CAUTION/IMPORTANT framing
146260
+ // instead of a severity table).
146261
+ {
146262
+ name: "Review",
146263
+ description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
146264
+ prompt: `### Checklist
146265
+
146266
+ 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
146267
+
146268
+ 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
146269
+
146270
+ if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
146271
+
146272
+ "Genuinely trivial" (skip):
146273
+ - single-word doc typo, whitespace/format-only, comment-only across any number of files
146274
+ - lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
146275
+ - mechanical rename whose only effect is import-path updates
146276
+ - low-risk dep patch bump
146277
+
146278
+ "Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
146279
+ - any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
146280
+ - flipping a feature-flag default, default config value, or retry/timeout constant
146281
+ - changing a money/tax/currency/fee constant by any amount
146282
+ - changing an HTTP method, redirect URL, response code, or status enum
146283
+ - tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
146284
+ - renaming a public API surface (still trivial in shape, but needs an impact lens)
146285
+ - adding a new direct dependency (supply-chain surface)
146286
+ - any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
146287
+ - mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
146288
+
146289
+ When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
146290
+
146291
+ otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
146292
+
146293
+ - **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
146294
+ - **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
146295
+ - **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
146296
+ - **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
146297
+
146298
+ lenses come in two flavors, and you can mix them:
146299
+ - **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
146300
+ - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
146301
+
146302
+ starter menu (combine, omit, or invent your own):
146303
+ - **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
146304
+ - **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
146305
+ - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
146306
+ - **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
146307
+ - **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
146308
+ - **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
146309
+ - **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
146310
+ - **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
146311
+ - **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
146312
+ - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
146313
+ - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
146314
+
146315
+ 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
146316
+ - the diff path / target \u2014 reading the diff and the codebase is its job
146317
+ - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146318
+ - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
146319
+ - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
146320
+ - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
146321
+ - ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
146322
+
146323
+ delegation discipline:
146324
+ - do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
146325
+ - do NOT summarize the PR for them (biases toward a validation frame)
146326
+ - do NOT hand them a curated reading list (let them discover scope)
146327
+ - do NOT pre-shape their output with a finding schema
146328
+ - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
146329
+
146330
+ 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
146331
+
146332
+ for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
146333
+
146334
+ 5. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
146335
+
146336
+ note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146337
+
146338
+ The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
146339
+
146340
+ - **critical issues** (blocks merge \u2014 bugs, security, data loss):
146341
+ \`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146342
+ - **recommended changes** (non-critical):
146343
+ \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146344
+ - **no actionable issues**:
146345
+ \`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
146346
+
146347
+ ${PR_SUMMARY_FORMAT}`
146348
+ },
146349
+ // IncrementalReview shares Review's multi-lens orchestrator pattern but
146350
+ // scopes the target to the incremental diff. The "issues must be NEW
146351
+ // since the last Pullfrog review" filter lives at aggregation time
146352
+ // (step 5), NOT in the subagent prompt — pushing the filter into
146353
+ // subagents matches the canonical anneal anti-pattern of "list known
146354
+ // pre-existing failures — don't flag these" and suppresses signal on
146355
+ // regressions the new commits amplified. The review body is just
146356
+ // "Reviewed changes" — a separate "Prior review feedback" checklist
146357
+ // would duplicate the rolling PR summary snapshot's record of what
146358
+ // earlier runs already addressed and add noise to the user-facing
146359
+ // body. Same severity-table omission as Review.
146360
+ {
146361
+ name: "IncrementalReview",
146362
+ description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
146363
+ prompt: `### Checklist
146364
+
146365
+ 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
146366
+
146367
+ 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
146368
+
146369
+ 3. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
146370
+
146371
+ 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
146372
+
146373
+ if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
146374
+
146375
+ "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
146376
+ "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
146377
+ When unsure, treat as non-trivial.
146378
+
146379
+ otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
146380
+
146381
+ dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
146382
+ - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
146383
+ - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146384
+ - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
146385
+ - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
146386
+ - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
146387
+ - ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
146388
+
146389
+ delegation discipline:
146390
+ - do NOT lens-review the diff yourself in parallel with the subagents
146391
+ - do NOT summarize the changes for them (biases toward validation frame)
146392
+ - do NOT hand them a curated reading list (let them discover scope)
146393
+ - do NOT pre-shape their output with a finding schema
146394
+ - do NOT mention the other lenses (independence is the point)
146395
+
146396
+ 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
146397
+
146398
+ 6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
146399
+
146400
+ 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
146401
+ - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146402
+ - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
146403
+ - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
146404
+ - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
146405
+ - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
146406
+ },
146407
+ {
146408
+ name: "Plan",
146409
+ description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
146410
+ prompt: `### Checklist
146411
+
146412
+ 1. Analyze the task and gather context:
146413
+ - read AGENTS.md and relevant codebase files
146414
+ - understand the architecture and constraints
146415
+
146416
+ 2. Produce a structured, actionable plan with clear milestones.
146417
+
146418
+ 3. Call \`${t("report_progress")}\` with the plan.
146419
+
146420
+ ${learningsStep(t, 4)}`
146421
+ },
146422
+ {
146423
+ name: "Fix",
146424
+ description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
146425
+ prompt: `### Checklist
146426
+
146427
+ 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
146428
+
146429
+ 2. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
146430
+
146431
+ 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
146432
+
146433
+ 4. Diagnose and fix:
146434
+ - read the workflow file, reproduce locally with the EXACT same commands CI runs
146435
+ - fix the issue using your native file and shell tools
146436
+ - verify the fix by re-running the exact CI command
146437
+ - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
146438
+ - commit locally via shell (\`git add . && git commit -m "..."\`)
146439
+
146440
+ 5. Finalize:
146441
+ - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146442
+ - call \`${t("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
146443
+
146444
+ ${learningsStep(t, 6)}`
146445
+ },
146446
+ {
146447
+ name: "ResolveConflicts",
146448
+ description: "Resolve merge conflicts in a PR branch against the base branch",
146449
+ prompt: `### Checklist
146450
+
146451
+ 1. **Setup**:
146452
+ - Call \`${t("checkout_pr")}\` to get the PR branch.
146453
+ - Call \`${t("get_pull_request")}\` to identify the base branch (e.g., 'main').
146454
+ - Call \`${t("git_fetch")}\` to fetch the base branch.
146455
+
146456
+ 2. **Merge Attempt**:
146457
+ - Run \`git merge origin/<base_branch>\` via shell.
146458
+ - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
146459
+ - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
146460
+
146461
+ 3. **Resolve Conflicts**:
146462
+ - Run \`git status\` or parse the merge output to find the list of conflicting files.
146463
+ - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
146464
+ - Verify the file syntax is correct after resolution.
146465
+
146466
+ 4. **Finalize**:
146467
+ - Run a final verification (build/test) to ensure the resolution works.
146468
+ - \`git add . && git commit -m "resolve merge conflicts"\`
146469
+ - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146470
+ - Call \`${t("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
146471
+ },
146472
+ {
146473
+ name: "Task",
146474
+ description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
146475
+ prompt: `### Checklist
146476
+
146477
+ 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
146478
+
146479
+ 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
146480
+ - plan your approach before starting
146481
+ - use native file and shell tools for local operations
146482
+ - use ${pullfrogMcpName} MCP tools for GitHub/git operations
146483
+ - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
146484
+
146485
+ 3. Finalize:
146486
+ - if code changes were made, push to a pull request (new or existing) using \`${t("push_branch")}\` and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
146487
+ - call \`${t("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
146488
+ - if the task involved labeling, commenting, or other GitHub operations, perform those directly
146489
+
146490
+ ${learningsStep(t, 4)}`
146491
+ }
146492
+ ];
146493
+ }
146494
+ var modes = computeModes("opencode");
146495
+
146368
146496
  // agents/claude.ts
146369
146497
  import { execFileSync as execFileSync3 } from "node:child_process";
146370
146498
  import { mkdirSync as mkdirSync4, writeFileSync as writeFileSync7 } from "node:fs";
@@ -146457,20 +146585,34 @@ async function installFromNpmTarball(params) {
146457
146585
  }
146458
146586
 
146459
146587
  // utils/providerErrors.ts
146588
+ var statusKey = `\\b(?:status[_ ]?code|http[_ ]?status|status)["']?\\s*[:=]\\s*["']?`;
146460
146589
  var PROVIDER_ERROR_PATTERNS = [
146461
- { pattern: "429", label: "rate limited (429)" },
146462
- { pattern: "RESOURCE_EXHAUSTED", label: "quota exhausted" },
146463
- { pattern: "quota", label: "quota error" },
146464
- { pattern: "status: 500", label: "provider 500 error" },
146465
- { pattern: "INTERNAL", label: "provider internal error" },
146466
- { pattern: "status: 503", label: "provider unavailable (503)" },
146467
- { pattern: "UNAVAILABLE", label: "provider unavailable" },
146468
- { pattern: "rate limit", label: "rate limited" },
146469
- { pattern: "limit: 0", label: "zero quota" }
146590
+ { regex: new RegExp(`${statusKey}429\\b`, "i"), label: "rate limited (429)" },
146591
+ { regex: new RegExp(`${statusKey}500\\b`, "i"), label: "provider 500 error" },
146592
+ { regex: new RegExp(`${statusKey}503\\b`, "i"), label: "provider unavailable (503)" },
146593
+ // matches `rate limit`, `rate limited`, `rate limits exceeded`,
146594
+ // `rate_limit_error`, `rate_limit_exceeded`. the leading `\b` + `[_ ]`
146595
+ // separator rejects `x-ratelimit-*` / `anthropic-ratelimit-*` response
146596
+ // headers (no separator between "rate" and "limit") which routinely
146597
+ // appear in dumped 401 / 4xx error JSON.
146598
+ { regex: /\brate[_ ]limit/i, label: "rate limited" },
146599
+ { regex: /\bRESOURCE_EXHAUSTED\b/, label: "quota exhausted" },
146600
+ // Google gRPC `INTERNAL` status. word-boundary anchors reject
146601
+ // `INTERNAL_SERVER_ERROR` (HTTP 500 message that may appear in unrelated
146602
+ // log lines) and identifiers like `INTERNALS`.
146603
+ { regex: /\bINTERNAL\b/, label: "provider internal error" },
146604
+ { regex: /\bUNAVAILABLE\b/, label: "provider unavailable" },
146605
+ // matches `quota`, `insufficient_quota`, `quota_exceeded`, `quotaExceeded`.
146606
+ // word-character lookarounds would reject `_quota` / `quotaX`; `quota` is
146607
+ // specific enough that a plain substring match is safe.
146608
+ { regex: /quota/i, label: "quota error" },
146609
+ // explicit zero-quota response, e.g. `{"limit": 0}`. the `\b` anchor
146610
+ // around `limit` rejects keys like `time_limit` or `field_limit`.
146611
+ { regex: /["']?\blimit\b["']?\s*:\s*0\b/, label: "zero quota" }
146470
146612
  ];
146471
146613
  function detectProviderError(text) {
146472
146614
  for (const entry of PROVIDER_ERROR_PATTERNS) {
146473
- if (text.includes(entry.pattern)) return entry.label;
146615
+ if (entry.regex.test(text)) return entry.label;
146474
146616
  }
146475
146617
  return null;
146476
146618
  }
@@ -146580,6 +146722,7 @@ var ThinkingTimer = class {
146580
146722
  };
146581
146723
 
146582
146724
  // agents/postRun.ts
146725
+ import { readFile } from "node:fs/promises";
146583
146726
  var MAX_HOOK_OUTPUT_CHARS = 4096;
146584
146727
  function truncateHookOutput(raw2) {
146585
146728
  if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
@@ -146624,6 +146767,23 @@ function buildStopHookPrompt(failure) {
146624
146767
  "```"
146625
146768
  ].join("\n");
146626
146769
  }
146770
+ async function isSummaryUnchanged(filePath, seed) {
146771
+ try {
146772
+ const current = await readFile(filePath, "utf8");
146773
+ return current === seed;
146774
+ } catch {
146775
+ return false;
146776
+ }
146777
+ }
146778
+ function buildSummaryStalePrompt(filePath) {
146779
+ return [
146780
+ `PR SUMMARY UNTOUCHED \u2014 the rolling PR summary file at \`${filePath}\` is byte-identical to its seed; this run did not edit it.`,
146781
+ "",
146782
+ "review the diff and update the file in place to reflect what changed in the PR. update intent, key changes, and any risks worth flagging \u2014 keep the existing section headings stable so incremental runs produce clean diffs.",
146783
+ "",
146784
+ "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is \u2014 but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context."
146785
+ ].join("\n");
146786
+ }
146627
146787
  async function collectPostRunIssues(params) {
146628
146788
  const issues = {};
146629
146789
  if (params.stopScript) {
@@ -146632,12 +146792,17 @@ async function collectPostRunIssues(params) {
146632
146792
  }
146633
146793
  const status = getGitStatus();
146634
146794
  if (status) issues.dirtyTree = status;
146795
+ if (params.summaryFilePath && params.summarySeed !== void 0) {
146796
+ const stale = await isSummaryUnchanged(params.summaryFilePath, params.summarySeed);
146797
+ if (stale) issues.summaryStale = { filePath: params.summaryFilePath };
146798
+ }
146635
146799
  return issues;
146636
146800
  }
146637
146801
  function buildPostRunPrompt(issues) {
146638
146802
  const parts = [];
146639
146803
  if (issues.stopHook) parts.push(buildStopHookPrompt(issues.stopHook));
146640
146804
  if (issues.dirtyTree) parts.push(buildCommitPrompt(issues.dirtyTree));
146805
+ if (issues.summaryStale) parts.push(buildSummaryStalePrompt(issues.summaryStale.filePath));
146641
146806
  return parts.join("\n\n---\n\n");
146642
146807
  }
146643
146808
  function buildLearningsReflectionPrompt(agentId) {
@@ -146660,9 +146825,15 @@ async function runPostRunRetryLoop(params) {
146660
146825
  let finalIssues = {};
146661
146826
  let gateResumeCount = 0;
146662
146827
  let pendingReflection = params.reflectionPrompt;
146828
+ let summaryStaleNudged = false;
146663
146829
  while (gateResumeCount < MAX_POST_RUN_RETRIES) {
146664
146830
  if (!result.success) break;
146665
- const issues = await collectPostRunIssues({ stopScript: params.stopScript });
146831
+ const issues = await collectPostRunIssues({
146832
+ stopScript: params.stopScript,
146833
+ summaryFilePath: summaryStaleNudged ? void 0 : params.summaryFilePath,
146834
+ summarySeed: summaryStaleNudged ? void 0 : params.summarySeed
146835
+ });
146836
+ if (issues.summaryStale) summaryStaleNudged = true;
146666
146837
  finalIssues = issues;
146667
146838
  if (!hasPostRunIssues(issues)) {
146668
146839
  if (!pendingReflection) break;
@@ -146694,8 +146865,17 @@ async function runPostRunRetryLoop(params) {
146694
146865
  }
146695
146866
  log.info(`\xBB post-run retry (attempt ${gateResumeCount + 1}/${MAX_POST_RUN_RETRIES})`);
146696
146867
  const prompt = buildPostRunPrompt(issues);
146868
+ const onlySummaryStale = issues.summaryStale !== void 0 && issues.stopHook === void 0 && issues.dirtyTree === void 0;
146869
+ const preResume = result;
146697
146870
  result = await params.resume({ prompt, previousResult: result });
146698
146871
  aggregatedUsage = mergeAgentUsage(aggregatedUsage, result.usage);
146872
+ if (!result.success && onlySummaryStale) {
146873
+ log.warning(
146874
+ `\xBB summary-stale resume turn failed (${result.error ?? "unknown error"}), preserving prior successful result`
146875
+ );
146876
+ result = preResume;
146877
+ break;
146878
+ }
146699
146879
  gateResumeCount++;
146700
146880
  }
146701
146881
  if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
@@ -146832,6 +147012,7 @@ async function runClaude(params) {
146832
147012
  const thinkingTimer = new ThinkingTimer();
146833
147013
  let finalOutput = "";
146834
147014
  let sessionId;
147015
+ let resultErrorSubtype = null;
146835
147016
  let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
146836
147017
  let accumulatedCostUsd = 0;
146837
147018
  let tokensLogged = false;
@@ -146935,9 +147116,14 @@ async function runClaude(params) {
146935
147116
  tokensLogged = true;
146936
147117
  }
146937
147118
  } else if (subtype === "error_max_turns") {
147119
+ resultErrorSubtype = subtype;
146938
147120
  log.info(`\xBB ${params.label} max turns reached: ${JSON.stringify(event)}`);
146939
147121
  } else if (subtype === "error_during_execution") {
147122
+ resultErrorSubtype = subtype;
146940
147123
  log.info(`\xBB ${params.label} execution error: ${JSON.stringify(event)}`);
147124
+ } else if (subtype.startsWith("error")) {
147125
+ resultErrorSubtype = subtype;
147126
+ log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
146941
147127
  } else {
146942
147128
  log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
146943
147129
  }
@@ -147068,6 +147254,15 @@ ${stderrContext}`);
147068
147254
  sessionId
147069
147255
  };
147070
147256
  }
147257
+ if (resultErrorSubtype) {
147258
+ return {
147259
+ success: false,
147260
+ output: finalOutput || output,
147261
+ error: `result subtype: ${resultErrorSubtype}`,
147262
+ usage,
147263
+ sessionId
147264
+ };
147265
+ }
147071
147266
  return { success: true, output: finalOutput || output, usage, sessionId };
147072
147267
  } catch (error49) {
147073
147268
  params.todoTracker?.cancel();
@@ -147196,6 +147391,8 @@ var claude = agent({
147196
147391
  initialResult: result,
147197
147392
  initialUsage: result.usage,
147198
147393
  stopScript: ctx.stopScript,
147394
+ summaryFilePath: ctx.summaryFilePath,
147395
+ summarySeed: ctx.summarySeed,
147199
147396
  reflectionPrompt: buildLearningsReflectionPrompt("claude"),
147200
147397
  canResume: (r) => Boolean(r.sessionId),
147201
147398
  resume: async (c) => {
@@ -147529,6 +147726,12 @@ async function runOpenCode(params) {
147529
147726
  log.debug(withLabel(label, `tool output: ${outputStr}`));
147530
147727
  }
147531
147728
  },
147729
+ error: (event) => {
147730
+ agentErrorEvent = event;
147731
+ const errorName = event.error?.name || "unknown";
147732
+ const errorMessage = event.error?.data?.message || event.error?.name || JSON.stringify(event);
147733
+ log.info(`\xBB ${params.label} error event: ${errorName}: ${errorMessage}`);
147734
+ },
147532
147735
  result: async (event) => {
147533
147736
  const status = event.status || "unknown";
147534
147737
  const duration4 = event.stats?.duration_ms || 0;
@@ -147549,6 +147752,7 @@ async function runOpenCode(params) {
147549
147752
  };
147550
147753
  const recentStderr = [];
147551
147754
  let lastProviderError = null;
147755
+ let agentErrorEvent = null;
147552
147756
  let output = "";
147553
147757
  let stdoutBuffer = "";
147554
147758
  try {
@@ -147667,6 +147871,17 @@ ${stderrContext}`);
147667
147871
  usage
147668
147872
  };
147669
147873
  }
147874
+ if (agentErrorEvent) {
147875
+ const errorEvent = agentErrorEvent;
147876
+ const errorName = errorEvent.error?.name || "agent error";
147877
+ const errorMessage = errorEvent.error?.data?.message || errorEvent.error?.name || JSON.stringify(errorEvent);
147878
+ return {
147879
+ success: false,
147880
+ output: finalOutput || output,
147881
+ error: `${errorName}: ${errorMessage}`,
147882
+ usage
147883
+ };
147884
+ }
147670
147885
  return { success: true, output: finalOutput || output, usage };
147671
147886
  } catch (error49) {
147672
147887
  params.todoTracker?.cancel();
@@ -147742,6 +147957,8 @@ var opencode = agent({
147742
147957
  initialResult: result,
147743
147958
  initialUsage: result.usage,
147744
147959
  stopScript: ctx.stopScript,
147960
+ summaryFilePath: ctx.summaryFilePath,
147961
+ summarySeed: ctx.summarySeed,
147745
147962
  reflectionPrompt: buildLearningsReflectionPrompt("opencode"),
147746
147963
  resume: async (c) => runOpenCode({
147747
147964
  ...runParams,
@@ -151925,8 +152142,8 @@ async function reportErrorToComment(ctx) {
151925
152142
  const formattedError = ctx.title ? `${ctx.title}
151926
152143
 
151927
152144
  ${ctx.error}` : ctx.error;
151928
- const commentId = ctx.toolState.progressCommentId;
151929
- if (!commentId) {
152145
+ const comment = ctx.toolState.progressComment;
152146
+ if (!comment) {
151930
152147
  return;
151931
152148
  }
151932
152149
  const repoContext = parseRepoContext();
@@ -151945,12 +152162,11 @@ ${ctx.error}` : ctx.error;
151945
152162
  customParts,
151946
152163
  model: ctx.toolState.model
151947
152164
  });
151948
- await octokit.rest.issues.updateComment({
151949
- owner: repoContext.owner,
151950
- repo: repoContext.name,
151951
- comment_id: commentId,
151952
- body: `${formattedError}${footer}`
151953
- });
152165
+ await updateProgressComment(
152166
+ { octokit, owner: repoContext.owner, repo: repoContext.name },
152167
+ comment,
152168
+ `${formattedError}${footer}`
152169
+ );
151954
152170
  ctx.toolState.wasUpdated = true;
151955
152171
  }
151956
152172
 
@@ -152289,7 +152505,7 @@ When embedding images (e.g. uploaded screenshots) in comments or PR bodies, alwa
152289
152505
 
152290
152506
  **\`report_progress\`**: call this exactly once at the end of every run with a brief final summary (1-3 sentences) unless the mode guidance instructs otherwise. Never call it for intermediate status updates (e.g., "Checking for changes...", "Starting review...") \u2014 the task list handles live progress automatically. Calling \`report_progress\` replaces the task list with your summary and preserves the current task list in a collapsible section. Keep the summary concise \u2014 do not repeat what the task list already shows. Focus on the outcome (what was accomplished, links to artifacts) rather than listing individual steps. If something failed, include the tool's error text even when that makes the summary longer.
152291
152507
 
152292
- Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments, PR Summary comments).
152508
+ Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments).
152293
152509
 
152294
152510
  ### If you get stuck
152295
152511
 
@@ -152450,7 +152666,11 @@ var JsonPayload = type({
152450
152666
  "eventInstructions?": "string",
152451
152667
  "event?": "object",
152452
152668
  "timeout?": "string | undefined",
152453
- "progressCommentId?": "string | undefined"
152669
+ "progressComment?": type({
152670
+ id: "string",
152671
+ type: "'issue' | 'review'"
152672
+ }).or("undefined"),
152673
+ "generateSummary?": "boolean | undefined"
152454
152674
  });
152455
152675
  var COLLABORATOR_PERMISSIONS = ["admin", "maintain", "write"];
152456
152676
  function isCollaborator(event) {
@@ -152532,7 +152752,8 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
152532
152752
  event,
152533
152753
  timeout: inputs.timeout ?? jsonPayload?.timeout,
152534
152754
  cwd: resolveCwd(inputs.cwd),
152535
- progressCommentId: jsonPayload?.progressCommentId,
152755
+ progressComment: jsonPayload?.progressComment,
152756
+ generateSummary: jsonPayload?.generateSummary,
152536
152757
  // permissions: inputs > repoSettings > fallbacks
152537
152758
  push: inputs.push ?? repoSettings.push ?? "restricted",
152538
152759
  shell: resolvedShell,
@@ -152541,6 +152762,40 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
152541
152762
  };
152542
152763
  }
152543
152764
 
152765
+ // utils/prSummary.ts
152766
+ import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
152767
+ import { dirname as dirname4, join as join14 } from "node:path";
152768
+ var SUMMARY_FILE_NAME = "pullfrog-summary.md";
152769
+ var SUMMARY_SCAFFOLD = `# PR summary
152770
+
152771
+ <!-- durable cross-run context. edit in place; the next agent run reads this
152772
+ before reviewing new commits. structure however serves the PR best. -->
152773
+ `;
152774
+ var MIN_SNAPSHOT_LENGTH = 60;
152775
+ var MAX_SNAPSHOT_LENGTH = 32768;
152776
+ function summaryFilePath(tmpdir3) {
152777
+ return join14(tmpdir3, SUMMARY_FILE_NAME);
152778
+ }
152779
+ async function seedSummaryFile(params) {
152780
+ const path3 = summaryFilePath(params.tmpdir);
152781
+ await mkdir(dirname4(path3), { recursive: true });
152782
+ const seed = params.previousSnapshot && params.previousSnapshot.trim().length >= MIN_SNAPSHOT_LENGTH ? params.previousSnapshot : SUMMARY_SCAFFOLD;
152783
+ await writeFile2(path3, seed, "utf8");
152784
+ return path3;
152785
+ }
152786
+ async function readSummaryFile(path3) {
152787
+ let raw2;
152788
+ try {
152789
+ raw2 = await readFile2(path3, "utf8");
152790
+ } catch {
152791
+ return null;
152792
+ }
152793
+ const trimmed = raw2.trim();
152794
+ if (trimmed.length < MIN_SNAPSHOT_LENGTH) return null;
152795
+ if (trimmed.length > MAX_SNAPSHOT_LENGTH) return trimmed.slice(0, MAX_SNAPSHOT_LENGTH);
152796
+ return trimmed;
152797
+ }
152798
+
152544
152799
  // utils/reviewCleanup.ts
152545
152800
  var RE_REVIEW_PREAMBLE = "Incrementally re-review the new commits on this pull request. Use the IncrementalReview mode.";
152546
152801
  async function postReviewCleanup(ctx) {
@@ -152600,11 +152855,16 @@ async function dispatchFollowUpReReview(ctx, reviewedSha) {
152600
152855
  await ctx.octokit.rest.actions.createWorkflowDispatch({
152601
152856
  owner: ctx.repo.owner,
152602
152857
  repo: ctx.repo.name,
152603
- workflow_id: "pullfrog.yml",
152858
+ workflow_id: getCurrentWorkflowFilename(),
152604
152859
  ref: pr.data.base.repo.default_branch,
152605
152860
  inputs: { prompt: JSON.stringify(payload) }
152606
152861
  });
152607
152862
  }
152863
+ function getCurrentWorkflowFilename() {
152864
+ const ref = process.env.GITHUB_WORKFLOW_REF ?? "";
152865
+ const match3 = ref.match(/\/([^/]+)@/);
152866
+ return match3?.[1] ?? "pullfrog.yml";
152867
+ }
152608
152868
 
152609
152869
  // utils/run.ts
152610
152870
  async function handleAgentResult(ctx) {
@@ -152640,10 +152900,10 @@ async function handleAgentResult(ctx) {
152640
152900
  };
152641
152901
  }
152642
152902
 
152643
- // utils/runContextData.ts
152644
- var core5 = __toESM(require_core(), 1);
152645
-
152646
152903
  // utils/runContext.ts
152904
+ function isInfraCovered(params) {
152905
+ return params.isOss || params.plan === "payg";
152906
+ }
152647
152907
  var defaultSettings = {
152648
152908
  model: null,
152649
152909
  modes: [],
@@ -152661,7 +152921,8 @@ var defaultSettings = {
152661
152921
  var defaultRunContext = {
152662
152922
  settings: defaultSettings,
152663
152923
  apiToken: "",
152664
- oss: false
152924
+ oss: false,
152925
+ plan: "none"
152665
152926
  };
152666
152927
  async function fetchRunContext(params) {
152667
152928
  const timeoutMs = 3e4;
@@ -152700,6 +152961,7 @@ async function fetchRunContext(params) {
152700
152961
  },
152701
152962
  apiToken: data.apiToken,
152702
152963
  oss: data.oss ?? false,
152964
+ plan: data.plan ?? "none",
152703
152965
  proxyModel: data.proxyModel,
152704
152966
  dbSecrets: data.dbSecrets
152705
152967
  };
@@ -152710,6 +152972,7 @@ async function fetchRunContext(params) {
152710
152972
  }
152711
152973
 
152712
152974
  // utils/runContextData.ts
152975
+ var core5 = __toESM(require_core(), 1);
152713
152976
  async function resolveRunContextData(params) {
152714
152977
  log.info(`\xBB running Pullfrog v${package_default.version}...`);
152715
152978
  const repoContext = parseRepoContext();
@@ -152731,6 +152994,7 @@ async function resolveRunContextData(params) {
152731
152994
  repoSettings: runContext.settings,
152732
152995
  apiToken: runContext.apiToken,
152733
152996
  oss: runContext.oss,
152997
+ plan: runContext.plan,
152734
152998
  proxyModel: runContext.proxyModel,
152735
152999
  dbSecrets: runContext.dbSecrets
152736
153000
  };
@@ -152740,9 +153004,9 @@ async function resolveRunContextData(params) {
152740
153004
  import { execFileSync as execFileSync5, execSync as execSync3 } from "node:child_process";
152741
153005
  import { mkdtempSync } from "node:fs";
152742
153006
  import { tmpdir as tmpdir2 } from "node:os";
152743
- import { join as join14 } from "node:path";
153007
+ import { join as join15 } from "node:path";
152744
153008
  function createTempDirectory() {
152745
- const sharedTempDir = mkdtempSync(join14(tmpdir2(), "pullfrog-"));
153009
+ const sharedTempDir = mkdtempSync(join15(tmpdir2(), "pullfrog-"));
152746
153010
  process.env.PULLFROG_TEMP_DIR = sharedTempDir;
152747
153011
  log.info(`\xBB created temp dir at ${sharedTempDir}`);
152748
153012
  return sharedTempDir;
@@ -153057,6 +153321,73 @@ function resolveAgentForLog(ctx) {
153057
153321
  }
153058
153322
  return ctx.agentName;
153059
153323
  }
153324
/**
 * Error describing a billing rejection.
 *
 * Carries three optional details next to the message so that callers can
 * choose user-facing copy without parsing the message text:
 *  - `code`: machine-readable reason (compared against
 *    "router_requires_card" elsewhere in this file), or `null`.
 *  - `declineCode`: decline code for a rejected card, or `null`.
 *  - `needsReauthentication`: flag for charges that need extra
 *    authentication; defaults to `false`.
 */
var BillingError = class extends Error {
  code;
  declineCode;
  needsReauthentication;
  /**
   * @param {string} message Human-readable description of the failure.
   * @param {{code?: ?string, declineCode?: ?string, needsReauthentication?: ?boolean}} [opts]
   *   Optional details; missing, `null`, or `undefined` fields fall back to defaults.
   */
  constructor(message, opts = {}) {
    super(message);
    this.name = "BillingError";
    // The `= {}` default only applies to `undefined`; optional chaining keeps
    // an explicit `null` options argument from throwing a TypeError here.
    this.code = opts?.code ?? null;
    this.declineCode = opts?.declineCode ?? null;
    this.needsReauthentication = opts?.needsReauthentication ?? false;
  }
};
153336
/**
 * Error type used to mark a failure as temporary, so callers can tell it
 * apart from other errors with `instanceof`.
 */
var TransientError = class extends Error {
  // Own, enumerable `name` on every instance (shadows Error.prototype.name).
  name = "TransientError";
  /**
   * @param {string} message Human-readable description of the failure.
   */
  constructor(message) {
    // Explicit constructor on purpose: only `message` is forwarded to Error.
    super(message);
  }
};
153342
/**
 * Builds a link to an owner's page in the Pullfrog console.
 *
 * @param {string} owner Account name; percent-encoded before being placed in the path.
 * @param {string} anchor Fragment appended after `#`, inserted as-is.
 * @returns {string} Absolute URL of the form `https://pullfrog.com/console/<owner>#<anchor>`.
 */
function billingConsoleUrl(owner, anchor) {
  const ownerSegment = encodeURIComponent(owner);
  return "https://pullfrog.com/console/".concat(ownerSegment, "#", anchor);
}
153345
/**
 * Renders a billing error as a short Markdown message: a bold headline, an
 * explanation, and a link into the owner's console, separated by blank lines.
 *
 * The variant is chosen in priority order:
 *  1. `code === "router_requires_card"`
 *  2. `needsReauthentication` is truthy
 *  3. `declineCode` is truthy
 *  4. otherwise the "balance is empty" message
 *
 * @param {{code?: ?string, declineCode?: ?string, needsReauthentication?: boolean}} error49
 *   Error whose fields select the message.
 * @param {string} owner Account name used to build the console link.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function formatBillingErrorSummary(error49, owner) {
  let headline;
  let explanation;
  let link;
  if (error49.code === "router_requires_card") {
    headline = "**Add a card to start using Pullfrog Router.**";
    explanation = "Router proxies OpenRouter at raw cost \u2014 no platform markup, and your first $20 of usage is on us.";
    link = `[Add a card \u2192](${billingConsoleUrl(owner, "model-access")})`;
  } else if (error49.needsReauthentication) {
    // Fall back to a generic code when no decline code is available.
    const code = error49.declineCode ?? "authentication_required";
    headline = `**Your card issuer requires 3D Secure on every charge** (\`${code}\`).`;
    explanation = "Pullfrog can't complete a 3DS challenge from inside a workflow. Top up your Router balance once in Stripe Checkout \u2014 subsequent runs draw from the prepaid balance without re-triggering 3DS.";
    link = `[Top up balance \u2192](${billingConsoleUrl(owner, "billing")})`;
  } else if (error49.declineCode) {
    headline = `**Your card was declined** (\`${error49.declineCode}\`).`;
    explanation = "Update your payment method and Pullfrog will retry on the next run.";
    link = `[Update payment method \u2192](${billingConsoleUrl(owner, "billing")})`;
  } else {
    headline = "**Your Pullfrog balance is empty.**";
    explanation = "Top up your balance or enable auto-reload to keep runs flowing.";
    link = `[Manage billing \u2192](${billingConsoleUrl(owner, "billing")})`;
  }
  return [headline, "", explanation, "", link].join("\n");
}
153382
/**
 * Renders a temporary billing outage as a short Markdown message: a bold
 * headline, the error's own message, and guidance with links to the status
 * page and the owner's console, separated by blank lines.
 *
 * @param {{message: string}} error49 Error whose `message` is shown verbatim.
 * @param {string} owner Account name used to build the console link.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function formatTransientErrorSummary(error49, owner) {
  const detail = error49.message;
  const consoleLink = billingConsoleUrl(owner, "billing");
  const guidance = `Usually transient \u2014 the next dispatch should succeed. If it persists, check [status.pullfrog.com](https://status.pullfrog.com) or [your console](${consoleLink}).`;
  const lines = ["**Pullfrog billing is temporarily unavailable.**", "", detail, "", guidance];
  return lines.join("\n");
}
153060
153391
  async function mintProxyKey(ctx) {
153061
153392
  try {
153062
153393
  process.env.ACTIONS_ID_TOKEN_REQUEST_URL = ctx.oidcCredentials.requestUrl;
@@ -153069,6 +153400,20 @@ async function mintProxyKey(ctx) {
153069
153400
  method: "POST",
153070
153401
  headers: { Authorization: `Bearer ${oidcToken}` }
153071
153402
  });
153403
+ if (response.status === 402) {
153404
+ const body = await response.json().catch(() => null);
153405
+ throw new BillingError(body?.error ?? "insufficient balance", {
153406
+ code: body?.code ?? null,
153407
+ declineCode: body?.declineCode ?? null,
153408
+ needsReauthentication: body?.needsReauthentication ?? false
153409
+ });
153410
+ }
153411
+ if (response.status === 503) {
153412
+ const body = await response.json().catch(() => null);
153413
+ throw new TransientError(
153414
+ body?.error ?? "billing service temporarily unavailable \u2014 retry shortly"
153415
+ );
153416
+ }
153072
153417
  if (!response.ok) {
153073
153418
  log.warning(`proxy key mint failed (${response.status})`);
153074
153419
  return null;
@@ -153076,6 +153421,8 @@ async function mintProxyKey(ctx) {
153076
153421
  const data = await response.json();
153077
153422
  return data.key;
153078
153423
  } catch (error49) {
153424
+ if (error49 instanceof BillingError) throw error49;
153425
+ if (error49 instanceof TransientError) throw error49;
153079
153426
  log.warning(`proxy key mint error: ${error49 instanceof Error ? error49.message : String(error49)}`);
153080
153427
  return null;
153081
153428
  } finally {
@@ -153085,19 +153432,56 @@ async function mintProxyKey(ctx) {
153085
153432
  }
153086
153433
  async function resolveProxyModel(ctx) {
153087
153434
  if (process.env.PULLFROG_MODEL?.trim()) return;
153088
- if (ctx.oss && ctx.proxyModel) {
153089
- if (!ctx.oidcCredentials) {
153090
- log.warning("\xBB oss repo but no OIDC credentials available \u2014 skipping proxy");
153091
- return;
153092
- }
153093
- const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials });
153094
- if (!key) return;
153095
- process.env.OPENROUTER_API_KEY = key;
153096
- core6.setSecret(key);
153097
- ctx.payload.proxyModel = ctx.proxyModel;
153098
- log.info(`\xBB proxy: oss \u2192 ${ctx.proxyModel}`);
153435
+ const needsProxy = isInfraCovered({ isOss: ctx.oss, plan: ctx.plan }) && ctx.proxyModel;
153436
+ if (!needsProxy) return;
153437
+ if (!ctx.oidcCredentials) {
153438
+ log.warning("\xBB proxy requested but no OIDC credentials available \u2014 skipping");
153099
153439
  return;
153100
153440
  }
153441
+ const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials });
153442
+ if (!key) return;
153443
+ process.env.OPENROUTER_API_KEY = key;
153444
+ core6.setSecret(key);
153445
+ ctx.payload.proxyModel = ctx.proxyModel;
153446
+ const label = ctx.oss ? "oss" : "router";
153447
+ log.info(`\xBB proxy: ${label} \u2192 ${ctx.proxyModel}`);
153448
+ }
153449
/**
 * Best-effort lookup of the previously stored summary snapshot for a pull request.
 *
 * Sends a GET to `/api/repo/<owner>/<name>/pr/<prNumber>/summary-comment`
 * through `apiFetch`, authorised with the installation token and limited to
 * 10 seconds. This function never rejects: every failure resolves to `null`
 * and is recorded at debug level.
 *
 * @param {object} ctx Reads `ctx.githubInstallationToken`, `ctx.repo.owner` and `ctx.repo.name`.
 * @param {number|string} prNumber Pull request number placed in the request path.
 * @returns {Promise<string|null>} The non-empty `snapshot` string from the
 *   response body, or `null` when there is no token, the request fails, or
 *   the body carries no usable snapshot.
 */
async function fetchPreviousSnapshot(ctx, prNumber) {
  if (!ctx.githubInstallationToken) return null;
  try {
    const response = await apiFetch({
      path: `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/pr/${prNumber}/summary-comment`,
      method: "GET",
      headers: { authorization: `Bearer ${ctx.githubInstallationToken}` },
      signal: AbortSignal.timeout(1e4)
    });
    if (!response.ok) {
      log.debug(`previous pr summary fetch failed (${response.status})`);
      return null;
    }
    const data = await response.json();
    // `data?.` keeps a literal `null` JSON body from being reported as an exception below.
    return typeof data?.snapshot === "string" && data.snapshot.length > 0 ? data.snapshot : null;
  } catch (err) {
    // Still best-effort, but leave a trace so a missing snapshot can be diagnosed.
    log.debug(`previous pr summary fetch error: ${err instanceof Error ? err.message : String(err)}`);
    return null;
  }
}
153465
/**
 * Stores the summary file's current contents on the workflow run, at most
 * once per tool state.
 *
 * Does nothing when no summary file path is set or when an earlier call
 * already attempted the persist. The attempt flag is set before the first
 * `await`, so a second call made while the first is still in flight returns
 * immediately. The persist is skipped when the file yields no snapshot, or
 * when the snapshot equals the trimmed seed recorded in the tool state.
 * A rejection from the patch call is logged at debug level and not rethrown.
 *
 * @param {object} ctx Reads and updates `ctx.toolState`; passed on to `patchWorkflowRunFields`.
 * @returns {Promise<void>}
 */
async function persistSummary(ctx) {
  const state = ctx.toolState;
  const filePath = state.summaryFilePath;
  if (!filePath || state.summaryPersistAttempted) return;
  state.summaryPersistAttempted = true;

  const snapshot2 = await readSummaryFile(filePath);
  if (!snapshot2) {
    log.debug(`pr summary tmpfile missing or invalid at ${filePath} \u2014 skipping persist`);
    return;
  }

  const seed = state.summarySeed?.trim();
  const unchangedFromSeed = seed !== undefined && snapshot2 === seed;
  if (unchangedFromSeed) {
    log.warning(
      "\xBB pr summary tmpfile unchanged from seed \u2014 skipping persist (agent did not edit it)"
    );
    return;
  }

  const onPersistFailure = (err) => {
    const reason = err instanceof Error ? err.message : String(err);
    log.debug(`pr summary persist failed: ${reason}`);
  };
  await patchWorkflowRunFields(ctx, { summarySnapshot: snapshot2 }).catch(onPersistFailure);
}
153102
153486
  async function writeJobSummary(toolState) {
153103
153487
  const usageSummary = formatUsageSummary(toolState.usageEntries);
@@ -153119,7 +153503,7 @@ async function main() {
153119
153503
  let safetyNetTimer;
153120
153504
  const resolvedPromptInput = resolvePromptInput();
153121
153505
  const toolState = initToolState({
153122
- progressCommentId: typeof resolvedPromptInput !== "string" ? resolvedPromptInput.progressCommentId : void 0
153506
+ progressComment: typeof resolvedPromptInput !== "string" ? resolvedPromptInput.progressComment : void 0
153123
153507
  });
153124
153508
  resolveGit();
153125
153509
  const jobToken = getJobToken();
@@ -153153,12 +153537,33 @@ async function main() {
153153
153537
  delete process.env.ACTIONS_ID_TOKEN_REQUEST_URL;
153154
153538
  delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
153155
153539
  }
153156
- await resolveProxyModel({
153157
- payload,
153158
- oss: runContext.oss,
153159
- proxyModel: runContext.proxyModel,
153160
- oidcCredentials
153161
- });
153540
+ try {
153541
+ await resolveProxyModel({
153542
+ payload,
153543
+ oss: runContext.oss,
153544
+ plan: runContext.plan,
153545
+ proxyModel: runContext.proxyModel,
153546
+ oidcCredentials
153547
+ });
153548
+ } catch (error49) {
153549
+ if (error49 instanceof BillingError) {
153550
+ const summary2 = formatBillingErrorSummary(error49, runContext.repo.owner);
153551
+ await writeSummary(summary2).catch(() => {
153552
+ });
153553
+ await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
153554
+ });
153555
+ throw error49;
153556
+ }
153557
+ if (error49 instanceof TransientError) {
153558
+ const summary2 = formatTransientErrorSummary(error49, runContext.repo.owner);
153559
+ await writeSummary(summary2).catch(() => {
153560
+ });
153561
+ await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
153562
+ });
153563
+ throw error49;
153564
+ }
153565
+ throw error49;
153566
+ }
153162
153567
  const octokit = createOctokit(tokenRef.mcpToken);
153163
153568
  const runInfo = await resolveRun({ octokit });
153164
153569
  let toolContext;
@@ -153232,12 +153637,28 @@ async function main() {
153232
153637
  jobId: runInfo.jobId,
153233
153638
  mcpServerUrl: "",
153234
153639
  tmpdir: tmpdir3,
153640
+ oss: runContext.oss,
153641
+ plan: runContext.plan,
153235
153642
  resolvedModel
153236
153643
  };
153237
153644
  const mcpHttpServer = __using(_stack, await startMcpHttpServer(toolContext, { outputSchema }), true);
153238
153645
  toolContext.mcpServerUrl = mcpHttpServer.url;
153239
153646
  log.info(`\xBB MCP server started at ${mcpHttpServer.url}`);
153240
153647
  timer.checkpoint("mcpServer");
153648
+ if (payload.generateSummary && payload.event.is_pr && payload.event.issue_number) {
153649
+ const previousSnapshot = await fetchPreviousSnapshot(toolContext, payload.event.issue_number);
153650
+ const filePath = await seedSummaryFile({ tmpdir: tmpdir3, previousSnapshot });
153651
+ toolState.summaryFilePath = filePath;
153652
+ try {
153653
+ toolState.summarySeed = await readFile3(filePath, "utf8");
153654
+ } catch {
153655
+ }
153656
+ log.info(
153657
+ `\xBB summary snapshot seeded at ${filePath} (previous=${previousSnapshot ? "yes" : "no"})`
153658
+ );
153659
+ const ctxForExit = toolContext;
153660
+ onExitSignal(() => persistSummary(ctxForExit));
153661
+ }
153241
153662
  startInstallation(toolContext);
153242
153663
  const modelForLog = resolveModelForLog({ payload, resolvedModel });
153243
153664
  const agentForLog = resolveAgentForLog({ agentName: agent2.name, resolvedModel });
@@ -153269,7 +153690,7 @@ ${instructions.user}` : null,
153269
153690
  log.info(instructions.full);
153270
153691
  });
153271
153692
  if (agentId === "opencode") {
153272
- const pluginDir = join15(process.cwd(), ".opencode", "plugin");
153693
+ const pluginDir = join16(process.cwd(), ".opencode", "plugin");
153273
153694
  const hasPlugins = existsSync7(pluginDir) && readdirSync(pluginDir).some((f) => /\.[jt]sx?$/.test(f));
153274
153695
  if (hasPlugins && toolState.dependencyInstallation?.promise) {
153275
153696
  log.info(
@@ -153295,6 +153716,9 @@ ${instructions.user}` : null,
153295
153716
  }
153296
153717
  });
153297
153718
  toolState.todoTracker = todoTracker;
153719
+ onExitSignal(() => {
153720
+ todoTracker?.cancel();
153721
+ });
153298
153722
  let innerTimeoutFired = false;
153299
153723
  const onInnerActivityTimeout = () => {
153300
153724
  if (innerTimeoutFired) return;
@@ -153325,6 +153749,8 @@ ${instructions.user}` : null,
153325
153749
  instructions,
153326
153750
  todoTracker,
153327
153751
  stopScript: runContext.repoSettings.stopScript,
153752
+ summaryFilePath: toolState.summaryFilePath,
153753
+ summarySeed: toolState.summarySeed,
153328
153754
  onActivityTimeout: onInnerActivityTimeout,
153329
153755
  onToolUse: (event) => {
153330
153756
  const wasTracked = recordDiffReadFromToolUse({
@@ -153379,8 +153805,10 @@ ${instructions.user}` : null,
153379
153805
  log.debug(`post-review cleanup failed: ${error49}`);
153380
153806
  });
153381
153807
  }
153382
- const trackerWasLastWriter = todoTracker?.hasPublished && !toolState.finalSummaryWritten;
153383
- if (toolContext && toolState.progressCommentId && (!toolState.wasUpdated || trackerWasLastWriter)) {
153808
+ if (toolContext) {
153809
+ await persistSummary(toolContext);
153810
+ }
153811
+ if (toolContext && toolState.progressComment && !toolState.finalSummaryWritten) {
153384
153812
  await deleteProgressComment(toolContext).catch((error49) => {
153385
153813
  log.debug(`stranded progress comment cleanup failed: ${error49}`);
153386
153814
  });
@@ -153427,6 +153855,9 @@ ${errorMessage}
153427
153855
  log.debug(`post-review cleanup failed: ${error50}`);
153428
153856
  });
153429
153857
  }
153858
+ if (toolContext) {
153859
+ await persistSummary(toolContext);
153860
+ }
153430
153861
  return {
153431
153862
  success: false,
153432
153863
  error: errorMessage