pullfrog 0.0.203 → 0.0.205

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -18415,7 +18415,7 @@ var require_summary = __commonJS({
18415
18415
  exports.summary = exports.markdownSummary = exports.SUMMARY_DOCS_URL = exports.SUMMARY_ENV_VAR = void 0;
18416
18416
  var os_1 = __require("os");
18417
18417
  var fs_1 = __require("fs");
18418
- var { access, appendFile, writeFile: writeFile2 } = fs_1.promises;
18418
+ var { access, appendFile, writeFile: writeFile3 } = fs_1.promises;
18419
18419
  exports.SUMMARY_ENV_VAR = "GITHUB_STEP_SUMMARY";
18420
18420
  exports.SUMMARY_DOCS_URL = "https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary";
18421
18421
  var Summary = class {
@@ -18473,7 +18473,7 @@ var require_summary = __commonJS({
18473
18473
  return __awaiter(this, void 0, void 0, function* () {
18474
18474
  const overwrite = !!(options === null || options === void 0 ? void 0 : options.overwrite);
18475
18475
  const filePath = yield this.filePath();
18476
- const writeFunc = overwrite ? writeFile2 : appendFile;
18476
+ const writeFunc = overwrite ? writeFile3 : appendFile;
18477
18477
  yield writeFunc(filePath, this._buffer, { encoding: "utf8" });
18478
18478
  return this.emptyBuffer();
18479
18479
  });
@@ -62879,8 +62879,8 @@ var require_snapshot_utils = __commonJS({
62879
62879
  var require_snapshot_recorder = __commonJS({
62880
62880
  "node_modules/.pnpm/undici@7.22.0/node_modules/undici/lib/mock/snapshot-recorder.js"(exports, module) {
62881
62881
  "use strict";
62882
- var { writeFile: writeFile2, readFile, mkdir } = __require("node:fs/promises");
62883
- var { dirname: dirname5, resolve: resolve3 } = __require("node:path");
62882
+ var { writeFile: writeFile3, readFile: readFile4, mkdir: mkdir2 } = __require("node:fs/promises");
62883
+ var { dirname: dirname6, resolve: resolve3 } = __require("node:path");
62884
62884
  var { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = __require("node:timers");
62885
62885
  var { InvalidArgumentError, UndiciError } = require_errors4();
62886
62886
  var { hashId, isUrlExcludedFactory, normalizeHeaders, createHeaderFilters } = require_snapshot_utils();
@@ -63081,7 +63081,7 @@ var require_snapshot_recorder = __commonJS({
63081
63081
  throw new InvalidArgumentError("Snapshot path is required");
63082
63082
  }
63083
63083
  try {
63084
- const data = await readFile(resolve3(path3), "utf8");
63084
+ const data = await readFile4(resolve3(path3), "utf8");
63085
63085
  const parsed2 = JSON.parse(data);
63086
63086
  if (Array.isArray(parsed2)) {
63087
63087
  this.#snapshots.clear();
@@ -63111,12 +63111,12 @@ var require_snapshot_recorder = __commonJS({
63111
63111
  throw new InvalidArgumentError("Snapshot path is required");
63112
63112
  }
63113
63113
  const resolvedPath = resolve3(path3);
63114
- await mkdir(dirname5(resolvedPath), { recursive: true });
63114
+ await mkdir2(dirname6(resolvedPath), { recursive: true });
63115
63115
  const data = Array.from(this.#snapshots.entries()).map(([hash2, snapshot2]) => ({
63116
63116
  hash: hash2,
63117
63117
  snapshot: snapshot2
63118
63118
  }));
63119
- await writeFile2(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
63119
+ await writeFile3(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
63120
63120
  }
63121
63121
  /**
63122
63122
  * Clears all recorded snapshots
@@ -97692,14 +97692,14 @@ var require_turndown_cjs = __commonJS({
97692
97692
  } else if (node2.nodeType === 1) {
97693
97693
  replacement = replacementForNode.call(self2, node2);
97694
97694
  }
97695
- return join16(output, replacement);
97695
+ return join17(output, replacement);
97696
97696
  }, "");
97697
97697
  }
97698
97698
  function postProcess(output) {
97699
97699
  var self2 = this;
97700
97700
  this.rules.forEach(function(rule) {
97701
97701
  if (typeof rule.append === "function") {
97702
- output = join16(output, rule.append(self2.options));
97702
+ output = join17(output, rule.append(self2.options));
97703
97703
  }
97704
97704
  });
97705
97705
  return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
@@ -97711,7 +97711,7 @@ var require_turndown_cjs = __commonJS({
97711
97711
  if (whitespace.leading || whitespace.trailing) content = content.trim();
97712
97712
  return whitespace.leading + rule.replacement(content, node2, this.options) + whitespace.trailing;
97713
97713
  }
97714
- function join16(output, replacement) {
97714
+ function join17(output, replacement) {
97715
97715
  var s1 = trimTrailingNewlines(output);
97716
97716
  var s2 = trimLeadingNewlines(replacement);
97717
97717
  var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
@@ -99204,12 +99204,13 @@ import { basename as basename2 } from "node:path";
99204
99204
  // commands/gha.ts
99205
99205
  var core7 = __toESM(require_core(), 1);
99206
99206
  var import_arg = __toESM(require_arg(), 1);
99207
- import { dirname as dirname4 } from "node:path";
99207
+ import { dirname as dirname5 } from "node:path";
99208
99208
 
99209
99209
  // main.ts
99210
99210
  var core6 = __toESM(require_core(), 1);
99211
99211
  import { existsSync as existsSync7, readdirSync } from "node:fs";
99212
- import { join as join15 } from "node:path";
99212
+ import { readFile as readFile3 } from "node:fs/promises";
99213
+ import { join as join16 } from "node:path";
99213
99214
 
99214
99215
  // node_modules/.pnpm/@ark+util@0.56.0/node_modules/@ark/util/out/arrays.js
99215
99216
  var liftArray = (data) => Array.isArray(data) ? data : [data];
@@ -107705,7 +107706,7 @@ function buildCommitPrompt(status) {
107705
107706
  ].join("\n");
107706
107707
  }
107707
107708
  function hasPostRunIssues(issues) {
107708
- return issues.stopHook !== void 0 || issues.dirtyTree !== void 0;
107709
+ return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0;
107709
107710
  }
107710
107711
  var agent = (input) => {
107711
107712
  return {
@@ -108099,14 +108100,14 @@ var providers = {
108099
108100
  models: {
108100
108101
  grok: {
108101
108102
  displayName: "Grok",
108102
- resolve: "xai/grok-4",
108103
- openRouterResolve: "openrouter/x-ai/grok-4",
108103
+ resolve: "xai/grok-4.3",
108104
+ openRouterResolve: "openrouter/x-ai/grok-4.3",
108104
108105
  preferred: true
108105
108106
  },
108106
108107
  "grok-fast": {
108107
108108
  displayName: "Grok Fast",
108108
- resolve: "xai/grok-4-fast",
108109
- openRouterResolve: "openrouter/x-ai/grok-4-fast"
108109
+ resolve: "xai/grok-4-1-fast",
108110
+ openRouterResolve: "openrouter/x-ai/grok-4.1-fast"
108110
108111
  },
108111
108112
  "grok-code-fast": {
108112
108113
  displayName: "Grok Code Fast",
@@ -108313,8 +108314,8 @@ var providers = {
108313
108314
  },
108314
108315
  grok: {
108315
108316
  displayName: "Grok",
108316
- resolve: "openrouter/x-ai/grok-4",
108317
- openRouterResolve: "openrouter/x-ai/grok-4"
108317
+ resolve: "openrouter/x-ai/grok-4.3",
108318
+ openRouterResolve: "openrouter/x-ai/grok-4.3"
108318
108319
  },
108319
108320
  "deepseek-pro": {
108320
108321
  displayName: "DeepSeek Pro",
@@ -108500,7 +108501,8 @@ var STRING_KEYS = [
108500
108501
  "issueNodeId",
108501
108502
  "reviewNodeId",
108502
108503
  "planCommentNodeId",
108503
- "summaryCommentNodeId"
108504
+ "summaryCommentNodeId",
108505
+ "summarySnapshot"
108504
108506
  ];
108505
108507
  var NUMBER_KEYS = [
108506
108508
  "inputTokens",
@@ -108583,6 +108585,93 @@ function aggregateUsage(entries) {
108583
108585
  return out;
108584
108586
  }
108585
108587
 
108588
+ // utils/progressComment.ts
108589
+ function parseProgressComment(raw2) {
108590
+ if (!raw2?.id) return void 0;
108591
+ const id = parseInt(raw2.id, 10);
108592
+ if (Number.isNaN(id) || id <= 0) return void 0;
108593
+ return { id, type: raw2.type };
108594
+ }
108595
+ async function updateProgressComment(ctx, comment, body) {
108596
+ const result = await (comment.type === "review" ? ctx.octokit.rest.pulls.updateReviewComment({
108597
+ owner: ctx.owner,
108598
+ repo: ctx.repo,
108599
+ comment_id: comment.id,
108600
+ body
108601
+ }) : ctx.octokit.rest.issues.updateComment({
108602
+ owner: ctx.owner,
108603
+ repo: ctx.repo,
108604
+ comment_id: comment.id,
108605
+ body
108606
+ }));
108607
+ return {
108608
+ id: result.data.id,
108609
+ body: result.data.body ?? void 0,
108610
+ html_url: result.data.html_url,
108611
+ node_id: result.data.node_id
108612
+ };
108613
+ }
108614
+ async function deleteProgressCommentApi(ctx, comment) {
108615
+ if (comment.type === "review") {
108616
+ await ctx.octokit.rest.pulls.deleteReviewComment({
108617
+ owner: ctx.owner,
108618
+ repo: ctx.repo,
108619
+ comment_id: comment.id
108620
+ });
108621
+ return;
108622
+ }
108623
+ await ctx.octokit.rest.issues.deleteComment({
108624
+ owner: ctx.owner,
108625
+ repo: ctx.repo,
108626
+ comment_id: comment.id
108627
+ });
108628
+ }
108629
+ async function createLeapingProgressComment(ctx, target, body) {
108630
+ if (target.kind === "reviewReply") {
108631
+ try {
108632
+ const result2 = await ctx.octokit.rest.pulls.createReplyForReviewComment({
108633
+ owner: ctx.owner,
108634
+ repo: ctx.repo,
108635
+ pull_number: target.pullNumber,
108636
+ comment_id: target.replyToCommentId,
108637
+ body
108638
+ });
108639
+ return {
108640
+ comment: { id: result2.data.id, type: "review" },
108641
+ body: result2.data.body ?? void 0,
108642
+ html_url: result2.data.html_url
108643
+ };
108644
+ } catch (error49) {
108645
+ console.warn(
108646
+ `[progressComment] review reply failed (parent ${target.replyToCommentId} on PR #${target.pullNumber}), falling back to issue comment:`,
108647
+ error49
108648
+ );
108649
+ const fallback = await ctx.octokit.rest.issues.createComment({
108650
+ owner: ctx.owner,
108651
+ repo: ctx.repo,
108652
+ issue_number: target.pullNumber,
108653
+ body
108654
+ });
108655
+ return {
108656
+ comment: { id: fallback.data.id, type: "issue" },
108657
+ body: fallback.data.body ?? void 0,
108658
+ html_url: fallback.data.html_url
108659
+ };
108660
+ }
108661
+ }
108662
+ const result = await ctx.octokit.rest.issues.createComment({
108663
+ owner: ctx.owner,
108664
+ repo: ctx.repo,
108665
+ issue_number: target.issueNumber,
108666
+ body
108667
+ });
108668
+ return {
108669
+ comment: { id: result.data.id, type: "issue" },
108670
+ body: result.data.body ?? void 0,
108671
+ html_url: result.data.html_url
108672
+ };
108673
+ }
108674
+
108586
108675
  // node_modules/.pnpm/@toon-format+toon@1.4.0/node_modules/@toon-format/toon/dist/index.mjs
108587
108676
  var LIST_ITEM_MARKER = "-";
108588
108677
  var LIST_ITEM_PREFIX = "- ";
@@ -109102,12 +109191,6 @@ var addTools = (ctx, server, tools) => {
109102
109191
  };
109103
109192
 
109104
109193
  // mcp/comment.ts
109105
- var LEAPING_INTO_ACTION_PREFIX = "Leaping into action";
109106
- function isLeapingIntoActionCommentBody(body) {
109107
- const content = stripExistingFooter(body).trimStart();
109108
- const firstLine = content.split(/\r?\n/, 1)[0]?.trimEnd() ?? "";
109109
- return new RegExp(`(^|\\s)${LEAPING_INTO_ACTION_PREFIX}(\\.\\.\\.)?$`).test(firstLine);
109110
- }
109111
109194
  function buildCommentFooter(ctx, customParts) {
109112
109195
  const runId = ctx.runId;
109113
109196
  return buildPullfrogFooter({
@@ -109139,43 +109222,22 @@ function addFooter(ctx, body) {
109139
109222
  var Comment = type({
109140
109223
  issueNumber: type.number.describe("the issue number to comment on"),
109141
109224
  body: type.string.describe("the comment body content"),
109142
- type: type.enumerated("Plan", "Summary", "Comment").describe(
109143
- "Plan: record as the plan for this run. Summary: record as the PR summary comment (one per PR, updated in place). Comment: regular comment (default)."
109144
- ).optional()
109225
+ type: type.enumerated("Plan", "Comment").describe("Plan: record as the plan for this run. Comment: regular comment (default).").optional()
109145
109226
  });
109146
109227
  function CreateCommentTool(ctx) {
109147
109228
  return tool({
109148
109229
  name: "create_issue_comment",
109149
- description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments, type: 'Summary' for PR summary comments.",
109230
+ description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments.",
109150
109231
  parameters: Comment,
109151
109232
  execute: execute(async ({ issueNumber, body, type: commentType }) => {
109152
109233
  const bodyWithFooter = addFooter(ctx, body);
109153
- if (commentType === "Summary" && ctx.toolState.existingSummaryCommentId) {
109154
- log.info(
109155
- `\xBB redirecting create_issue_comment(Summary) to update existing comment ${ctx.toolState.existingSummaryCommentId}`
109156
- );
109157
- const result2 = await ctx.octokit.rest.issues.updateComment({
109158
- owner: ctx.repo.owner,
109159
- repo: ctx.repo.name,
109160
- comment_id: ctx.toolState.existingSummaryCommentId,
109161
- body: bodyWithFooter
109162
- });
109163
- if (result2.data.node_id) {
109164
- await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result2.data.node_id });
109165
- }
109166
- return {
109167
- success: true,
109168
- commentId: result2.data.id,
109169
- url: result2.data.html_url,
109170
- body: result2.data.body
109171
- };
109172
- }
109173
109234
  const result = await ctx.octokit.rest.issues.createComment({
109174
109235
  owner: ctx.repo.owner,
109175
109236
  repo: ctx.repo.name,
109176
109237
  issue_number: issueNumber,
109177
109238
  body: bodyWithFooter
109178
109239
  });
109240
+ ctx.toolState.wasUpdated = true;
109179
109241
  if (commentType === "Plan") {
109180
109242
  if (result.data.node_id) {
109181
109243
  await patchWorkflowRunFields(ctx, { planCommentNodeId: result.data.node_id });
@@ -109196,9 +109258,6 @@ function CreateCommentTool(ctx) {
109196
109258
  body: updateResult.data.body
109197
109259
  };
109198
109260
  }
109199
- if (commentType === "Summary" && result.data.node_id) {
109200
- await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result.data.node_id });
109201
- }
109202
109261
  return {
109203
109262
  success: true,
109204
109263
  commentId: result.data.id,
@@ -109249,6 +109308,7 @@ async function reportProgress(ctx, params) {
109249
109308
  }
109250
109309
  const issueNumber = ctx.payload.event.issue_number ?? ctx.toolState.issueNumber;
109251
109310
  const isPlanMode = ctx.toolState.selectedMode === "Plan";
109311
+ const apiCtx = { octokit: ctx.octokit, owner: ctx.repo.owner, repo: ctx.repo.name };
109252
109312
  if (target_plan_comment === true && ctx.toolState.existingPlanCommentId === void 0) {
109253
109313
  log.warning("target_plan_comment requested but no existingPlanCommentId in tool state");
109254
109314
  }
@@ -109258,86 +109318,74 @@ async function reportProgress(ctx, params) {
109258
109318
  const bodyWithoutFooter = stripExistingFooter(body);
109259
109319
  const footer = buildCommentFooter(ctx, customParts);
109260
109320
  const bodyWithFooter = `${bodyWithoutFooter}${footer}`;
109261
- const result2 = await ctx.octokit.rest.issues.updateComment({
109262
- owner: ctx.repo.owner,
109263
- repo: ctx.repo.name,
109264
- comment_id: commentId,
109265
- body: bodyWithFooter
109266
- });
109321
+ const result = await updateProgressComment(
109322
+ apiCtx,
109323
+ { id: commentId, type: "issue" },
109324
+ bodyWithFooter
109325
+ );
109267
109326
  ctx.toolState.wasUpdated = true;
109268
- if (isPlanMode && result2.data.node_id) {
109269
- await patchWorkflowRunFields(ctx, { planCommentNodeId: result2.data.node_id });
109327
+ if (isPlanMode && result.node_id) {
109328
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: result.node_id });
109270
109329
  }
109271
109330
  return {
109272
- commentId: result2.data.id,
109273
- url: result2.data.html_url,
109274
- body: result2.data.body || "",
109331
+ commentId: result.id,
109332
+ url: result.html_url,
109333
+ body: result.body || "",
109275
109334
  action: "updated"
109276
109335
  };
109277
109336
  }
109278
- const existingCommentId = ctx.toolState.progressCommentId;
109279
- if (existingCommentId) {
109280
- const customParts = isPlanMode && issueNumber !== void 0 ? [buildImplementPlanLink(ctx, issueNumber, existingCommentId)] : void 0;
109337
+ const existingComment = ctx.toolState.progressComment;
109338
+ if (existingComment) {
109339
+ const customParts = isPlanMode && issueNumber !== void 0 ? [buildImplementPlanLink(ctx, issueNumber, existingComment.id)] : void 0;
109281
109340
  const bodyWithoutFooter = stripExistingFooter(body);
109282
109341
  const footer = buildCommentFooter(ctx, customParts);
109283
109342
  const bodyWithFooter = `${bodyWithoutFooter}${footer}`;
109284
- const result2 = await ctx.octokit.rest.issues.updateComment({
109285
- owner: ctx.repo.owner,
109286
- repo: ctx.repo.name,
109287
- comment_id: existingCommentId,
109288
- body: bodyWithFooter
109289
- });
109343
+ const result = await updateProgressComment(apiCtx, existingComment, bodyWithFooter);
109290
109344
  ctx.toolState.wasUpdated = true;
109291
- if (isPlanMode && result2.data.node_id) {
109292
- await patchWorkflowRunFields(ctx, { planCommentNodeId: result2.data.node_id });
109345
+ if (isPlanMode && result.node_id) {
109346
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: result.node_id });
109293
109347
  }
109294
109348
  return {
109295
- commentId: result2.data.id,
109296
- url: result2.data.html_url,
109297
- body: result2.data.body || "",
109349
+ commentId: result.id,
109350
+ url: result.html_url,
109351
+ body: result.body || "",
109298
109352
  action: "updated"
109299
109353
  };
109300
109354
  }
109301
- if (existingCommentId === null) {
109355
+ if (existingComment === null) {
109302
109356
  return { body, action: "skipped" };
109303
109357
  }
109304
109358
  if (issueNumber === void 0) {
109305
109359
  return { body, action: "skipped" };
109306
109360
  }
109307
109361
  const initialBody = addFooter(ctx, body);
109308
- const result = await ctx.octokit.rest.issues.createComment({
109309
- owner: ctx.repo.owner,
109310
- repo: ctx.repo.name,
109311
- issue_number: issueNumber,
109312
- body: initialBody
109313
- });
109314
- ctx.toolState.progressCommentId = result.data.id;
109362
+ const created = await createLeapingProgressComment(
109363
+ apiCtx,
109364
+ { kind: "issue", issueNumber },
109365
+ initialBody
109366
+ );
109367
+ ctx.toolState.progressComment = created.comment;
109315
109368
  ctx.toolState.wasUpdated = true;
109316
109369
  if (isPlanMode) {
109317
- const customParts = [buildImplementPlanLink(ctx, issueNumber, result.data.id)];
109370
+ const customParts = [buildImplementPlanLink(ctx, issueNumber, created.comment.id)];
109318
109371
  const bodyWithoutFooter = stripExistingFooter(body);
109319
109372
  const footer = buildCommentFooter(ctx, customParts);
109320
109373
  const bodyWithPlanLink = `${bodyWithoutFooter}${footer}`;
109321
- const updateResult = await ctx.octokit.rest.issues.updateComment({
109322
- owner: ctx.repo.owner,
109323
- repo: ctx.repo.name,
109324
- comment_id: result.data.id,
109325
- body: bodyWithPlanLink
109326
- });
109327
- if (updateResult.data.node_id) {
109328
- await patchWorkflowRunFields(ctx, { planCommentNodeId: updateResult.data.node_id });
109374
+ const updateResult = await updateProgressComment(apiCtx, created.comment, bodyWithPlanLink);
109375
+ if (updateResult.node_id) {
109376
+ await patchWorkflowRunFields(ctx, { planCommentNodeId: updateResult.node_id });
109329
109377
  }
109330
109378
  return {
109331
- commentId: updateResult.data.id,
109332
- url: updateResult.data.html_url,
109333
- body: updateResult.data.body || "",
109379
+ commentId: updateResult.id,
109380
+ url: updateResult.html_url,
109381
+ body: updateResult.body || "",
109334
109382
  action: "created"
109335
109383
  };
109336
109384
  }
109337
109385
  return {
109338
- commentId: result.data.id,
109339
- url: result.data.html_url,
109340
- body: result.data.body || "",
109386
+ commentId: created.comment.id,
109387
+ url: created.html_url,
109388
+ body: created.body || "",
109341
109389
  action: "created"
109342
109390
  };
109343
109391
  }
@@ -109365,15 +109413,15 @@ ${collapsible}`;
109365
109413
  reportParams.target_plan_comment = params.target_plan_comment;
109366
109414
  }
109367
109415
  const result = await reportProgress(ctx, reportParams);
109368
- if (!params.target_plan_comment) {
109369
- ctx.toolState.finalSummaryWritten = true;
109370
- }
109371
109416
  if (result.action === "skipped") {
109372
109417
  return {
109373
109418
  success: true,
109374
109419
  message: "progress recorded (no GitHub comment created - this may occur for workflow_dispatch events or when there is no associated issue/PR)"
109375
109420
  };
109376
109421
  }
109422
+ if (!params.target_plan_comment) {
109423
+ ctx.toolState.finalSummaryWritten = true;
109424
+ }
109377
109425
  return {
109378
109426
  success: true,
109379
109427
  ...result
@@ -109382,23 +109430,22 @@ ${collapsible}`;
109382
109430
  });
109383
109431
  }
109384
109432
  async function deleteProgressComment(ctx) {
109385
- const existingCommentId = ctx.toolState.progressCommentId;
109386
- if (!existingCommentId) {
109433
+ const existing = ctx.toolState.progressComment;
109434
+ if (!existing) {
109387
109435
  return false;
109388
109436
  }
109389
109437
  try {
109390
- await ctx.octokit.rest.issues.deleteComment({
109391
- owner: ctx.repo.owner,
109392
- repo: ctx.repo.name,
109393
- comment_id: existingCommentId
109394
- });
109438
+ await deleteProgressCommentApi(
109439
+ { octokit: ctx.octokit, owner: ctx.repo.owner, repo: ctx.repo.name },
109440
+ existing
109441
+ );
109395
109442
  } catch (error49) {
109396
109443
  if (error49 instanceof Error && error49.message.includes("Not Found")) {
109397
109444
  } else {
109398
109445
  throw error49;
109399
109446
  }
109400
109447
  }
109401
- ctx.toolState.progressCommentId = null;
109448
+ ctx.toolState.progressComment = null;
109402
109449
  return true;
109403
109450
  }
109404
109451
  var ReplyToReviewComment = type({
@@ -142479,7 +142526,7 @@ var import_semver = __toESM(require_semver2(), 1);
142479
142526
  // package.json
142480
142527
  var package_default = {
142481
142528
  name: "pullfrog",
142482
- version: "0.0.203",
142529
+ version: "0.0.205",
142483
142530
  type: "module",
142484
142531
  bin: {
142485
142532
  pullfrog: "dist/cli.mjs",
@@ -142676,7 +142723,7 @@ function closeBrowserDaemon(toolState) {
142676
142723
 
142677
142724
  // mcp/checkout.ts
142678
142725
  import { createHash as createHash2 } from "node:crypto";
142679
- import { writeFileSync } from "node:fs";
142726
+ import { statSync, unlinkSync as unlinkSync2, writeFileSync } from "node:fs";
142680
142727
  import { join as join3 } from "node:path";
142681
142728
 
142682
142729
  // utils/diffCoverage.ts
@@ -142705,7 +142752,10 @@ function createDiffCoverageState(params) {
142705
142752
  totalLines: params.totalLines,
142706
142753
  tocEntries: parseDiffTocEntries({ toc: params.toc }),
142707
142754
  coveredRanges: [],
142708
- coveragePreflightRan: false
142755
+ // carry forward across checkout_pr refreshes so the nudge stays "once per
142756
+ // review session". coveredRanges are intentionally not carried because
142757
+ // line numbers are tied to the previous diff's content.
142758
+ coveragePreflightRan: params.previous?.coveragePreflightRan ?? false
142709
142759
  };
142710
142760
  }
142711
142761
  function recordDiffReadFromToolUse(params) {
@@ -143016,8 +143066,13 @@ async function $git(subcommand, args2, options) {
143016
143066
  }
143017
143067
  if (result.exitCode !== 0) {
143018
143068
  const stderr = result.stderr.trim();
143019
- log.info(`git ${subcommand} failed: ${stderr}`);
143020
- throw new Error(`git ${subcommand} failed: ${stderr}`);
143069
+ const stdout = result.stdout.trim();
143070
+ const detail = stderr && stdout ? `${stderr}
143071
+ --- stdout ---
143072
+ ${stdout}` : stderr || stdout || "(no output)";
143073
+ const message = `git ${subcommand} failed (exit ${result.exitCode}): ${detail}`;
143074
+ log.info(message);
143075
+ throw new Error(message);
143021
143076
  }
143022
143077
  return {
143023
143078
  stdout: result.stdout.trim(),
@@ -143294,6 +143349,34 @@ var PushBranch = type({
143294
143349
  branchName: type.string.describe("The branch name to push (defaults to current branch)").optional(),
143295
143350
  force: type.boolean.describe("Force push (use with caution)").default(false)
143296
143351
  });
143352
+ var CONCURRENT_PUSH_PATTERNS = ["fetch first", "non-fast-forward", "cannot lock ref"];
143353
+ var TRANSIENT_PATTERNS = [
143354
+ /RPC failed/i,
143355
+ /early EOF/,
143356
+ /the remote end hung up unexpectedly/,
143357
+ /Connection reset/i,
143358
+ /Could not resolve host/i,
143359
+ /Operation timed out/i,
143360
+ /HTTP\/2 stream \d+ was not closed cleanly/i,
143361
+ /unexpected disconnect while reading sideband packet/i,
143362
+ // libcurl HTTP 5xx surfaced by git over https. matches both the
143363
+ // libcurl-style "The requested URL returned error: 502" and the more
143364
+ // recent "HTTP 502" wording. most 4xx is intentionally excluded —
143365
+ // 401/403/404 indicate auth/permission problems that are not
143366
+ // retry-safe — but 429 (rate-limited / abuse detection) IS retry-safe
143367
+ // and GitHub occasionally surfaces it on git push, so it's included
143368
+ // explicitly below.
143369
+ /HTTP 5\d\d/,
143370
+ /returned error: 5\d\d/i,
143371
+ /HTTP 429/,
143372
+ /returned error: 429/i
143373
+ ];
143374
+ function classifyPushError(msg) {
143375
+ if (CONCURRENT_PUSH_PATTERNS.some((p2) => msg.includes(p2))) return "concurrent-push";
143376
+ if (TRANSIENT_PATTERNS.some((p2) => p2.test(msg))) return "transient";
143377
+ return "unknown";
143378
+ }
143379
+ var TRANSIENT_RETRY_DELAYS_MS = [2e3, 5e3];
143297
143380
  function PushBranchTool(ctx) {
143298
143381
  const defaultBranch = ctx.repo.data.default_branch || "main";
143299
143382
  const pushPermission = ctx.payload.push;
@@ -143344,25 +143427,48 @@ ${postHookStatus}`
143344
143427
  if (force) {
143345
143428
  log.warning(`force pushing - this will overwrite remote history`);
143346
143429
  }
143347
- try {
143348
- await $git("push", pushArgs, {
143349
- token: ctx.gitToken
143350
- });
143351
- } catch (err) {
143352
- const msg = err instanceof Error ? err.message : String(err);
143353
- if (msg.includes("fetch first") || msg.includes("non-fast-forward")) {
143354
- const integrateStep = ctx.payload.shell === "disabled" ? `2. use the git tool to merge the remote branch into yours: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] })` : `2. use the git tool to rebase or merge your changes on top: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] }) (or 'rebase')`;
143355
- throw new Error(
143356
- `push rejected: the remote branch '${pushDest.remoteBranch}' has new commits you don't have locally.
143430
+ let lastErr;
143431
+ let pushed = false;
143432
+ for (let attempt = 0; attempt <= TRANSIENT_RETRY_DELAYS_MS.length; attempt++) {
143433
+ try {
143434
+ await $git("push", pushArgs, {
143435
+ token: ctx.gitToken
143436
+ });
143437
+ if (attempt > 0) {
143438
+ log.info(`push succeeded on attempt ${attempt + 1}`);
143439
+ }
143440
+ pushed = true;
143441
+ break;
143442
+ } catch (err) {
143443
+ lastErr = err;
143444
+ const msg = err instanceof Error ? err.message : String(err);
143445
+ const kind = classifyPushError(msg);
143446
+ if (kind === "concurrent-push") {
143447
+ const integrateStep = ctx.payload.shell === "disabled" ? `2. use the git tool to merge the remote branch into yours: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] })` : `2. use the git tool to rebase or merge your changes on top: git({ command: "merge", args: ["origin/${pushDest.remoteBranch}"] }) (or 'rebase')`;
143448
+ throw new Error(
143449
+ `push rejected: the remote branch '${pushDest.remoteBranch}' has new commits you don't have locally (often a concurrent push to the same branch).
143357
143450
 
143358
143451
  to resolve this:
143359
143452
  1. use git_fetch to fetch the remote branch: git_fetch({ ref: "${pushDest.remoteBranch}" })
143360
143453
  ${integrateStep}
143361
143454
  3. resolve any merge conflicts if needed
143362
143455
  4. retry push_branch`
143363
- );
143456
+ );
143457
+ }
143458
+ if (kind === "transient" && attempt < TRANSIENT_RETRY_DELAYS_MS.length) {
143459
+ const baseDelay = TRANSIENT_RETRY_DELAYS_MS[attempt] ?? 5e3;
143460
+ const delay2 = Math.round(baseDelay * (0.75 + Math.random() * 0.5));
143461
+ log.info(
143462
+ `push attempt ${attempt + 1} failed (transient), retrying in ${delay2}ms: ${msg.slice(0, 300)}`
143463
+ );
143464
+ await new Promise((r) => setTimeout(r, delay2));
143465
+ continue;
143466
+ }
143467
+ throw err;
143364
143468
  }
143365
- throw err;
143469
+ }
143470
+ if (!pushed) {
143471
+ throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
143366
143472
  }
143367
143473
  return {
143368
143474
  success: true,
@@ -143453,6 +143559,11 @@ var GitFetch = type({
143453
143559
  ref: type.string.describe("Ref to fetch: branch name, tag, or 'pull/N/head' for PRs"),
143454
143560
  depth: type.number.describe("Fetch depth (for shallow clones)").optional()
143455
143561
  });
143562
+ var SHALLOW_UNREACHABLE_PATTERNS = [
143563
+ /Could not read [a-f0-9]{40,64}/,
143564
+ /remote did not send all necessary objects/
143565
+ ];
143566
+ var DEEPEN_RETRY_DEPTH = 1e3;
143456
143567
  function GitFetchTool(ctx) {
143457
143568
  return tool({
143458
143569
  name: "git_fetch",
@@ -143464,9 +143575,20 @@ function GitFetchTool(ctx) {
143464
143575
  if (params.depth !== void 0) {
143465
143576
  fetchArgs.push(`--depth=${params.depth}`);
143466
143577
  }
143467
- await $git("fetch", fetchArgs, {
143468
- token: ctx.gitToken
143469
- });
143578
+ try {
143579
+ await $git("fetch", fetchArgs, { token: ctx.gitToken });
143580
+ } catch (err) {
143581
+ const msg = err instanceof Error ? err.message : String(err);
143582
+ const isShallowUnreachable = SHALLOW_UNREACHABLE_PATTERNS.some((p2) => p2.test(msg));
143583
+ const isShallow = isShallowUnreachable && $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
143584
+ if (!isShallow) throw err;
143585
+ log.info(
143586
+ `\xBB git_fetch hit shallow-unreachable error, retrying with --deepen=${DEEPEN_RETRY_DEPTH}`
143587
+ );
143588
+ await $git("fetch", [`--deepen=${DEEPEN_RETRY_DEPTH}`, "--no-tags", "origin", params.ref], {
143589
+ token: ctx.gitToken
143590
+ });
143591
+ }
143470
143592
  return { success: true, ref: params.ref };
143471
143593
  })
143472
143594
  });
@@ -143831,6 +143953,7 @@ function CreatePullRequestReviewTool(ctx) {
143831
143953
  nodeId: reviewNodeId,
143832
143954
  reviewedSha: actuallyReviewedSha
143833
143955
  };
143956
+ ctx.toolState.wasUpdated = true;
143834
143957
  await deleteProgressComment(ctx).catch((err) => {
143835
143958
  log.debug(`progress comment cleanup after review failed: ${err}`);
143836
143959
  });
@@ -144182,11 +144305,38 @@ async function ensureBeforeShaReachable(params) {
144182
144305
  return false;
144183
144306
  }
144184
144307
  }
144308
+ var STALE_LOCK_AGE_MS = 3e4;
144309
+ var GIT_LOCK_PATHS = [
144310
+ ".git/shallow.lock",
144311
+ ".git/index.lock",
144312
+ ".git/objects/maintenance.lock"
144313
+ ];
144314
+ function cleanupStaleGitLocks() {
144315
+ const now = Date.now();
144316
+ for (const relPath of GIT_LOCK_PATHS) {
144317
+ let mtimeMs;
144318
+ try {
144319
+ mtimeMs = statSync(relPath).mtimeMs;
144320
+ } catch {
144321
+ continue;
144322
+ }
144323
+ if (now - mtimeMs < STALE_LOCK_AGE_MS) continue;
144324
+ try {
144325
+ unlinkSync2(relPath);
144326
+ log.warning(`\xBB removed stale ${relPath} from prior run`);
144327
+ } catch (e) {
144328
+ log.debug(
144329
+ `\xBB failed to remove stale ${relPath}: ${e instanceof Error ? e.message : String(e)}`
144330
+ );
144331
+ }
144332
+ }
144333
+ }
144185
144334
  async function checkoutPrBranch(pr, params) {
144186
144335
  const { octokit, owner, name, gitToken, toolState, beforeSha } = params;
144187
144336
  log.info(`\xBB checking out PR #${pr.number}...`);
144188
144337
  rejectIfLeadingDash(pr.baseRef, "PR base ref");
144189
144338
  rejectIfLeadingDash(pr.headRef, "PR head ref");
144339
+ cleanupStaleGitLocks();
144190
144340
  const isFork = pr.headRepoFullName !== pr.baseRepoFullName;
144191
144341
  const localBranch = `pr-${pr.number}`;
144192
144342
  const isShallow = $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
@@ -144356,7 +144506,8 @@ ${diffPreview}`);
144356
144506
  ctx.toolState.diffCoverage = createDiffCoverageState({
144357
144507
  diffPath,
144358
144508
  totalLines: countLines({ content: formatResult.content }),
144359
- toc: formatResult.toc
144509
+ toc: formatResult.toc,
144510
+ previous: ctx.toolState.diffCoverage
144360
144511
  });
144361
144512
  log.debug(
144362
144513
  `\xBB diff coverage initialized: diffPath=${diffPath}, totalLines=${ctx.toolState.diffCoverage.totalLines}, tocEntries=${ctx.toolState.diffCoverage.tocEntries.length}`
@@ -144986,6 +145137,7 @@ function UpdatePullRequestBodyTool(ctx) {
144986
145137
  pull_number: params.pull_number,
144987
145138
  body: bodyWithFooter
144988
145139
  });
145140
+ ctx.toolState.wasUpdated = true;
144989
145141
  return {
144990
145142
  success: true,
144991
145143
  number: result.data.number,
@@ -145416,35 +145568,20 @@ async function getReviewThreads(input) {
145416
145568
  const username = input.approvedBy;
145417
145569
  return threadsForReview.filter((thread) => threadHasThumbsUpFrom(thread, username));
145418
145570
  }
145419
- async function getReviewData(input) {
145420
- const [review, threads] = await Promise.all([
145421
- input.octokit.rest.pulls.getReview({
145422
- owner: input.owner,
145423
- repo: input.name,
145424
- pull_number: input.pullNumber,
145425
- review_id: input.reviewId
145426
- }),
145427
- getReviewThreads(input)
145428
- ]);
145429
- const rawReviewBody = review.data.body;
145571
+ function formatReviewData(input) {
145572
+ const rawReviewBody = input.review.body;
145430
145573
  const reviewBody = rawReviewBody ? stripExistingFooter(rawReviewBody) : "";
145431
- const reviewer = review.data.user?.login ?? "unknown";
145432
- if (threads.length === 0 && !reviewBody) return void 0;
145574
+ const reviewer = input.review.user?.login ?? "unknown";
145575
+ if (input.threads.length === 0 && !reviewBody) return void 0;
145433
145576
  let threadBlocks = [];
145434
- if (threads.length > 0) {
145435
- const prFiles = await input.octokit.paginate(input.octokit.rest.pulls.listFiles, {
145436
- owner: input.owner,
145437
- repo: input.name,
145438
- pull_number: input.pullNumber,
145439
- per_page: 100
145440
- });
145577
+ if (input.threads.length > 0) {
145441
145578
  const filePatchMap = /* @__PURE__ */ new Map();
145442
- for (const file2 of prFiles) {
145579
+ for (const file2 of input.prFiles) {
145443
145580
  if (file2.patch) {
145444
145581
  filePatchMap.set(file2.filename, parseFilePatches(file2.patch));
145445
145582
  }
145446
145583
  }
145447
- threadBlocks = buildThreadBlocks(threads, filePatchMap, input.reviewId);
145584
+ threadBlocks = buildThreadBlocks(input.threads, filePatchMap, input.reviewId);
145448
145585
  }
145449
145586
  const formatted = formatReviewThreads(threadBlocks, {
145450
145587
  pullNumber: input.pullNumber,
@@ -145454,6 +145591,30 @@ async function getReviewData(input) {
145454
145591
  });
145455
145592
  return { threadBlocks, reviewer, formatted };
145456
145593
  }
145594
+ async function getReviewData(input) {
145595
+ const [review, threads] = await Promise.all([
145596
+ input.octokit.rest.pulls.getReview({
145597
+ owner: input.owner,
145598
+ repo: input.name,
145599
+ pull_number: input.pullNumber,
145600
+ review_id: input.reviewId
145601
+ }),
145602
+ getReviewThreads(input)
145603
+ ]);
145604
+ const prFiles = threads.length > 0 ? await input.octokit.paginate(input.octokit.rest.pulls.listFiles, {
145605
+ owner: input.owner,
145606
+ repo: input.name,
145607
+ pull_number: input.pullNumber,
145608
+ per_page: 100
145609
+ }) : [];
145610
+ return formatReviewData({
145611
+ review: review.data,
145612
+ threads,
145613
+ prFiles,
145614
+ pullNumber: input.pullNumber,
145615
+ reviewId: input.reviewId
145616
+ });
145617
+ }
145457
145618
  function GetReviewCommentsTool(ctx) {
145458
145619
  return tool({
145459
145620
  name: "get_review_comments",
@@ -145577,425 +145738,10 @@ function ResolveReviewThreadTool(ctx) {
145577
145738
  });
145578
145739
  }
145579
145740
 
145580
- // agents/reviewer.ts
145581
- var REVIEWER_AGENT_NAME = "reviewfrog";
145582
- var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
145583
-
145584
- HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
145585
- - Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
145586
- - Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
145587
- - Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
145588
- - Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
145589
-
145590
- Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`;
145591
-
145592
- // modes.ts
145593
- var PR_SUMMARY_FORMAT = `### Default format
145594
-
145595
- Follow this structure exactly:
145596
-
145597
- <b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
145598
- NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
145599
-
145600
- ### Key changes
145601
-
145602
- - **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
145603
-
145604
- <sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
145605
- NOTE: the metadata line goes AFTER the bullet list, not before it.
145606
-
145607
- Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
145608
-
145609
- <br/>
145610
-
145611
- ## Example readable section title
145612
-
145613
- > **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
145614
- IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
145615
-
145616
- 1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
145617
-
145618
- If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
145619
- > <details><summary>How does X work?</summary>
145620
- > Extended explanation here.
145621
- > </details>
145622
-
145623
- End each section with a file links trail (3-4 key files max):
145624
- [\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
145625
-
145626
- Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
145627
-
145628
- CRITICAL \u2014 GitHub markdown rendering rule:
145629
- GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
145630
-
145631
- Rules:
145632
- - \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
145633
- - ALL variable names, identifiers, and file names in body text must be in backticks
145634
- - ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
145635
- - Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
145636
- - Do NOT include raw diff stats like '+123 / -45' or line counts
145637
- - Do NOT include code blocks or repeat diff contents
145638
- - Do NOT include a changelog section \u2014 the key changes list serves this purpose
145639
- - Focus on *intent*, not *what* \u2014 the diff already shows what changed
145640
- - Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
145641
- function learningsStep(t2, n) {
145642
- return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t2("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
145643
- }
145644
- function computeModes(agentId) {
145645
- const t2 = (toolName) => formatMcpToolRef(agentId, toolName);
145646
- return [
145647
- {
145648
- name: "Build",
145649
- description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
145650
- prompt: `### Checklist
145651
-
145652
- 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
145653
-
145654
- 2. **setup**: checkout or create the branch:
145655
- - **PR event, modifying the existing PR**: call \`${t2("checkout_pr")}\`
145656
- - **new branch**: use \`${t2("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
145657
-
145658
- 3. **build**: implement changes using your native file and shell tools:
145659
- - follow the plan (if you ran a plan phase)
145660
- - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
145661
- - run relevant tests/lints before committing
145662
-
145663
- 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
145664
-
145665
- Skip self-review (commit directly) when the diff is **genuinely trivial**:
145666
- - doc typos, comment-only edits, whitespace/format-only, import reordering
145667
- - lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
145668
- - low-risk dep patch bump from a trusted source
145669
-
145670
- Run self-review when the diff has **any behavioral surface, however small**:
145671
- - 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
145672
- - any change to money / tax / currency / billing / fee / refund / payout calculations or constants
145673
- - any change to auth / permissions / roles / sessions / tokens / signature verification
145674
- - any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
145675
- - new endpoints, new code paths, new error branches \u2014 even small ones
145676
- - mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
145677
- - anything you're uncertain about
145678
-
145679
- Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
145680
-
145681
- Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
145682
-
145683
- Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
145684
-
145685
- Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
145686
- - Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
145687
- - Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
145688
- - Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
145689
- - Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
145690
- - For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
145691
-
145692
- Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is not enough \u2014 a fix that improves correctness while degrading elegance still degrades the codebase. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
145693
-
145694
- 5. **finalize**:
145695
- - confirm a clean working tree, then push via \`${t2("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
145696
- - create a PR via \`${t2("create_pull_request")}\`
145697
- - call \`${t2("report_progress")}\` with the PR link or the exact error if push/PR failed
145698
-
145699
- ${learningsStep(t2, 6)}
145700
-
145701
- ### Notes
145702
-
145703
- For simple, well-defined tasks, skip the plan phase and go straight to build.`
145704
- },
145705
- {
145706
- name: "AddressReviews",
145707
- description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
145708
- prompt: `### Checklist
145709
-
145710
- 1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
145711
-
145712
- 2. Fetch review comments via \`${t2("get_review_comments")}\`.
145713
-
145714
- 3. For each comment:
145715
- - understand the feedback
145716
- - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is not enough; improving correctness while degrading elegance still degrades the code.
145717
- - if the request stands, make the code change using your native tools; otherwise reply explaining why
145718
- - record what was done (or why nothing was done)
145719
-
145720
- 4. Quality check:
145721
- - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
145722
- - commit locally via shell (\`git add . && git commit -m "..."\`)
145723
-
145724
- 5. Finalize:
145725
- - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145726
- - reply to each comment using \`${t2("reply_to_review_comment")}\`
145727
- - resolve addressed threads via \`${t2("resolve_review_thread")}\`
145728
- - call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)
145729
-
145730
- ${learningsStep(t2, 6)}`
145731
- },
145732
- // Review and IncrementalReview use the multi-lens orchestrator pattern
145733
- // (canonical source: .claude/commands/anneal.md). The orchestrator does
145734
- // triage → parallel read-only subagent fan-out → aggregate → draft comments
145735
- // → submit. For someone else's PR, parallel lenses (correctness, security,
145736
- // research-validated claims, user-journey, etc.) provide breadth across
145737
- // angles that a single subagent can't carry coherently. Build mode keeps
145738
- // a single fresh-eyes subagent (different problem shape — orchestrator
145739
- // wrote the code and bias-mitigation comes from delegating to one
145740
- // subagent that doesn't share the implementation context).
145741
- // Deliberate omission vs canonical /anneal: severity categorization in the
145742
- // final message (the review body has its own CAUTION/IMPORTANT framing
145743
- // instead of a severity table).
145744
- {
145745
- name: "Review",
145746
- description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
145747
- prompt: `### Checklist
145748
-
145749
- 1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
145750
-
145751
- 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
145752
-
145753
- if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit \`Reviewed \u2014 no issues found.\` per step 5. there's no value in dispatching even one lens for a typo.
145754
-
145755
- "Genuinely trivial" (skip):
145756
- - single-word doc typo, whitespace/format-only, comment-only across any number of files
145757
- - lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
145758
- - mechanical rename whose only effect is import-path updates
145759
- - low-risk dep patch bump
145760
-
145761
- "Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
145762
- - any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
145763
- - flipping a feature-flag default, default config value, or retry/timeout constant
145764
- - changing a money/tax/currency/fee constant by any amount
145765
- - changing an HTTP method, redirect URL, response code, or status enum
145766
- - tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
145767
- - renaming a public API surface (still trivial in shape, but needs an impact lens)
145768
- - adding a new direct dependency (supply-chain surface)
145769
- - any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
145770
- - mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
145771
-
145772
- When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
145773
-
145774
- otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
145775
-
145776
- - **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
145777
- - **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
145778
- - **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
145779
- - **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
145780
-
145781
- lenses come in two flavors, and you can mix them:
145782
- - **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
145783
- - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
145784
-
145785
- starter menu (combine, omit, or invent your own):
145786
- - **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
145787
- - **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
145788
- - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
145789
- - **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
145790
- - **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
145791
- - **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
145792
- - **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
145793
- - **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
145794
- - **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
145795
- - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
145796
- - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
145797
-
145798
- 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
145799
- - the diff path / target \u2014 reading the diff and the codebase is its job
145800
- - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
145801
- - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
145802
- - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
145803
- - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
145804
- - ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
145805
-
145806
- delegation discipline:
145807
- - do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
145808
- - do NOT summarize the PR for them (biases toward a validation frame)
145809
- - do NOT hand them a curated reading list (let them discover scope)
145810
- - do NOT pre-shape their output with a finding schema
145811
- - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
145812
-
145813
- 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
145814
-
145815
- for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
145816
-
145817
- 5. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
145818
-
145819
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
145820
-
145821
- - **critical issues** (blocks merge \u2014 bugs, security, data loss):
145822
- \`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
145823
- \`> [!CAUTION]\\n> This PR introduces a race condition in ...\`
145824
- Follow with a brief summary if needed. Include all inline comments.
145825
- - **recommended changes** (non-critical):
145826
- \`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
145827
- \`> [!IMPORTANT]\\n> Consider adding input validation for ...\`
145828
- Follow with a brief summary if needed. Include all inline comments.
145829
- - **no actionable issues**:
145830
- \`approved: true\`, body: "Reviewed \u2014 no issues found."`
145831
- },
145832
- // IncrementalReview shares Review's multi-lens orchestrator pattern but
145833
- // scopes the target to the incremental diff and adds prior-review-feedback
145834
- // tracking. The "issues must be NEW since the last Pullfrog review" filter
145835
- // lives at aggregation time (step 5), NOT in the subagent prompt — pushing
145836
- // the filter into subagents matches the canonical anneal anti-pattern of
145837
- // "list known pre-existing failures — don't flag these" and suppresses
145838
- // signal on regressions the new commits amplified. The body-format rules
145839
- // (Reviewed changes / Prior review feedback) are unchanged from the prior
145840
- // version. Same severity-table omission as Review.
145841
- {
145842
- name: "IncrementalReview",
145843
- description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
145844
- prompt: `### Checklist
145845
-
145846
- 1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
145847
-
145848
- 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
145849
-
145850
- 3. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll need this in step 6 to track which prior comments were addressed.
145851
-
145852
- 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
145853
-
145854
- if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
145855
-
145856
- "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
145857
- "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
145858
- When unsure, treat as non-trivial.
145859
-
145860
- otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
145861
-
145862
- dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
145863
- - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
145864
- - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
145865
- - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
145866
- - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
145867
- - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
145868
- - ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
145869
-
145870
- delegation discipline:
145871
- - do NOT lens-review the diff yourself in parallel with the subagents
145872
- - do NOT summarize the changes for them (biases toward validation frame)
145873
- - do NOT hand them a curated reading list (let them discover scope)
145874
- - do NOT pre-shape their output with a finding schema
145875
- - do NOT mention the other lenses (independence is the point)
145876
-
145877
- 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
145878
-
145879
- then check: which prior review comments were addressed by the new commits? track the addressed ones for step 6b.
145880
-
145881
- 6. **build the review body** \u2014 two distinct sections:
145882
- a. **Reviewed changes**: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed.
145883
- b. **Prior review feedback** (only if any were addressed): list only the prior review comments that WERE addressed by the new commits (\`- [x] safeParse instead of parse \u2014 addressed\`). omit unaddressed comments. omit this entire section if nothing was addressed. a change can appear in both sections.
145884
- - no headings, no tables, no prose paragraphs in either section \u2014 just bullets
145885
- - in some cases you may receive a complete diff for the whole pull request instead of an incremental one. when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
145886
-
145887
- 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. the review body always includes the reviewed changes from step 6a. append \`Prior review feedback:\\n\` with the checklist from step 6b only if any prior comments were addressed. Follow these rules:
145888
- - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
145889
- - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
145890
- - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the reviewed changes summary and prior feedback (if any).
145891
- - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the reviewed changes summary and prior feedback (if any).
145892
- - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t2("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the reviewed changes summary and prior feedback (if any).`
145893
- },
145894
- {
145895
- name: "Plan",
145896
- description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
145897
- prompt: `### Checklist
145898
-
145899
- 1. Analyze the task and gather context:
145900
- - read AGENTS.md and relevant codebase files
145901
- - understand the architecture and constraints
145902
-
145903
- 2. Produce a structured, actionable plan with clear milestones.
145904
-
145905
- 3. Call \`${t2("report_progress")}\` with the plan.
145906
-
145907
- ${learningsStep(t2, 4)}`
145908
- },
145909
- {
145910
- name: "Fix",
145911
- description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
145912
- prompt: `### Checklist
145913
-
145914
- 1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
145915
-
145916
- 2. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
145917
-
145918
- 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
145919
-
145920
- 4. Diagnose and fix:
145921
- - read the workflow file, reproduce locally with the EXACT same commands CI runs
145922
- - fix the issue using your native file and shell tools
145923
- - verify the fix by re-running the exact CI command
145924
- - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
145925
- - commit locally via shell (\`git add . && git commit -m "..."\`)
145926
-
145927
- 5. Finalize:
145928
- - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145929
- - call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
145930
-
145931
- ${learningsStep(t2, 6)}`
145932
- },
145933
- {
145934
- name: "ResolveConflicts",
145935
- description: "Resolve merge conflicts in a PR branch against the base branch",
145936
- prompt: `### Checklist
145937
-
145938
- 1. **Setup**:
145939
- - Call \`${t2("checkout_pr")}\` to get the PR branch.
145940
- - Call \`${t2("get_pull_request")}\` to identify the base branch (e.g., 'main').
145941
- - Call \`${t2("git_fetch")}\` to fetch the base branch.
145942
-
145943
- 2. **Merge Attempt**:
145944
- - Run \`git merge origin/<base_branch>\` via shell.
145945
- - If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
145946
- - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
145947
-
145948
- 3. **Resolve Conflicts**:
145949
- - Run \`git status\` or parse the merge output to find the list of conflicting files.
145950
- - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
145951
- - Verify the file syntax is correct after resolution.
145952
-
145953
- 4. **Finalize**:
145954
- - Run a final verification (build/test) to ensure the resolution works.
145955
- - \`git add . && git commit -m "resolve merge conflicts"\`
145956
- - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
145957
- - Call \`${t2("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
145958
- },
145959
- {
145960
- name: "Task",
145961
- description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
145962
- prompt: `### Checklist
145963
-
145964
- 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
145965
-
145966
- 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
145967
- - plan your approach before starting
145968
- - use native file and shell tools for local operations
145969
- - use ${pullfrogMcpName} MCP tools for GitHub/git operations
145970
- - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
145971
-
145972
- 3. Finalize:
145973
- - if code changes were made, push to a pull request (new or existing) using \`${t2("push_branch")}\` and \`${t2("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
145974
- - call \`${t2("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
145975
- - if the task involved labeling, commenting, or other GitHub operations, perform those directly
145976
-
145977
- ${learningsStep(t2, 4)}`
145978
- },
145979
- {
145980
- name: "Summarize",
145981
- description: "Summarize a PR with a structured comment that is updated in place on subsequent pushes",
145982
- prompt: `### Checklist
145983
-
145984
- 1. Checkout the PR via \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
145985
- 2. Read the diff using the TOC to selectively read relevant sections (not the entire file). Produce a structured summary. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
145986
- 3. Call \`${t2("create_issue_comment")}\` with \`type: "Summary"\` and the summary body.
145987
- 4. Call \`${t2("report_progress")}\` with a brief note (e.g., "Posted PR summary.").
145988
-
145989
- ${PR_SUMMARY_FORMAT}`
145990
- }
145991
- ];
145992
- }
145993
- var modes = computeModes("opencode");
145994
-
145995
145741
  // mcp/selectMode.ts
145996
145742
  var SelectModeParams = type({
145997
145743
  mode: type.string.describe(
145998
- "the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts', 'Summarize')"
145744
+ "the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts')"
145999
145745
  ),
146000
145746
  "issue_number?": type("number").describe(
146001
145747
  "optional issue number; when provided with Plan mode, used to look up an existing plan comment for this issue (edit vs create)"
@@ -146016,18 +145762,7 @@ An existing plan comment was found for this issue. Update that comment with the
146016
145762
  - gather relevant codebase context (file paths, architecture notes from AGENTS.md)
146017
145763
  - produce a structured plan with clear milestones
146018
145764
  3. Call \`${t2("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
146019
- 4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping...".`,
146020
- SummaryUpdate: `### Checklist (updating existing summary)
146021
-
146022
- An existing summary comment was found for this PR. Update it rather than creating a new one.
146023
-
146024
- 1. Use \`previousSummaryBody\` from this response as the current summary to revise.
146025
- 2. Checkout the PR via \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
146026
- 3. Read the diff using the TOC to selectively read relevant sections. Produce an updated summary reflecting the current state of the PR, using the existing summary (\`previousSummaryBody\`) as a starting point. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
146027
- 4. Call \`${t2("edit_issue_comment")}\` with \`commentId: existingSummaryCommentId\` (from this response) and the updated summary body.
146028
- 5. Call \`${t2("report_progress")}\` with a brief note (e.g., "Updated PR summary.").
146029
-
146030
- ${PR_SUMMARY_FORMAT}`
145765
+ 4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping...".`
146031
145766
  };
146032
145767
  }
146033
145768
  var modeInstructionParent = {
@@ -146060,30 +145795,22 @@ async function fetchExistingPlanComment(ctx, issueNumber) {
146060
145795
  return null;
146061
145796
  }
146062
145797
  }
146063
- async function fetchExistingSummaryComment(ctx, prNumber) {
146064
- if (!ctx.githubInstallationToken) {
146065
- log.warning("fetchExistingSummaryComment: no token, skipping");
146066
- return null;
146067
- }
146068
- const path3 = `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/pr/${prNumber}/summary-comment`;
146069
- try {
146070
- const response = await apiFetch({
146071
- path: path3,
146072
- method: "GET",
146073
- headers: { authorization: `Bearer ${ctx.githubInstallationToken}` },
146074
- signal: AbortSignal.timeout(1e4)
146075
- });
146076
- const data = await response.json();
146077
- if (response.ok && "commentId" in data) {
146078
- return data;
146079
- }
146080
- const errMsg = "error" in data ? data.error : "(no error body)";
146081
- log.warning(`fetchExistingSummaryComment: ${response.status} ${path3} \u2014 ${errMsg}`);
146082
- return null;
146083
- } catch (error49) {
146084
- log.warning("fetchExistingSummaryComment failed:", error49);
146085
- return null;
146086
- }
145798
+ var SUMMARY_MODES = /* @__PURE__ */ new Set(["Review", "IncrementalReview", "Task"]);
145799
+ function buildSummaryAddendum(t2, ctx) {
145800
+ const filePath = ctx.toolState.summaryFilePath;
145801
+ if (!filePath) return "";
145802
+ return `### PR summary snapshot \u2014 required step
145803
+
145804
+ A rolling PR summary lives at \`${filePath}\`. It is your durable cross-run agent context \u2014 a functional summary of what this PR does, the subsystems and files it touches, the material behavior of its changes, and any risks or open questions worth carrying forward. It is NOT a chronological log of past review runs; commit-level history can already be reconstructed from \`${t2("list_pull_request_reviews")}\`.
145805
+
145806
+ How to use it:
145807
+
145808
+ - read \`${filePath}\` at the START of the run, alongside the diff. it represents what previous agent runs already understood about this PR \u2014 absorb it before picking lenses or crafting subagent dispatch prompts. if it's a fresh seed (file is one or two lines), this is a first review and you'll be filling it in from the diff.
145809
+ - let the snapshot inform triage and dispatch. when it already tracks a risk, your lens prompts to subagents are stronger when they reference that context (e.g. "the JSDoc explicitly scopes to code points \u2014 do not flag grapheme-cluster issues" if the snapshot already documents that contract). when something the snapshot tracks is now resolved by new commits, note that. when new commits introduce something the snapshot doesn't yet describe, that's exactly where your fan-out should focus.
145810
+ - update the file in place to reflect the PR's CURRENT state. revise stale claims, drop resolved risks, add new behavior or risks. accuracy over breadth \u2014 every claim must be grounded in the diff. write for the next agent run, not for a human.
145811
+ - structure however serves THIS PR. there is no required section template. a refactor might organize by renamed export and call-site impact; a feature by capability; a billing change by money path. a compact note of which commit ranges have been reviewed should always be present so future runs scope correctly, but the rest is your call. when the structure works across runs, keep it stable so range-diffs are clean; when the PR's character changes (e.g. scope expands), reshape.
145812
+
145813
+ Do NOT call \`${t2("create_issue_comment")}\` for the summary \u2014 the server reads this file at end-of-run and persists it. The file edit is mandatory regardless of whether a review is submitted; the snapshot feeds the next run.`;
146087
145814
  }
146088
145815
  function SelectModeTool(ctx) {
146089
145816
  const t2 = (name) => formatMcpToolRef(ctx.agentId, name);
@@ -146125,21 +145852,18 @@ function SelectModeTool(ctx) {
146125
145852
  }
146126
145853
  }
146127
145854
  }
146128
- if (selectedMode.name === "Summarize") {
146129
- const prNumber = ctx.payload.event.issue_number;
146130
- if (prNumber !== void 0) {
146131
- const existing = await fetchExistingSummaryComment(ctx, prNumber);
146132
- if (existing !== null) {
146133
- ctx.toolState.existingSummaryCommentId = existing.commentId;
146134
- return {
146135
- ...buildOrchestratorGuidance(ctx, selectedMode, overrides.SummaryUpdate),
146136
- existingSummaryCommentId: existing.commentId,
146137
- previousSummaryBody: existing.body
146138
- };
146139
- }
146140
- }
145855
+ const summaryAddendum = SUMMARY_MODES.has(selectedMode.name) ? buildSummaryAddendum(t2, ctx) : "";
145856
+ const base = buildOrchestratorGuidance(ctx, selectedMode);
145857
+ if (summaryAddendum.length > 0) {
145858
+ return {
145859
+ ...base,
145860
+ orchestratorGuidance: `${base.orchestratorGuidance}
145861
+
145862
+ ${summaryAddendum}`,
145863
+ summaryFilePath: ctx.toolState.summaryFilePath
145864
+ };
146141
145865
  }
146142
- return buildOrchestratorGuidance(ctx, selectedMode);
145866
+ return base;
146143
145867
  })
146144
145868
  });
146145
145869
  }
@@ -146470,14 +146194,13 @@ function UploadFileTool(ctx) {
146470
146194
 
146471
146195
  // mcp/server.ts
146472
146196
  function initToolState(params) {
146473
- const parsed2 = params.progressCommentId ? parseInt(params.progressCommentId, 10) : NaN;
146474
- const resolvedId = Number.isNaN(parsed2) || parsed2 <= 0 ? void 0 : parsed2;
146475
- if (resolvedId) {
146476
- log.info(`\xBB using pre-created progress comment: ${resolvedId}`);
146197
+ const resolved = parseProgressComment(params.progressComment);
146198
+ if (resolved) {
146199
+ log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
146477
146200
  }
146478
146201
  return {
146479
- progressCommentId: resolvedId,
146480
- hadProgressComment: !!resolvedId,
146202
+ progressComment: resolved,
146203
+ hadProgressComment: !!resolved,
146481
146204
  backgroundProcesses: /* @__PURE__ */ new Map(),
146482
146205
  usageEntries: []
146483
146206
  };
@@ -146654,6 +146377,405 @@ async function startMcpHttpServer(ctx, options) {
146654
146377
  };
146655
146378
  }
146656
146379
 
146380
+ // agents/reviewer.ts
146381
+ var REVIEWER_AGENT_NAME = "reviewfrog"; // subagent name interpolated into the Build / Review / IncrementalReview mode prompts
146382
+ var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
146383
+
146384
+ HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
146385
+ - Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
146386
+ - Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
146387
+ - Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
146388
+ - Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
146389
+
146390
+ Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`; // prose-only contract (read-only, no nested subagents) for the reviewer; NOTE(review): where this prompt is attached to the subagent is not visible in this chunk
146391
+
146392
+ // modes.ts
146393
+ var PR_SUMMARY_FORMAT = `### Default format
146394
+
146395
+ Follow this structure exactly:
146396
+
146397
+ <b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
146398
+ NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
146399
+
146400
+ ### Key changes
146401
+
146402
+ - **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
146403
+
146404
+ <sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
146405
+ NOTE: the metadata line goes AFTER the bullet list, not before it.
146406
+
146407
+ Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
146408
+
146409
+ <br/>
146410
+
146411
+ ## Example readable section title
146412
+
146413
+ > **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
146414
+ IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
146415
+
146416
+ 1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
146417
+
146418
+ If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
146419
+ > <details><summary>How does X work?</summary>
146420
+ > Extended explanation here.
146421
+ > </details>
146422
+
146423
+ End each section with a file links trail (3-4 key files max):
146424
+ [\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
146425
+
146426
+ Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
146427
+
146428
+ CRITICAL \u2014 GitHub markdown rendering rule:
146429
+ GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
146430
+
146431
+ Rules:
146432
+ - \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
146433
+ - ALL variable names, identifiers, and file names in body text must be in backticks
146434
+ - ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
146435
+ - Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
146436
+ - Do NOT include raw diff stats like '+123 / -45' or line counts
146437
+ - Do NOT include code blocks or repeat diff contents
146438
+ - Do NOT include a changelog section \u2014 the key changes list serves this purpose
146439
+ - Focus on *intent*, not *what* \u2014 the diff already shows what changed
146440
+ - Get the file count and commit count from the checkout_pr metadata, not by counting manually`; // PR-summary formatting instructions; appended verbatim to the end of the Review mode prompt in computeModes
146441
+ function learningsStep(t2, n) { // returns checklist step number `n` (the "learnings" step); `t2` maps a tool name to the reference text shown in the prompt
146442
+ return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t2("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`; // callers pick `n` so the step follows their own numbered list (Build and AddressReviews both pass 6)
146443
+ }
146444
+ function computeModes(agentId) {
146445
+ const t2 = (toolName) => formatMcpToolRef(agentId, toolName);
146446
+ return [
146447
+ {
146448
+ name: "Build",
146449
+ description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
146450
+ prompt: `### Checklist
146451
+
146452
+ 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
146453
+
146454
+ 2. **setup**: checkout or create the branch:
146455
+ - **PR event, modifying the existing PR**: call \`${t2("checkout_pr")}\`
146456
+ - **new branch**: use \`${t2("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
146457
+
146458
+ 3. **build**: implement changes using your native file and shell tools:
146459
+ - follow the plan (if you ran a plan phase)
146460
+ - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
146461
+ - run relevant tests/lints before committing
146462
+
146463
+ 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
146464
+
146465
+ Skip self-review (commit directly) when the diff is **genuinely trivial**:
146466
+ - doc typos, comment-only edits, whitespace/format-only, import reordering
146467
+ - lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
146468
+ - low-risk dep patch bump from a trusted source
146469
+
146470
+ Run self-review when the diff has **any behavioral surface, however small**:
146471
+ - 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
146472
+ - any change to money / tax / currency / billing / fee / refund / payout calculations or constants
146473
+ - any change to auth / permissions / roles / sessions / tokens / signature verification
146474
+ - any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
146475
+ - new endpoints, new code paths, new error branches \u2014 even small ones
146476
+ - mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
146477
+ - anything you're uncertain about
146478
+
146479
+ Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
146480
+
146481
+ Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
146482
+
146483
+ Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
146484
+
146485
+ Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
146486
+ - Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
146487
+ - Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
146488
+ - Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
146489
+ - Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
146490
+ - For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
146491
+
146492
+ Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
146493
+
146494
+ 5. **finalize**:
146495
+ - confirm a clean working tree, then push via \`${t2("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
146496
+ - create a PR via \`${t2("create_pull_request")}\`
146497
+ - call \`${t2("report_progress")}\` with the PR link or the exact error if push/PR failed
146498
+
146499
+ ${learningsStep(t2, 6)}
146500
+
146501
+ ### Notes
146502
+
146503
+ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146504
+ },
146505
+ {
146506
+ name: "AddressReviews",
146507
+ description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
146508
+ prompt: `### Checklist
146509
+
146510
+ 1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
146511
+
146512
+ 2. Fetch review comments via \`${t2("get_review_comments")}\`.
146513
+
146514
+ 3. For each comment:
146515
+ - understand the feedback
146516
+ - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
146517
+ - if the request stands, make the code change using your native tools; otherwise reply explaining why
146518
+ - record what was done (or why nothing was done)
146519
+
146520
+ 4. Quality check:
146521
+ - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
146522
+ - commit locally via shell (\`git add . && git commit -m "..."\`)
146523
+
146524
+ 5. Finalize:
146525
+ - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146526
+ - reply to each comment using \`${t2("reply_to_review_comment")}\`
146527
+ - resolve addressed threads via \`${t2("resolve_review_thread")}\`
146528
+ - call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)
146529
+
146530
+ ${learningsStep(t2, 6)}`
146531
+ },
146532
+ // Review and IncrementalReview use the multi-lens orchestrator pattern
146533
+ // (canonical source: .claude/commands/anneal.md). The orchestrator does
146534
+ // triage → parallel read-only subagent fan-out → aggregate → draft comments
146535
+ // → submit. For someone else's PR, parallel lenses (correctness, security,
146536
+ // research-validated claims, user-journey, etc.) provide breadth across
146537
+ // angles that a single subagent can't carry coherently. Build mode keeps
146538
+ // a single fresh-eyes subagent (different problem shape — orchestrator
146539
+ // wrote the code and bias-mitigation comes from delegating to one
146540
+ // subagent that doesn't share the implementation context).
146541
+ // Deliberate omission vs canonical /anneal: severity categorization in the
146542
+ // final message (the review body has its own CAUTION/IMPORTANT framing
146543
+ // instead of a severity table).
146544
+ {
146545
+ name: "Review",
146546
+ description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
146547
+ prompt: `### Checklist
146548
+
146549
+ 1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
146550
+
146551
+ 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
146552
+
146553
+ if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
146554
+
146555
+ "Genuinely trivial" (skip):
146556
+ - single-word doc typo, whitespace/format-only, comment-only across any number of files
146557
+ - lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
146558
+ - mechanical rename whose only effect is import-path updates
146559
+ - low-risk dep patch bump
146560
+
146561
+ "Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
146562
+ - any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
146563
+ - flipping a feature-flag default, default config value, or retry/timeout constant
146564
+ - changing a money/tax/currency/fee constant by any amount
146565
+ - changing an HTTP method, redirect URL, response code, or status enum
146566
+ - tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
146567
+ - renaming a public API surface (still trivial in shape, but needs an impact lens)
146568
+ - adding a new direct dependency (supply-chain surface)
146569
+ - any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
146570
+ - mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
146571
+
146572
+ When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
146573
+
146574
+ otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
146575
+
146576
+ - **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
146577
+ - **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
146578
+ - **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
146579
+ - **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
146580
+
146581
+ lenses come in two flavors, and you can mix them:
146582
+ - **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
146583
+ - **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
146584
+
146585
+ starter menu (combine, omit, or invent your own):
146586
+ - **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
146587
+ - **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
146588
+ - **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
146589
+ - **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
146590
+ - **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
146591
+ - **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
146592
+ - **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
146593
+ - **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
146594
+ - **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
146595
+ - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
146596
+ - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
146597
+
146598
+ 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
146599
+ - the diff path / target \u2014 reading the diff and the codebase is its job
146600
+ - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146601
+ - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
146602
+ - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
146603
+ - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
146604
+ - ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
146605
+
146606
+ delegation discipline:
146607
+ - do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
146608
+ - do NOT summarize the PR for them (biases toward a validation frame)
146609
+ - do NOT hand them a curated reading list (let them discover scope)
146610
+ - do NOT pre-shape their output with a finding schema
146611
+ - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
146612
+
146613
+ 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
146614
+
146615
+ for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
146616
+
146617
+ 5. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
146618
+
146619
+ note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146620
+
146621
+ The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
146622
+
146623
+ - **critical issues** (blocks merge \u2014 bugs, security, data loss):
146624
+ \`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146625
+ - **recommended changes** (non-critical):
146626
+ \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146627
+ - **no actionable issues**:
146628
+ \`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
146629
+
146630
+ ${PR_SUMMARY_FORMAT}`
146631
+ },
146632
+ // IncrementalReview shares Review's multi-lens orchestrator pattern but
146633
+ // scopes the target to the incremental diff. The "issues must be NEW
146634
+ // since the last Pullfrog review" filter lives at aggregation time
146635
+ // (step 5), NOT in the subagent prompt — pushing the filter into
146636
+ // subagents matches the canonical anneal anti-pattern of "list known
146637
+ // pre-existing failures — don't flag these" and suppresses signal on
146638
+ // regressions the new commits amplified. The review body is just
146639
+ // "Reviewed changes" — a separate "Prior review feedback" checklist
146640
+ // would duplicate the rolling PR summary snapshot's record of what
146641
+ // earlier runs already addressed and add noise to the user-facing
146642
+ // body. Same severity-table omission as Review.
146643
+ {
146644
+ name: "IncrementalReview",
146645
+ description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
146646
+ prompt: `### Checklist
146647
+
146648
+ 1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
146649
+
146650
+ 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
146651
+
146652
+ 3. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
146653
+
146654
+ 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
146655
+
146656
+ if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
146657
+
146658
+ "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
146659
+ "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
146660
+ When unsure, treat as non-trivial.
146661
+
146662
+ otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
146663
+
146664
+ dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
146665
+ - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
146666
+ - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146667
+ - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
146668
+ - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
146669
+ - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
146670
+ - ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
146671
+
146672
+ delegation discipline:
146673
+ - do NOT lens-review the diff yourself in parallel with the subagents
146674
+ - do NOT summarize the changes for them (biases toward validation frame)
146675
+ - do NOT hand them a curated reading list (let them discover scope)
146676
+ - do NOT pre-shape their output with a finding schema
146677
+ - do NOT mention the other lenses (independence is the point)
146678
+
146679
+ 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
146680
+
146681
+ 6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
146682
+
146683
+ 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
146684
+ - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146685
+ - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
146686
+ - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
146687
+ - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
146688
+ - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t2("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
146689
+ },
146690
+ {
146691
+ name: "Plan",
146692
+ description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
146693
+ prompt: `### Checklist
146694
+
146695
+ 1. Analyze the task and gather context:
146696
+ - read AGENTS.md and relevant codebase files
146697
+ - understand the architecture and constraints
146698
+
146699
+ 2. Produce a structured, actionable plan with clear milestones.
146700
+
146701
+ 3. Call \`${t2("report_progress")}\` with the plan.
146702
+
146703
+ ${learningsStep(t2, 4)}`
146704
+ },
146705
+ {
146706
+ name: "Fix",
146707
+ description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
146708
+ prompt: `### Checklist
146709
+
146710
+ 1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
146711
+
146712
+ 2. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
146713
+
146714
+ 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
146715
+
146716
+ 4. Diagnose and fix:
146717
+ - read the workflow file, reproduce locally with the EXACT same commands CI runs
146718
+ - fix the issue using your native file and shell tools
146719
+ - verify the fix by re-running the exact CI command
146720
+ - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
146721
+ - commit locally via shell (\`git add . && git commit -m "..."\`)
146722
+
146723
+ 5. Finalize:
146724
+ - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146725
+ - call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
146726
+
146727
+ ${learningsStep(t2, 6)}`
146728
+ },
146729
+ {
146730
+ name: "ResolveConflicts",
146731
+ description: "Resolve merge conflicts in a PR branch against the base branch",
146732
+ prompt: `### Checklist
146733
+
146734
+ 1. **Setup**:
146735
+ - Call \`${t2("checkout_pr")}\` to get the PR branch.
146736
+ - Call \`${t2("get_pull_request")}\` to identify the base branch (e.g., 'main').
146737
+ - Call \`${t2("git_fetch")}\` to fetch the base branch.
146738
+
146739
+ 2. **Merge Attempt**:
146740
+ - Run \`git merge origin/<base_branch>\` via shell.
146741
+ - If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
146742
+ - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
146743
+
146744
+ 3. **Resolve Conflicts**:
146745
+ - Run \`git status\` or parse the merge output to find the list of conflicting files.
146746
+ - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
146747
+ - Verify the file syntax is correct after resolution.
146748
+
146749
+ 4. **Finalize**:
146750
+ - Run a final verification (build/test) to ensure the resolution works.
146751
+ - \`git add . && git commit -m "resolve merge conflicts"\`
146752
+ - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146753
+ - Call \`${t2("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
146754
+ },
146755
+ {
146756
+ name: "Task",
146757
+ description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
146758
+ prompt: `### Checklist
146759
+
146760
+ 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
146761
+
146762
+ 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
146763
+ - plan your approach before starting
146764
+ - use native file and shell tools for local operations
146765
+ - use ${pullfrogMcpName} MCP tools for GitHub/git operations
146766
+ - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
146767
+
146768
+ 3. Finalize:
146769
+ - if code changes were made, push to a pull request (new or existing) using \`${t2("push_branch")}\` and \`${t2("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
146770
+ - call \`${t2("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
146771
+ - if the task involved labeling, commenting, or other GitHub operations, perform those directly
146772
+
146773
+ ${learningsStep(t2, 4)}`
146774
+ }
146775
+ ];
146776
+ }
146777
+ var modes = computeModes("opencode");
146778
+
146657
146779
  // agents/claude.ts
146658
146780
  import { execFileSync as execFileSync3 } from "node:child_process";
146659
146781
  import { mkdirSync as mkdirSync4, writeFileSync as writeFileSync7 } from "node:fs";
@@ -146746,20 +146868,34 @@ async function installFromNpmTarball(params) {
146746
146868
  }
146747
146869
 
146748
146870
  // utils/providerErrors.ts
146871
// prefix shared by the HTTP status patterns: a `status` / `status_code` /
// `http status` style key, an optional closing quote, `:` or `=`, and an
// optional opening quote. the status code itself is appended per entry.
var statusKey = `\\b(?:status[_ ]?code|http[_ ]?status|status)["']?\\s*[:=]\\s*["']?`;
// ordered table of provider failure signatures; the first entry whose regex
// matches decides the label, so order matters.
var PROVIDER_ERROR_PATTERNS = [
  // status codes only count when they follow a status key, never as bare digits.
  ...[
    ["429", "rate limited (429)"],
    ["500", "provider 500 error"],
    ["503", "provider unavailable (503)"]
  ].map(([code, label]) => ({ regex: new RegExp(`${statusKey}${code}\\b`, "i"), label })),
  // `rate limit`, `rate limited`, `rate_limit_error`, `rate_limit_exceeded`, ...
  // requiring a `_` or space between "rate" and "limit" keeps
  // `x-ratelimit-*` / `anthropic-ratelimit-*` header names from matching.
  { regex: /\brate[_ ]limit/i, label: "rate limited" },
  { regex: /\bRESOURCE_EXHAUSTED\b/, label: "quota exhausted" },
  // bare `INTERNAL` token only: `INTERNAL_SERVER_ERROR` and `INTERNALS`
  // fail the word-boundary check.
  { regex: /\bINTERNAL\b/, label: "provider internal error" },
  { regex: /\bUNAVAILABLE\b/, label: "provider unavailable" },
  // deliberately a plain substring so `insufficient_quota`, `quota_exceeded`
  // and `quotaExceeded` all match.
  { regex: /quota/i, label: "quota error" },
  // a `limit` key whose value is 0, e.g. `{"limit": 0}`; `time_limit` and
  // `field_limit` are rejected by the boundary before `limit`.
  { regex: /["']?\blimit\b["']?\s*:\s*0\b/, label: "zero quota" }
];
146760
146896
// returns the label of the first PROVIDER_ERROR_PATTERNS entry whose regex
// matches `text`, or null when none of them do.
function detectProviderError(text) {
  const hit = PROVIDER_ERROR_PATTERNS.find((candidate) => candidate.regex.test(text));
  return hit ? hit.label : null;
}
@@ -146869,6 +147005,7 @@ var ThinkingTimer = class {
146869
147005
  };
146870
147006
 
146871
147007
  // agents/postRun.ts
147008
+ import { readFile } from "node:fs/promises";
146872
147009
  var MAX_HOOK_OUTPUT_CHARS = 4096;
146873
147010
  function truncateHookOutput(raw2) {
146874
147011
  if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
@@ -146913,6 +147050,23 @@ function buildStopHookPrompt(failure) {
146913
147050
  "```"
146914
147051
  ].join("\n");
146915
147052
  }
147053
// resolves true only when the file at `filePath` can be read as utf8 and its
// content is exactly `seed`. an unreadable file resolves false.
async function isSummaryUnchanged(filePath, seed) {
  let onDisk;
  try {
    onDisk = await readFile(filePath, "utf8");
  } catch {
    return false;
  }
  return onDisk === seed;
}
147061
// builds the follow-up prompt sent when the summary file still equals its
// seed: three paragraphs separated by blank lines, with `filePath` quoted in
// the first one.
function buildSummaryStalePrompt(filePath) {
  const headline = `PR SUMMARY UNTOUCHED \u2014 the rolling PR summary file at \`${filePath}\` is byte-identical to its seed; this run did not edit it.`;
  const instruction = "review the diff and update the file in place to reflect what changed in the PR. update intent, key changes, and any risks worth flagging \u2014 keep the existing section headings stable so incremental runs produce clean diffs.";
  const minimum = "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is \u2014 but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context.";
  return [headline, instruction, minimum].join("\n\n");
}
146916
147070
  async function collectPostRunIssues(params) {
146917
147071
  const issues = {};
146918
147072
  if (params.stopScript) {
@@ -146921,12 +147075,17 @@ async function collectPostRunIssues(params) {
146921
147075
  }
146922
147076
  const status = getGitStatus();
146923
147077
  if (status) issues.dirtyTree = status;
147078
+ if (params.summaryFilePath && params.summarySeed !== void 0) {
147079
+ const stale = await isSummaryUnchanged(params.summaryFilePath, params.summarySeed);
147080
+ if (stale) issues.summaryStale = { filePath: params.summaryFilePath };
147081
+ }
146924
147082
  return issues;
146925
147083
  }
146926
147084
// renders one prompt section per issue present on `issues` (stop hook, dirty
// tree, stale summary, in that order) and joins them with a `---` rule.
// returns an empty string when no issue is set.
function buildPostRunPrompt(issues) {
  const sections = [
    ...(issues.stopHook ? [buildStopHookPrompt(issues.stopHook)] : []),
    ...(issues.dirtyTree ? [buildCommitPrompt(issues.dirtyTree)] : []),
    ...(issues.summaryStale ? [buildSummaryStalePrompt(issues.summaryStale.filePath)] : [])
  ];
  return sections.join("\n\n---\n\n");
}
146932
147091
  function buildLearningsReflectionPrompt(agentId) {
@@ -146949,9 +147108,15 @@ async function runPostRunRetryLoop(params) {
146949
147108
  let finalIssues = {};
146950
147109
  let gateResumeCount = 0;
146951
147110
  let pendingReflection = params.reflectionPrompt;
147111
+ let summaryStaleNudged = false;
146952
147112
  while (gateResumeCount < MAX_POST_RUN_RETRIES) {
146953
147113
  if (!result.success) break;
146954
- const issues = await collectPostRunIssues({ stopScript: params.stopScript });
147114
+ const issues = await collectPostRunIssues({
147115
+ stopScript: params.stopScript,
147116
+ summaryFilePath: summaryStaleNudged ? void 0 : params.summaryFilePath,
147117
+ summarySeed: summaryStaleNudged ? void 0 : params.summarySeed
147118
+ });
147119
+ if (issues.summaryStale) summaryStaleNudged = true;
146955
147120
  finalIssues = issues;
146956
147121
  if (!hasPostRunIssues(issues)) {
146957
147122
  if (!pendingReflection) break;
@@ -146983,8 +147148,17 @@ async function runPostRunRetryLoop(params) {
146983
147148
  }
146984
147149
  log.info(`\xBB post-run retry (attempt ${gateResumeCount + 1}/${MAX_POST_RUN_RETRIES})`);
146985
147150
  const prompt = buildPostRunPrompt(issues);
147151
+ const onlySummaryStale = issues.summaryStale !== void 0 && issues.stopHook === void 0 && issues.dirtyTree === void 0;
147152
+ const preResume = result;
146986
147153
  result = await params.resume({ prompt, previousResult: result });
146987
147154
  aggregatedUsage = mergeAgentUsage(aggregatedUsage, result.usage);
147155
+ if (!result.success && onlySummaryStale) {
147156
+ log.warning(
147157
+ `\xBB summary-stale resume turn failed (${result.error ?? "unknown error"}), preserving prior successful result`
147158
+ );
147159
+ result = preResume;
147160
+ break;
147161
+ }
146988
147162
  gateResumeCount++;
146989
147163
  }
146990
147164
  if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
@@ -147121,6 +147295,7 @@ async function runClaude(params) {
147121
147295
  const thinkingTimer = new ThinkingTimer();
147122
147296
  let finalOutput = "";
147123
147297
  let sessionId;
147298
+ let resultErrorSubtype = null;
147124
147299
  let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
147125
147300
  let accumulatedCostUsd = 0;
147126
147301
  let tokensLogged = false;
@@ -147224,9 +147399,14 @@ async function runClaude(params) {
147224
147399
  tokensLogged = true;
147225
147400
  }
147226
147401
  } else if (subtype === "error_max_turns") {
147402
+ resultErrorSubtype = subtype;
147227
147403
  log.info(`\xBB ${params.label} max turns reached: ${JSON.stringify(event)}`);
147228
147404
  } else if (subtype === "error_during_execution") {
147405
+ resultErrorSubtype = subtype;
147229
147406
  log.info(`\xBB ${params.label} execution error: ${JSON.stringify(event)}`);
147407
+ } else if (subtype.startsWith("error")) {
147408
+ resultErrorSubtype = subtype;
147409
+ log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
147230
147410
  } else {
147231
147411
  log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
147232
147412
  }
@@ -147357,6 +147537,15 @@ ${stderrContext}`);
147357
147537
  sessionId
147358
147538
  };
147359
147539
  }
147540
+ if (resultErrorSubtype) {
147541
+ return {
147542
+ success: false,
147543
+ output: finalOutput || output,
147544
+ error: `result subtype: ${resultErrorSubtype}`,
147545
+ usage,
147546
+ sessionId
147547
+ };
147548
+ }
147360
147549
  return { success: true, output: finalOutput || output, usage, sessionId };
147361
147550
  } catch (error49) {
147362
147551
  params.todoTracker?.cancel();
@@ -147485,6 +147674,8 @@ var claude = agent({
147485
147674
  initialResult: result,
147486
147675
  initialUsage: result.usage,
147487
147676
  stopScript: ctx.stopScript,
147677
+ summaryFilePath: ctx.summaryFilePath,
147678
+ summarySeed: ctx.summarySeed,
147488
147679
  reflectionPrompt: buildLearningsReflectionPrompt("claude"),
147489
147680
  canResume: (r) => Boolean(r.sessionId),
147490
147681
  resume: async (c2) => {
@@ -147818,6 +148009,12 @@ async function runOpenCode(params) {
147818
148009
  log.debug(withLabel(label, `tool output: ${outputStr}`));
147819
148010
  }
147820
148011
  },
148012
+ error: (event) => {
148013
+ agentErrorEvent = event;
148014
+ const errorName = event.error?.name || "unknown";
148015
+ const errorMessage = event.error?.data?.message || event.error?.name || JSON.stringify(event);
148016
+ log.info(`\xBB ${params.label} error event: ${errorName}: ${errorMessage}`);
148017
+ },
147821
148018
  result: async (event) => {
147822
148019
  const status = event.status || "unknown";
147823
148020
  const duration4 = event.stats?.duration_ms || 0;
@@ -147838,6 +148035,7 @@ async function runOpenCode(params) {
147838
148035
  };
147839
148036
  const recentStderr = [];
147840
148037
  let lastProviderError = null;
148038
+ let agentErrorEvent = null;
147841
148039
  let output = "";
147842
148040
  let stdoutBuffer = "";
147843
148041
  try {
@@ -147956,6 +148154,17 @@ ${stderrContext}`);
147956
148154
  usage
147957
148155
  };
147958
148156
  }
148157
+ if (agentErrorEvent) {
148158
+ const errorEvent = agentErrorEvent;
148159
+ const errorName = errorEvent.error?.name || "agent error";
148160
+ const errorMessage = errorEvent.error?.data?.message || errorEvent.error?.name || JSON.stringify(errorEvent);
148161
+ return {
148162
+ success: false,
148163
+ output: finalOutput || output,
148164
+ error: `${errorName}: ${errorMessage}`,
148165
+ usage
148166
+ };
148167
+ }
147959
148168
  return { success: true, output: finalOutput || output, usage };
147960
148169
  } catch (error49) {
147961
148170
  params.todoTracker?.cancel();
@@ -148031,6 +148240,8 @@ var opencode = agent({
148031
148240
  initialResult: result,
148032
148241
  initialUsage: result.usage,
148033
148242
  stopScript: ctx.stopScript,
148243
+ summaryFilePath: ctx.summaryFilePath,
148244
+ summarySeed: ctx.summarySeed,
148034
148245
  reflectionPrompt: buildLearningsReflectionPrompt("opencode"),
148035
148246
  resume: async (c2) => runOpenCode({
148036
148247
  ...runParams,
@@ -152214,8 +152425,8 @@ async function reportErrorToComment(ctx) {
152214
152425
  const formattedError = ctx.title ? `${ctx.title}
152215
152426
 
152216
152427
  ${ctx.error}` : ctx.error;
152217
- const commentId = ctx.toolState.progressCommentId;
152218
- if (!commentId) {
152428
+ const comment = ctx.toolState.progressComment;
152429
+ if (!comment) {
152219
152430
  return;
152220
152431
  }
152221
152432
  const repoContext = parseRepoContext();
@@ -152234,12 +152445,11 @@ ${ctx.error}` : ctx.error;
152234
152445
  customParts,
152235
152446
  model: ctx.toolState.model
152236
152447
  });
152237
- await octokit.rest.issues.updateComment({
152238
- owner: repoContext.owner,
152239
- repo: repoContext.name,
152240
- comment_id: commentId,
152241
- body: `${formattedError}${footer}`
152242
- });
152448
+ await updateProgressComment(
152449
+ { octokit, owner: repoContext.owner, repo: repoContext.name },
152450
+ comment,
152451
+ `${formattedError}${footer}`
152452
+ );
152243
152453
  ctx.toolState.wasUpdated = true;
152244
152454
  }
152245
152455
 
@@ -152578,7 +152788,7 @@ When embedding images (e.g. uploaded screenshots) in comments or PR bodies, alwa
152578
152788
 
152579
152789
  **\`report_progress\`**: call this exactly once at the end of every run with a brief final summary (1-3 sentences) unless the mode guidance instructs otherwise. Never call it for intermediate status updates (e.g., "Checking for changes...", "Starting review...") \u2014 the task list handles live progress automatically. Calling \`report_progress\` replaces the task list with your summary and preserves the current task list in a collapsible section. Keep the summary concise \u2014 do not repeat what the task list already shows. Focus on the outcome (what was accomplished, links to artifacts) rather than listing individual steps. If something failed, include the tool's error text even when that makes the summary longer.
152580
152790
 
152581
- Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments, PR Summary comments).
152791
+ Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments).
152582
152792
 
152583
152793
  ### If you get stuck
152584
152794
 
@@ -152739,7 +152949,11 @@ var JsonPayload = type({
152739
152949
  "eventInstructions?": "string",
152740
152950
  "event?": "object",
152741
152951
  "timeout?": "string | undefined",
152742
- "progressCommentId?": "string | undefined"
152952
+ "progressComment?": type({
152953
+ id: "string",
152954
+ type: "'issue' | 'review'"
152955
+ }).or("undefined"),
152956
+ "generateSummary?": "boolean | undefined"
152743
152957
  });
152744
152958
  var COLLABORATOR_PERMISSIONS = ["admin", "maintain", "write"];
152745
152959
  function isCollaborator(event) {
@@ -152821,7 +153035,8 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
152821
153035
  event,
152822
153036
  timeout: inputs.timeout ?? jsonPayload?.timeout,
152823
153037
  cwd: resolveCwd(inputs.cwd),
152824
- progressCommentId: jsonPayload?.progressCommentId,
153038
+ progressComment: jsonPayload?.progressComment,
153039
+ generateSummary: jsonPayload?.generateSummary,
152825
153040
  // permissions: inputs > repoSettings > fallbacks
152826
153041
  push: inputs.push ?? repoSettings.push ?? "restricted",
152827
153042
  shell: resolvedShell,
@@ -152830,6 +153045,40 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
152830
153045
  };
152831
153046
  }
152832
153047
 
153048
+ // utils/prSummary.ts
153049
+ import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
153050
+ import { dirname as dirname4, join as join14 } from "node:path";
153051
// file name of the rolling PR summary; summaryFilePath joins it onto the
// directory it is given.
var SUMMARY_FILE_NAME = "pullfrog-summary.md";
// starter content seedSummaryFile writes when it is not handed a previous
// snapshot of at least MIN_SNAPSHOT_LENGTH trimmed characters.
var SUMMARY_SCAFFOLD = `# PR summary

<!-- durable cross-run context. edit in place; the next agent run reads this
before reviewing new commits. structure however serves the PR best. -->
`;
// trimmed length below which a snapshot is ignored: seedSummaryFile falls back
// to the scaffold and readSummaryFile returns null.
var MIN_SNAPSHOT_LENGTH = 60;
// readSummaryFile cuts trimmed content down to at most this many UTF-16 code
// units.
var MAX_SNAPSHOT_LENGTH = 32768;
153059
// path of the summary file inside the directory `tmpdir3`.
function summaryFilePath(tmpdir3) {
  const segments = [tmpdir3, SUMMARY_FILE_NAME];
  return join14(...segments);
}
153062
// writes the initial summary file under `params.tmpdir`, creating the parent
// directory if needed, and resolves to the file's path. the content is
// `params.previousSnapshot` (verbatim, untrimmed) when its trimmed length is
// at least MIN_SNAPSHOT_LENGTH; otherwise SUMMARY_SCAFFOLD.
async function seedSummaryFile(params) {
  const target = summaryFilePath(params.tmpdir);
  await mkdir(dirname4(target), { recursive: true });
  const { previousSnapshot } = params;
  let contents = SUMMARY_SCAFFOLD;
  if (previousSnapshot && previousSnapshot.trim().length >= MIN_SNAPSHOT_LENGTH) {
    contents = previousSnapshot;
  }
  await writeFile2(target, contents, "utf8");
  return target;
}
153069
// reads the summary file at `path3` and resolves to its trimmed content.
// resolves null when the file cannot be read or the trimmed content is
// shorter than MIN_SNAPSHOT_LENGTH. content longer than MAX_SNAPSHOT_LENGTH
// is truncated to at most that many UTF-16 code units.
async function readSummaryFile(path3) {
  let raw2;
  try {
    raw2 = await readFile2(path3, "utf8");
  } catch {
    return null;
  }
  const trimmed = raw2.trim();
  if (trimmed.length < MIN_SNAPSHOT_LENGTH) return null;
  if (trimmed.length <= MAX_SNAPSHOT_LENGTH) return trimmed;
  // never cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it so the snapshot stays well-formed.
  const lastKept = trimmed.charCodeAt(MAX_SNAPSHOT_LENGTH - 1);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return trimmed.slice(0, splitsPair ? MAX_SNAPSHOT_LENGTH - 1 : MAX_SNAPSHOT_LENGTH);
}
153081
+
152833
153082
  // utils/reviewCleanup.ts
152834
153083
  var RE_REVIEW_PREAMBLE = "Incrementally re-review the new commits on this pull request. Use the IncrementalReview mode.";
152835
153084
  async function postReviewCleanup(ctx) {
@@ -152889,11 +153138,16 @@ async function dispatchFollowUpReReview(ctx, reviewedSha) {
152889
153138
  await ctx.octokit.rest.actions.createWorkflowDispatch({
152890
153139
  owner: ctx.repo.owner,
152891
153140
  repo: ctx.repo.name,
152892
- workflow_id: "pullfrog.yml",
153141
+ workflow_id: getCurrentWorkflowFilename(),
152893
153142
  ref: pr.data.base.repo.default_branch,
152894
153143
  inputs: { prompt: JSON.stringify(payload) }
152895
153144
  });
152896
153145
  }
153146
// extracts the last path segment before `@` from the GITHUB_WORKFLOW_REF
// environment variable. falls back to "pullfrog.yml" when the variable is
// unset or has no such segment.
function getCurrentWorkflowFilename() {
  const workflowRef = process.env.GITHUB_WORKFLOW_REF ?? "";
  const captured = /\/([^/]+)@/.exec(workflowRef);
  return captured === null ? "pullfrog.yml" : captured[1];
}
152897
153151
 
152898
153152
  // utils/run.ts
152899
153153
  async function handleAgentResult(ctx) {
@@ -152929,10 +153183,10 @@ async function handleAgentResult(ctx) {
152929
153183
  };
152930
153184
  }
152931
153185
 
152932
- // utils/runContextData.ts
152933
- var core5 = __toESM(require_core(), 1);
152934
-
152935
153186
  // utils/runContext.ts
153187
// truthy when `params.isOss` is truthy or `params.plan` is exactly "payg".
function isInfraCovered(params) {
  const oss = params.isOss;
  if (oss) return oss;
  return params.plan === "payg";
}
152936
153190
  var defaultSettings = {
152937
153191
  model: null,
152938
153192
  modes: [],
@@ -152950,7 +153204,8 @@ var defaultSettings = {
152950
153204
// baseline run context built from defaultSettings with an empty API token.
// the `oss` and `plan` values are the same ones fetchRunContext substitutes
// when the response omits those fields (`data.oss ?? false`,
// `data.plan ?? "none"`).
// NOTE(review): presumably what callers get when the run-context fetch
// fails — confirm in fetchRunContext.
var defaultRunContext = {
  settings: defaultSettings,
  apiToken: "",
  oss: false,
  plan: "none"
};
152955
153210
  async function fetchRunContext(params) {
152956
153211
  const timeoutMs = 3e4;
@@ -152989,6 +153244,7 @@ async function fetchRunContext(params) {
152989
153244
  },
152990
153245
  apiToken: data.apiToken,
152991
153246
  oss: data.oss ?? false,
153247
+ plan: data.plan ?? "none",
152992
153248
  proxyModel: data.proxyModel,
152993
153249
  dbSecrets: data.dbSecrets
152994
153250
  };
@@ -152999,6 +153255,7 @@ async function fetchRunContext(params) {
152999
153255
  }
153000
153256
 
153001
153257
  // utils/runContextData.ts
153258
+ var core5 = __toESM(require_core(), 1);
153002
153259
  async function resolveRunContextData(params) {
153003
153260
  log.info(`\xBB running Pullfrog v${package_default.version}...`);
153004
153261
  const repoContext = parseRepoContext();
@@ -153020,6 +153277,7 @@ async function resolveRunContextData(params) {
153020
153277
  repoSettings: runContext.settings,
153021
153278
  apiToken: runContext.apiToken,
153022
153279
  oss: runContext.oss,
153280
+ plan: runContext.plan,
153023
153281
  proxyModel: runContext.proxyModel,
153024
153282
  dbSecrets: runContext.dbSecrets
153025
153283
  };
@@ -153029,9 +153287,9 @@ async function resolveRunContextData(params) {
153029
153287
  import { execFileSync as execFileSync5, execSync as execSync3 } from "node:child_process";
153030
153288
  import { mkdtempSync } from "node:fs";
153031
153289
  import { tmpdir as tmpdir2 } from "node:os";
153032
- import { join as join14 } from "node:path";
153290
+ import { join as join15 } from "node:path";
153033
153291
  function createTempDirectory() {
153034
- const sharedTempDir = mkdtempSync(join14(tmpdir2(), "pullfrog-"));
153292
+ const sharedTempDir = mkdtempSync(join15(tmpdir2(), "pullfrog-"));
153035
153293
  process.env.PULLFROG_TEMP_DIR = sharedTempDir;
153036
153294
  log.info(`\xBB created temp dir at ${sharedTempDir}`);
153037
153295
  return sharedTempDir;
@@ -153346,6 +153604,73 @@ function resolveAgentForLog(ctx) {
153346
153604
  }
153347
153605
  return ctx.agentName;
153348
153606
  }
153607
+ var BillingError = class extends Error {
153608
+ code;
153609
+ declineCode;
153610
+ needsReauthentication;
153611
+ constructor(message, opts = {}) {
153612
+ super(message);
153613
+ this.name = "BillingError";
153614
+ this.code = opts.code ?? null;
153615
+ this.declineCode = opts.declineCode ?? null;
153616
+ this.needsReauthentication = opts.needsReauthentication ?? false;
153617
+ }
153618
+ };
153619
+ var TransientError = class extends Error {
153620
+ constructor(message) {
153621
+ super(message);
153622
+ this.name = "TransientError";
153623
+ }
153624
+ };
153625
+ function billingConsoleUrl(owner, anchor) {
153626
+ return `https://pullfrog.com/console/${encodeURIComponent(owner)}#${anchor}`;
153627
+ }
153628
+ function formatBillingErrorSummary(error49, owner) {
153629
+ if (error49.code === "router_requires_card") {
153630
+ return [
153631
+ "**Add a card to start using Pullfrog Router.**",
153632
+ "",
153633
+ "Router proxies OpenRouter at raw cost \u2014 no platform markup, and your first $20 of usage is on us.",
153634
+ "",
153635
+ `[Add a card \u2192](${billingConsoleUrl(owner, "model-access")})`
153636
+ ].join("\n");
153637
+ }
153638
+ if (error49.needsReauthentication) {
153639
+ const code = error49.declineCode ?? "authentication_required";
153640
+ return [
153641
+ `**Your card issuer requires 3D Secure on every charge** (\`${code}\`).`,
153642
+ "",
153643
+ "Pullfrog can't complete a 3DS challenge from inside a workflow. Top up your Router balance once in Stripe Checkout \u2014 subsequent runs draw from the prepaid balance without re-triggering 3DS.",
153644
+ "",
153645
+ `[Top up balance \u2192](${billingConsoleUrl(owner, "billing")})`
153646
+ ].join("\n");
153647
+ }
153648
+ if (error49.declineCode) {
153649
+ return [
153650
+ `**Your card was declined** (\`${error49.declineCode}\`).`,
153651
+ "",
153652
+ "Update your payment method and Pullfrog will retry on the next run.",
153653
+ "",
153654
+ `[Update payment method \u2192](${billingConsoleUrl(owner, "billing")})`
153655
+ ].join("\n");
153656
+ }
153657
+ return [
153658
+ "**Your Pullfrog balance is empty.**",
153659
+ "",
153660
+ "Top up your balance or enable auto-reload to keep runs flowing.",
153661
+ "",
153662
+ `[Manage billing \u2192](${billingConsoleUrl(owner, "billing")})`
153663
+ ].join("\n");
153664
+ }
153665
+ function formatTransientErrorSummary(error49, owner) {
153666
+ return [
153667
+ "**Pullfrog billing is temporarily unavailable.**",
153668
+ "",
153669
+ error49.message,
153670
+ "",
153671
+ `Usually transient \u2014 the next dispatch should succeed. If it persists, check [status.pullfrog.com](https://status.pullfrog.com) or [your console](${billingConsoleUrl(owner, "billing")}).`
153672
+ ].join("\n");
153673
+ }
153349
153674
  async function mintProxyKey(ctx) {
153350
153675
  try {
153351
153676
  process.env.ACTIONS_ID_TOKEN_REQUEST_URL = ctx.oidcCredentials.requestUrl;
@@ -153358,6 +153683,20 @@ async function mintProxyKey(ctx) {
153358
153683
  method: "POST",
153359
153684
  headers: { Authorization: `Bearer ${oidcToken}` }
153360
153685
  });
153686
+ if (response.status === 402) {
153687
+ const body = await response.json().catch(() => null);
153688
+ throw new BillingError(body?.error ?? "insufficient balance", {
153689
+ code: body?.code ?? null,
153690
+ declineCode: body?.declineCode ?? null,
153691
+ needsReauthentication: body?.needsReauthentication ?? false
153692
+ });
153693
+ }
153694
+ if (response.status === 503) {
153695
+ const body = await response.json().catch(() => null);
153696
+ throw new TransientError(
153697
+ body?.error ?? "billing service temporarily unavailable \u2014 retry shortly"
153698
+ );
153699
+ }
153361
153700
  if (!response.ok) {
153362
153701
  log.warning(`proxy key mint failed (${response.status})`);
153363
153702
  return null;
@@ -153365,6 +153704,8 @@ async function mintProxyKey(ctx) {
153365
153704
  const data = await response.json();
153366
153705
  return data.key;
153367
153706
  } catch (error49) {
153707
+ if (error49 instanceof BillingError) throw error49;
153708
+ if (error49 instanceof TransientError) throw error49;
153368
153709
  log.warning(`proxy key mint error: ${error49 instanceof Error ? error49.message : String(error49)}`);
153369
153710
  return null;
153370
153711
  } finally {
@@ -153374,19 +153715,56 @@ async function mintProxyKey(ctx) {
153374
153715
  }
153375
153716
  async function resolveProxyModel(ctx) {
153376
153717
  if (process.env.PULLFROG_MODEL?.trim()) return;
153377
- if (ctx.oss && ctx.proxyModel) {
153378
- if (!ctx.oidcCredentials) {
153379
- log.warning("\xBB oss repo but no OIDC credentials available \u2014 skipping proxy");
153380
- return;
153381
- }
153382
- const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials });
153383
- if (!key) return;
153384
- process.env.OPENROUTER_API_KEY = key;
153385
- core6.setSecret(key);
153386
- ctx.payload.proxyModel = ctx.proxyModel;
153387
- log.info(`\xBB proxy: oss \u2192 ${ctx.proxyModel}`);
153718
+ const needsProxy = isInfraCovered({ isOss: ctx.oss, plan: ctx.plan }) && ctx.proxyModel;
153719
+ if (!needsProxy) return;
153720
+ if (!ctx.oidcCredentials) {
153721
+ log.warning("\xBB proxy requested but no OIDC credentials available \u2014 skipping");
153722
+ return;
153723
+ }
153724
+ const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials });
153725
+ if (!key) return;
153726
+ process.env.OPENROUTER_API_KEY = key;
153727
+ core6.setSecret(key);
153728
+ ctx.payload.proxyModel = ctx.proxyModel;
153729
+ const label = ctx.oss ? "oss" : "router";
153730
+ log.info(`\xBB proxy: ${label} \u2192 ${ctx.proxyModel}`);
153731
+ }
153732
+ async function fetchPreviousSnapshot(ctx, prNumber) {
153733
+ if (!ctx.githubInstallationToken) return null;
153734
+ try {
153735
+ const response = await apiFetch({
153736
+ path: `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/pr/${prNumber}/summary-comment`,
153737
+ method: "GET",
153738
+ headers: { authorization: `Bearer ${ctx.githubInstallationToken}` },
153739
+ signal: AbortSignal.timeout(1e4)
153740
+ });
153741
+ if (!response.ok) return null;
153742
+ const data = await response.json();
153743
+ return typeof data.snapshot === "string" && data.snapshot.length > 0 ? data.snapshot : null;
153744
+ } catch {
153745
+ return null;
153746
+ }
153747
+ }
153748
+ async function persistSummary(ctx) {
153749
+ const filePath = ctx.toolState.summaryFilePath;
153750
+ if (!filePath) return;
153751
+ if (ctx.toolState.summaryPersistAttempted) return;
153752
+ ctx.toolState.summaryPersistAttempted = true;
153753
+ const snapshot2 = await readSummaryFile(filePath);
153754
+ if (!snapshot2) {
153755
+ log.debug(`pr summary tmpfile missing or invalid at ${filePath} \u2014 skipping persist`);
153756
+ return;
153757
+ }
153758
+ const seed = ctx.toolState.summarySeed?.trim();
153759
+ if (seed !== void 0 && snapshot2 === seed) {
153760
+ log.warning(
153761
+ "\xBB pr summary tmpfile unchanged from seed \u2014 skipping persist (agent did not edit it)"
153762
+ );
153388
153763
  return;
153389
153764
  }
153765
+ await patchWorkflowRunFields(ctx, { summarySnapshot: snapshot2 }).catch((err) => {
153766
+ log.debug(`pr summary persist failed: ${err instanceof Error ? err.message : String(err)}`);
153767
+ });
153390
153768
  }
153391
153769
  async function writeJobSummary(toolState) {
153392
153770
  const usageSummary = formatUsageSummary(toolState.usageEntries);
@@ -153408,7 +153786,7 @@ async function main() {
153408
153786
  let safetyNetTimer;
153409
153787
  const resolvedPromptInput = resolvePromptInput();
153410
153788
  const toolState = initToolState({
153411
- progressCommentId: typeof resolvedPromptInput !== "string" ? resolvedPromptInput.progressCommentId : void 0
153789
+ progressComment: typeof resolvedPromptInput !== "string" ? resolvedPromptInput.progressComment : void 0
153412
153790
  });
153413
153791
  resolveGit();
153414
153792
  const jobToken = getJobToken();
@@ -153442,12 +153820,33 @@ async function main() {
153442
153820
  delete process.env.ACTIONS_ID_TOKEN_REQUEST_URL;
153443
153821
  delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
153444
153822
  }
153445
- await resolveProxyModel({
153446
- payload,
153447
- oss: runContext.oss,
153448
- proxyModel: runContext.proxyModel,
153449
- oidcCredentials
153450
- });
153823
+ try {
153824
+ await resolveProxyModel({
153825
+ payload,
153826
+ oss: runContext.oss,
153827
+ plan: runContext.plan,
153828
+ proxyModel: runContext.proxyModel,
153829
+ oidcCredentials
153830
+ });
153831
+ } catch (error49) {
153832
+ if (error49 instanceof BillingError) {
153833
+ const summary2 = formatBillingErrorSummary(error49, runContext.repo.owner);
153834
+ await writeSummary(summary2).catch(() => {
153835
+ });
153836
+ await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
153837
+ });
153838
+ throw error49;
153839
+ }
153840
+ if (error49 instanceof TransientError) {
153841
+ const summary2 = formatTransientErrorSummary(error49, runContext.repo.owner);
153842
+ await writeSummary(summary2).catch(() => {
153843
+ });
153844
+ await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
153845
+ });
153846
+ throw error49;
153847
+ }
153848
+ throw error49;
153849
+ }
153451
153850
  const octokit = createOctokit(tokenRef.mcpToken);
153452
153851
  const runInfo = await resolveRun({ octokit });
153453
153852
  let toolContext;
@@ -153521,12 +153920,28 @@ async function main() {
153521
153920
  jobId: runInfo.jobId,
153522
153921
  mcpServerUrl: "",
153523
153922
  tmpdir: tmpdir3,
153923
+ oss: runContext.oss,
153924
+ plan: runContext.plan,
153524
153925
  resolvedModel
153525
153926
  };
153526
153927
  const mcpHttpServer = __using(_stack, await startMcpHttpServer(toolContext, { outputSchema }), true);
153527
153928
  toolContext.mcpServerUrl = mcpHttpServer.url;
153528
153929
  log.info(`\xBB MCP server started at ${mcpHttpServer.url}`);
153529
153930
  timer.checkpoint("mcpServer");
153931
+ if (payload.generateSummary && payload.event.is_pr && payload.event.issue_number) {
153932
+ const previousSnapshot = await fetchPreviousSnapshot(toolContext, payload.event.issue_number);
153933
+ const filePath = await seedSummaryFile({ tmpdir: tmpdir3, previousSnapshot });
153934
+ toolState.summaryFilePath = filePath;
153935
+ try {
153936
+ toolState.summarySeed = await readFile3(filePath, "utf8");
153937
+ } catch {
153938
+ }
153939
+ log.info(
153940
+ `\xBB summary snapshot seeded at ${filePath} (previous=${previousSnapshot ? "yes" : "no"})`
153941
+ );
153942
+ const ctxForExit = toolContext;
153943
+ onExitSignal(() => persistSummary(ctxForExit));
153944
+ }
153530
153945
  startInstallation(toolContext);
153531
153946
  const modelForLog = resolveModelForLog({ payload, resolvedModel });
153532
153947
  const agentForLog = resolveAgentForLog({ agentName: agent2.name, resolvedModel });
@@ -153558,7 +153973,7 @@ ${instructions.user}` : null,
153558
153973
  log.info(instructions.full);
153559
153974
  });
153560
153975
  if (agentId === "opencode") {
153561
- const pluginDir = join15(process.cwd(), ".opencode", "plugin");
153976
+ const pluginDir = join16(process.cwd(), ".opencode", "plugin");
153562
153977
  const hasPlugins = existsSync7(pluginDir) && readdirSync(pluginDir).some((f) => /\.[jt]sx?$/.test(f));
153563
153978
  if (hasPlugins && toolState.dependencyInstallation?.promise) {
153564
153979
  log.info(
@@ -153584,6 +153999,9 @@ ${instructions.user}` : null,
153584
153999
  }
153585
154000
  });
153586
154001
  toolState.todoTracker = todoTracker;
154002
+ onExitSignal(() => {
154003
+ todoTracker?.cancel();
154004
+ });
153587
154005
  let innerTimeoutFired = false;
153588
154006
  const onInnerActivityTimeout = () => {
153589
154007
  if (innerTimeoutFired) return;
@@ -153614,6 +154032,8 @@ ${instructions.user}` : null,
153614
154032
  instructions,
153615
154033
  todoTracker,
153616
154034
  stopScript: runContext.repoSettings.stopScript,
154035
+ summaryFilePath: toolState.summaryFilePath,
154036
+ summarySeed: toolState.summarySeed,
153617
154037
  onActivityTimeout: onInnerActivityTimeout,
153618
154038
  onToolUse: (event) => {
153619
154039
  const wasTracked = recordDiffReadFromToolUse({
@@ -153668,8 +154088,10 @@ ${instructions.user}` : null,
153668
154088
  log.debug(`post-review cleanup failed: ${error49}`);
153669
154089
  });
153670
154090
  }
153671
- const trackerWasLastWriter = todoTracker?.hasPublished && !toolState.finalSummaryWritten;
153672
- if (toolContext && toolState.progressCommentId && (!toolState.wasUpdated || trackerWasLastWriter)) {
154091
+ if (toolContext) {
154092
+ await persistSummary(toolContext);
154093
+ }
154094
+ if (toolContext && toolState.progressComment && !toolState.finalSummaryWritten) {
153673
154095
  await deleteProgressComment(toolContext).catch((error49) => {
153674
154096
  log.debug(`stranded progress comment cleanup failed: ${error49}`);
153675
154097
  });
@@ -153716,6 +154138,9 @@ ${errorMessage}
153716
154138
  log.debug(`post-review cleanup failed: ${error50}`);
153717
154139
  });
153718
154140
  }
154141
+ if (toolContext) {
154142
+ await persistSummary(toolContext);
154143
+ }
153719
154144
  return {
153720
154145
  success: false,
153721
154146
  error: errorMessage
@@ -153747,135 +154172,8 @@ ${errorMessage}
153747
154172
  }
153748
154173
  }
153749
154174
 
153750
- // utils/postCleanup.ts
153751
- var SHOULD_CHECK_REASON = true;
153752
- function buildErrorCommentBody(ctx, isCancellation) {
153753
- let errorMessage = isCancellation ? `This run was cancelled \u{1F6D1}
153754
-
153755
- The workflow was cancelled before completion.` : `This run croaked \u{1F635}
153756
-
153757
- The workflow encountered an error before any progress could be reported.`;
153758
- if (ctx.runId) {
153759
- errorMessage += " Please check the link below for details.";
153760
- }
153761
- const customParts = [];
153762
- if (!isCancellation && ctx.runId) {
153763
- const apiUrl = getApiUrl();
153764
- customParts.push(
153765
- `[Rerun failed job \u2794](${apiUrl}/trigger/${ctx.repoContext.owner}/${ctx.repoContext.name}/${ctx.runId}?action=rerun)`
153766
- );
153767
- }
153768
- const footer = buildPullfrogFooter({
153769
- triggeredBy: true,
153770
- workflowRun: ctx.runId ? {
153771
- owner: ctx.repoContext.owner,
153772
- repo: ctx.repoContext.name,
153773
- runId: ctx.runId
153774
- } : void 0,
153775
- customParts
153776
- });
153777
- return `${errorMessage}${footer}`;
153778
- }
153779
- async function validateStuckProgressComment(ctx) {
153780
- if (!ctx.promptInput?.progressCommentId) {
153781
- log.info("[post] no progressCommentId in prompt input, skipping cleanup");
153782
- return null;
153783
- }
153784
- const commentId = parseInt(ctx.promptInput.progressCommentId, 10);
153785
- log.info(`[post] validating progressCommentId from prompt input: ${commentId}`);
153786
- try {
153787
- const commentResult = await ctx.octokit.rest.issues.getComment({
153788
- owner: ctx.repoContext.owner,
153789
- repo: ctx.repoContext.name,
153790
- comment_id: commentId
153791
- });
153792
- const body = commentResult.data.body ?? "";
153793
- if (isLeapingIntoActionCommentBody(body)) {
153794
- log.info(`[post] comment ${commentId} is stuck on "Leaping into action"`);
153795
- return commentId;
153796
- }
153797
- if (/^- \[[ x]\] |^- \*\*→\*\* |^- ~~/.test(body)) {
153798
- log.info(`[post] comment ${commentId} is stuck on a todo checklist`);
153799
- return commentId;
153800
- }
153801
- log.info(`[post] comment ${commentId} is not stuck (already updated or different content)`);
153802
- return null;
153803
- } catch (error49) {
153804
- const errorMessage = error49 instanceof Error ? error49.message : String(error49);
153805
- log.info(`[post] failed to get comment ${commentId}: ${errorMessage}`);
153806
- return null;
153807
- }
153808
- }
153809
- async function getIsCancelled(ctx) {
153810
- if (!ctx.runId) return false;
153811
- try {
153812
- const jobsResult = await ctx.octokit.rest.actions.listJobsForWorkflowRun({
153813
- owner: ctx.repoContext.owner,
153814
- repo: ctx.repoContext.name,
153815
- run_id: ctx.runId
153816
- });
153817
- const currentJobName = process.env.GITHUB_JOB;
153818
- const currentJob = currentJobName ? jobsResult.data.jobs.find(
153819
- (j2) => j2.name === currentJobName || j2.name.startsWith(`${currentJobName} (`)
153820
- ) : jobsResult.data.jobs[0];
153821
- if (!currentJob) {
153822
- log.warning("[post] could not find current job");
153823
- return false;
153824
- }
153825
- log.info(`[post] job status: ${currentJob.status}, conclusion: ${currentJob.conclusion}`);
153826
- if (currentJob.conclusion === "cancelled") return true;
153827
- const cancelledStep = currentJob.steps?.find((step) => step.conclusion === "cancelled");
153828
- if (cancelledStep) {
153829
- log.info(`[post] found cancelled step: ${cancelledStep.name}`);
153830
- return true;
153831
- }
153832
- log.info("[post] no cancellation found, assuming failure");
153833
- } catch (error49) {
153834
- log.info(
153835
- `[post] failed to get job status: ${error49 instanceof Error ? error49.message : String(error49)}`
153836
- );
153837
- }
153838
- return false;
153839
- }
153840
- async function runPostCleanup() {
153841
- log.info("\xBB [post] starting post cleanup");
153842
- const runId = process.env.GITHUB_RUN_ID ? Number.parseInt(process.env.GITHUB_RUN_ID, 10) : void 0;
153843
- let promptInput = null;
153844
- try {
153845
- const resolved = resolvePromptInput();
153846
- if (typeof resolved !== "string") promptInput = resolved;
153847
- } catch (error49) {
153848
- log.info(
153849
- `[post] failed to resolve prompt input: ${error49 instanceof Error ? error49.message : String(error49)}`
153850
- );
153851
- }
153852
- const token = getJobToken();
153853
- const repoContext = parseRepoContext();
153854
- const octokit = createOctokit(token);
153855
- const ctx = { repoContext, octokit, runId, promptInput };
153856
- const commentId = await validateStuckProgressComment(ctx);
153857
- if (!commentId) return log.info("\xBB [post] no stuck progress comment to update, skipping cleanup");
153858
- log.info(`\xBB [post] validated stuck comment: ${commentId}, updating with error message`);
153859
- try {
153860
- const body = buildErrorCommentBody(
153861
- ctx,
153862
- SHOULD_CHECK_REASON ? await getIsCancelled(ctx) : false
153863
- );
153864
- await ctx.octokit.rest.issues.updateComment({
153865
- owner: ctx.repoContext.owner,
153866
- repo: ctx.repoContext.name,
153867
- comment_id: commentId,
153868
- body
153869
- });
153870
- log.info("\xBB [post] successfully updated progress comment");
153871
- } catch (error49) {
153872
- const errorMessage = error49 instanceof Error ? error49.message : String(error49);
153873
- log.info(`[post] failed to update comment: ${errorMessage}`);
153874
- }
153875
- }
153876
-
153877
154175
  // commands/gha.ts
153878
- process.env.PATH = `${dirname4(process.execPath)}:${process.env.PATH}`;
154176
+ process.env.PATH = `${dirname5(process.execPath)}:${process.env.PATH}`;
153879
154177
  var STATE_TOKEN = "token";
153880
154178
  async function runMain() {
153881
154179
  try {
@@ -153888,15 +154186,6 @@ async function runMain() {
153888
154186
  core7.setFailed(`action failed: ${errorMessage}`);
153889
154187
  }
153890
154188
  }
153891
- async function runPost() {
153892
- log.debug(`[post] script started at ${(/* @__PURE__ */ new Date()).toISOString()}`);
153893
- try {
153894
- await runPostCleanup();
153895
- } catch (error49) {
153896
- const message = error49 instanceof Error ? error49.message : String(error49);
153897
- log.error(`[post] unexpected error: ${message}`);
153898
- }
153899
- }
153900
154189
  async function tokenMain() {
153901
154190
  const reposInput = core7.getInput("repos");
153902
154191
  const additionalRepos = reposInput ? reposInput.split(",").map((r) => r.trim()).filter(Boolean) : [];
@@ -153917,7 +154206,7 @@ async function tokenPost() {
153917
154206
  core7.info("\xBB installation token revoked");
153918
154207
  }
153919
154208
  function printGhaUsage(params) {
153920
- params.stream(`usage: ${params.prog} gha [token] [--post]
154209
+ params.stream(`usage: ${params.prog} gha [subcommand]
153921
154210
  `);
153922
154211
  params.stream("run the github action runtime flow.");
153923
154212
  params.stream("");
@@ -153926,9 +154215,29 @@ function printGhaUsage(params) {
153926
154215
  params.stream("");
153927
154216
  params.stream("options:");
153928
154217
  params.stream(" -h, --help show help");
153929
- params.stream(" --post run post-cleanup flow");
154218
+ }
154219
+ function printGhaTokenUsage(params) {
154220
+ params.stream(`usage: ${params.prog} gha token [--post]
154221
+ `);
154222
+ params.stream("acquire a github app installation token, or revoke it in the post step.");
154223
+ params.stream("");
154224
+ params.stream("options:");
154225
+ params.stream(" -h, --help show help");
154226
+ params.stream(" --post revoke the previously-acquired token (post-step usage only)");
153930
154227
  }
153931
154228
  function parseGhaArgs(args2) {
154229
+ return (0, import_arg.default)(
154230
+ {
154231
+ "--help": Boolean,
154232
+ "-h": "--help"
154233
+ },
154234
+ {
154235
+ argv: args2,
154236
+ stopAtPositional: true
154237
+ }
154238
+ );
154239
+ }
154240
+ function parseGhaTokenArgs(args2) {
153932
154241
  return (0, import_arg.default)(
153933
154242
  {
153934
154243
  "--help": Boolean,
@@ -153959,23 +154268,40 @@ async function runCli(params) {
153959
154268
  printGhaUsage({ stream: console.log, prog: params.prog });
153960
154269
  return;
153961
154270
  }
153962
- const normalizedArgs = ["gha"];
153963
154271
  const positional = parsed2._;
153964
- if (positional.length > 1) {
153965
- console.error(`unexpected positional arguments for gha: ${positional.slice(1).join(" ")}
154272
+ const subcommand = positional[0];
154273
+ if (!subcommand) {
154274
+ await run(["gha"]);
154275
+ return;
154276
+ }
154277
+ if (subcommand !== "token") {
154278
+ console.error(`unknown gha subcommand: ${subcommand}
153966
154279
  `);
153967
154280
  printGhaUsage({ stream: console.error, prog: params.prog });
153968
154281
  process.exit(1);
153969
154282
  }
153970
- if (positional[0] === "token") {
153971
- normalizedArgs.push("token");
153972
- } else if (positional[0]) {
153973
- console.error(`unknown gha subcommand: ${positional[0]}
154283
+ let tokenParsed;
154284
+ try {
154285
+ tokenParsed = parseGhaTokenArgs(positional.slice(1));
154286
+ } catch (error49) {
154287
+ const message = error49 instanceof Error ? error49.message : String(error49);
154288
+ console.error(`${message}
153974
154289
  `);
153975
- printGhaUsage({ stream: console.error, prog: params.prog });
154290
+ printGhaTokenUsage({ stream: console.error, prog: params.prog });
154291
+ process.exit(1);
154292
+ }
154293
+ if (tokenParsed["--help"]) {
154294
+ printGhaTokenUsage({ stream: console.log, prog: params.prog });
154295
+ return;
154296
+ }
154297
+ if (tokenParsed._.length > 0) {
154298
+ console.error(`unexpected positional arguments for gha token: ${tokenParsed._.join(" ")}
154299
+ `);
154300
+ printGhaTokenUsage({ stream: console.error, prog: params.prog });
153976
154301
  process.exit(1);
153977
154302
  }
153978
- if (parsed2["--post"]) {
154303
+ const normalizedArgs = ["gha", "token"];
154304
+ if (tokenParsed["--post"]) {
153979
154305
  normalizedArgs.push("--post");
153980
154306
  }
153981
154307
  await run(normalizedArgs);
@@ -153988,8 +154314,6 @@ async function run(args2) {
153988
154314
  } else {
153989
154315
  await tokenMain();
153990
154316
  }
153991
- } else if (args2.includes("--post")) {
153992
- await runPost();
153993
154317
  } else {
153994
154318
  await runMain();
153995
154319
  }
@@ -155659,7 +155983,7 @@ async function run2() {
155659
155983
  }
155660
155984
 
155661
155985
  // cli.ts
155662
- var VERSION10 = "0.0.203";
155986
+ var VERSION10 = "0.0.205";
155663
155987
  var bin = basename2(process.argv[1] || "");
155664
155988
  var PROG = bin === "pf" || bin === "pullfrog" ? bin : "pullfrog";
155665
155989
  var rawArgs = process.argv.slice(2);