pullfrog 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -107423,7 +107423,7 @@ function buildCommitPrompt(status) {
107423
107423
  ].join("\n");
107424
107424
  }
107425
107425
  /**
   * Report whether any post-run issue is recorded on `issues`.
   *
   * A field counts as recorded when it is anything other than `undefined`.
   * The removed (-) line checks stopHook, dirtyTree and summaryStale; the
   * added (+) line checks the same three plus unsubmittedReview.
   *
   * @param issues - object whose issue fields are inspected.
   * @returns true if at least one of the checked fields is not undefined.
   */
  function hasPostRunIssues(issues) {
107426
- return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0;
107426
+ return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0 || issues.unsubmittedReview !== void 0;
107427
107427
  }
107428
107428
  var agent = (input) => {
107429
107429
  return {
@@ -109191,13 +109191,38 @@ var ReplyToReviewComment = type({
109191
109191
  "extremely brief reply (1 sentence max) explaining what was fixed, e.g. 'Fixed by renaming to X' or 'Added null check'"
109192
109192
  )
109193
109193
  });
/**
 * Decide whether a review reply about to be posted duplicates one that was
 * already recorded.
 *
 * @param params.existing - the previously recorded reply, if any. The caller
 *   visible in this diff looks it up with
 *   `ctx.toolState.reviewReplies?.get(comment_id)`.
 * @param params.bodyWithFooter - the reply text that is about to be posted.
 * @returns null when there is no recorded reply or its bodyWithFooter differs
 *   from the new one; otherwise an object with kind "already-replied", the
 *   recorded commentId and url, and a human-readable reason string.
 */
109194
+ function duplicateReplyDecision(params) {
109195
+ const existing = params.existing;
// nothing recorded yet: not a duplicate
109196
+ if (!existing) return null;
// only an exact string match counts; a reply with a different body is not a duplicate
109197
+ if (existing.bodyWithFooter !== params.bodyWithFooter) return null;
109198
+ return {
109199
+ kind: "already-replied",
109200
+ commentId: existing.commentId,
109201
+ url: existing.url,
109202
+ reason: `reply ${existing.commentId} with identical body was already posted in this session; ignoring duplicate call`
109203
+ };
109204
+ }
109194
109205
  function ReplyToReviewCommentTool(ctx) {
109195
109206
  return tool({
109196
109207
  name: "reply_to_review_comment",
109197
- description: "Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Call this for EACH comment you address in AddressReviews mode. Keep replies extremely brief (1 sentence max).",
109208
+ description: "Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Call exactly ONCE per parent comment you address in AddressReviews mode \u2014 duplicate calls with the same body are a no-op. Keep replies extremely brief (1 sentence max).",
109198
109209
  parameters: ReplyToReviewComment,
109199
109210
  execute: execute(async ({ pull_number, comment_id, body }) => {
109200
109211
  const bodyWithFooter = addFooter(ctx, body);
109212
+ const dup = duplicateReplyDecision({
109213
+ existing: ctx.toolState.reviewReplies?.get(comment_id),
109214
+ bodyWithFooter
109215
+ });
109216
+ if (dup) {
109217
+ log.info(`skipping duplicate review reply: ${dup.reason}`);
109218
+ return {
109219
+ success: true,
109220
+ skipped: true,
109221
+ reason: dup.reason,
109222
+ commentId: dup.commentId,
109223
+ url: dup.url
109224
+ };
109225
+ }
109201
109226
  const result = await ctx.octokit.rest.pulls.createReplyForReviewComment({
109202
109227
  owner: ctx.repo.owner,
109203
109228
  repo: ctx.repo.name,
@@ -109207,6 +109232,12 @@ function ReplyToReviewCommentTool(ctx) {
109207
109232
  });
109208
109233
  log.info(`\xBB created review comment ${result.data.id} (in reply to ${comment_id})`);
109209
109234
  ctx.toolState.wasUpdated = true;
109235
+ ctx.toolState.reviewReplies ??= /* @__PURE__ */ new Map();
109236
+ ctx.toolState.reviewReplies.set(comment_id, {
109237
+ commentId: result.data.id,
109238
+ url: result.data.html_url,
109239
+ bodyWithFooter
109240
+ });
109210
109241
  return {
109211
109242
  success: true,
109212
109243
  commentId: result.data.id,
@@ -109953,13 +109984,13 @@ var installNodeDependencies = {
109953
109984
  };
109954
109985
  }
109955
109986
  }
109956
- const resolved = resolveCommand(agent2, "frozen", []) || resolveCommand(agent2, "install", []);
109987
+ const resolved = resolveCommand(agent2, "frozen", []);
109957
109988
  if (!resolved) {
109958
109989
  return {
109959
109990
  language: "node",
109960
109991
  packageManager,
109961
109992
  dependenciesInstalled: false,
109962
- issues: [`no install command found for ${agent2}`]
109993
+ issues: [`no frozen-install command available for ${agent2}`]
109963
109994
  };
109964
109995
  }
109965
109996
  if (options.ignoreScripts) {
@@ -142275,7 +142306,7 @@ var import_semver = __toESM(require_semver2(), 1);
142275
142306
  // package.json
142276
142307
  var package_default = {
142277
142308
  name: "pullfrog",
142278
- version: "0.1.2",
142309
+ version: "0.1.3",
142279
142310
  type: "module",
142280
142311
  bin: {
142281
142312
  pullfrog: "dist/cli.mjs",
@@ -143552,7 +143583,7 @@ var CreatePullRequestReview = type({
143552
143583
  "1-2 sentence high-level summary with urgency level, critical callouts, and feedback about code outside the diff. Specific feedback on diff lines goes in 'comments' array."
143553
143584
  ).optional(),
143554
143585
  approved: type.boolean.describe(
143555
- "Set to true to submit as an approval. ONLY when the review contains no actionable feedback \u2014 neither inline comments nor actionable content in the body. Defaults to false (comment-only review). Rejections are not supported."
143586
+ "Set to true to submit as an approval. Use for both 'no issues found' and informational `> [!NOTE]` reviews where the PR is mergeable as-is and nothing in the body warrants code changes \u2014 approving also suppresses the Fix-button footer affordance so users don't dispatch a fix run on non-actionable feedback. Reserve approved: false for `> [!IMPORTANT]` (recommended changes) and `> [!CAUTION]` (critical) reviews. Defaults to false (comment-only review). Rejections are not supported."
143556
143587
  ).optional(),
143557
143588
  commit_id: type.string.describe("Optional SHA of the commit being reviewed. Defaults to latest.").optional(),
143558
143589
  comments: type({
@@ -144082,6 +144113,8 @@ async function ensureBeforeShaReachable(params) {
144082
144113
  }
144083
144114
  }
144084
144115
  var STALE_LOCK_AGE_MS = 3e4;
// Delays in milliseconds (2000, 5000, 10000) passed as `delaysMs` to retry()
// around the pull/<number>/head fetch in checkoutPrBranch.
144116
+ var PULL_REF_RETRY_DELAYS_MS = [2e3, 5e3, 1e4];
// Case-insensitive pattern tested against the fetch error message. In
// checkoutPrBranch a match both triggers abortIfPullRequestMoved and makes the
// shouldRetry predicate return true.
144117
+ var PULL_REF_MISSING_PATTERN = /couldn't find remote ref pull\/\d+\/head/i;
144085
144118
  var GIT_LOCK_PATHS = [
144086
144119
  ".git/shallow.lock",
144087
144120
  ".git/index.lock",
@@ -144107,6 +144140,27 @@ function cleanupStaleGitLocks() {
144107
144140
  }
144108
144141
  }
144109
144142
  }
/**
 * Re-fetch the pull request and report whether it still matches the state
 * captured in `args2.pr`.
 *
 * @param args2.octokit - client used to call `rest.pulls.get`.
 * @param args2.owner - repository owner passed to the lookup.
 * @param args2.repo - repository name passed to the lookup.
 * @param args2.pr - object supplying `number` (the PR to fetch) and `headSha`
 *   (the head commit the PR is expected to still have).
 * @returns false when the fetched PR is not "open" or its head sha differs
 *   from `args2.pr.headSha`; true otherwise, including when the lookup throws.
 */
144143
+ async function isPullRequestStillDispatchable(args2) {
144144
+ try {
144145
+ const { data } = await args2.octokit.rest.pulls.get({
144146
+ owner: args2.owner,
144147
+ repo: args2.repo,
144148
+ pull_number: args2.pr.number
144149
+ });
144150
+ if (data.state !== "open") return false;
144151
+ if (data.head.sha !== args2.pr.headSha) return false;
144152
+ return true;
// fail open: any error from the lookup is treated as "still dispatchable".
// NOTE(review): the error is discarded without logging; presumably deliberate
// best-effort so a failed API call never aborts the checkout by itself. Confirm.
144153
+ } catch {
144154
+ return true;
144155
+ }
144156
+ }
/**
 * Throw when the pull request no longer matches its dispatch-time state.
 *
 * Delegates the check to isPullRequestStillDispatchable(args2) and returns
 * without a value when that resolves to true.
 *
 * @param args2 - forwarded unchanged to isPullRequestStillDispatchable;
 *   `args2.pr.number` is also used in the error message.
 * @throws Error when isPullRequestStillDispatchable resolves to false. The
 *   message does not contain the text matched by PULL_REF_MISSING_PATTERN, so
 *   the shouldRetry predicate in checkoutPrBranch evaluates to false for it.
 *   Presumably that stops further fetch retries; confirm against retry().
 */
144157
+ async function abortIfPullRequestMoved(args2) {
144158
+ const stillValid = await isPullRequestStillDispatchable(args2);
144159
+ if (stillValid) return;
144160
+ throw new Error(
144161
+ `PR #${args2.pr.number} is no longer in the state it was at dispatch (likely closed, merged, or force-pushed between webhook fire and run start). aborting checkout \u2014 re-trigger the run if this PR is still active.`
144162
+ );
144163
+ }
144110
144164
  async function checkoutPrBranch(pr, params) {
144111
144165
  const { octokit, owner, name, gitToken, toolState, beforeSha } = params;
144112
144166
  log.info(`\xBB checking out PR #${pr.number}...`);
@@ -144123,9 +144177,26 @@ async function checkoutPrBranch(pr, params) {
144123
144177
  if (!alreadyOnBranch) {
144124
144178
  $("git", ["checkout", "-B", pr.baseRef, `origin/${pr.baseRef}`], { log: false });
144125
144179
  log.debug(`\xBB fetching PR #${pr.number} (${localBranch})...`);
144126
- await $git("fetch", ["--no-tags", "origin", `+pull/${pr.number}/head:${localBranch}`], {
144127
- token: gitToken
144128
- });
144180
+ await retry(
144181
+ async () => {
144182
+ try {
144183
+ await $git("fetch", ["--no-tags", "origin", `+pull/${pr.number}/head:${localBranch}`], {
144184
+ token: gitToken
144185
+ });
144186
+ } catch (e) {
144187
+ const msg = e instanceof Error ? e.message : String(e);
144188
+ if (PULL_REF_MISSING_PATTERN.test(msg)) {
144189
+ await abortIfPullRequestMoved({ octokit, owner, repo: name, pr });
144190
+ }
144191
+ throw e;
144192
+ }
144193
+ },
144194
+ {
144195
+ delaysMs: PULL_REF_RETRY_DELAYS_MS,
144196
+ label: `pull/${pr.number}/head fetch`,
144197
+ shouldRetry: (e) => PULL_REF_MISSING_PATTERN.test(e instanceof Error ? e.message : String(e))
144198
+ }
144199
+ );
144129
144200
  $("git", ["checkout", localBranch], { log: false });
144130
144201
  log.debug(`\xBB checked out PR #${pr.number}`);
144131
144202
  toolState.checkoutSha = $("git", ["rev-parse", "HEAD"], { log: false }).trim();
@@ -145502,13 +145573,14 @@ function buildModeOverrides(t) {
145502
145573
 
145503
145574
  An existing plan comment was found for this issue. Update that comment with the revised plan \u2014 do not create a new plan comment.
145504
145575
 
145505
- 1. Use \`previousPlanBody\` from this response as the plan to revise; do not call \`get_issue\` or \`get_issue_comments\`.
145506
- 2. Revise the plan based on the user's request:
145576
+ 1. **task list**: create your task list for this run as your first action.
145577
+ 2. Use \`previousPlanBody\` from this response as the plan to revise; do not call \`get_issue\` or \`get_issue_comments\`.
145578
+ 3. Revise the plan based on the user's request:
145507
145579
  - incorporate the current plan (\`previousPlanBody\`) and the user's revision request
145508
145580
  - gather relevant codebase context (file paths, architecture notes from AGENTS.md)
145509
145581
  - produce a structured plan with clear milestones
145510
- 3. Call \`${t("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
145511
- 4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t("report_progress")}\` so it is not left as "Leaping...".`
145582
+ 4. Call \`${t("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
145583
+ 5. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t("report_progress")}\` so it is not left as "Leaping...".`
145512
145584
  };
145513
145585
  }
145514
145586
  var modeInstructionParent = {
@@ -145940,18 +146012,6 @@ function UploadFileTool(ctx) {
145940
146012
  }
145941
146013
 
145942
146014
  // mcp/server.ts
/**
 * (Deleted at this location in 0.1.3; every line below carries the "-" marker.)
 *
 * Built the initial tool-state object. It parsed `params.progressComment`
 * with parseProgressComment, logged the comment id and type when the parse
 * produced a value, and returned an object with:
 *   - progressComment: the parsed value,
 *   - hadProgressComment: that value coerced to a boolean,
 *   - backgroundProcesses: a new empty Map,
 *   - usageEntries: a new empty array.
 * NOTE(review): whether the function was relocated or dropped entirely is
 * not visible in this hunk.
 */
145943
- function initToolState(params) {
145944
- const resolved = parseProgressComment(params.progressComment);
145945
- if (resolved) {
145946
- log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
145947
- }
145948
- return {
145949
- progressComment: resolved,
145950
- hadProgressComment: !!resolved,
145951
- backgroundProcesses: /* @__PURE__ */ new Map(),
145952
- usageEntries: []
145953
- };
145954
- }
145955
146015
  var mcpPortStart = 3764;
145956
146016
  var mcpPortAttempts = 100;
145957
146017
  var mcpHost = "127.0.0.1";
@@ -146192,18 +146252,20 @@ function computeModes(agentId) {
146192
146252
  description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
146193
146253
  prompt: `### Checklist
146194
146254
 
146195
- 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
146255
+ 1. **task list**: create your task list for this run as your first action.
146256
+
146257
+ 2. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
146196
146258
 
146197
- 2. **setup**: checkout or create the branch:
146259
+ 3. **setup**: checkout or create the branch:
146198
146260
  - **PR event, modifying the existing PR**: call \`${t("checkout_pr")}\`
146199
146261
  - **new branch**: use \`${t("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
146200
146262
 
146201
- 3. **build**: implement changes using your native file and shell tools:
146263
+ 4. **build**: implement changes using your native file and shell tools:
146202
146264
  - follow the plan (if you ran a plan phase)
146203
146265
  - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
146204
146266
  - run relevant tests/lints before committing
146205
146267
 
146206
- 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
146268
+ 5. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
146207
146269
 
146208
146270
  Skip self-review (commit directly) when the diff is **genuinely trivial**:
146209
146271
  - doc typos, comment-only edits, whitespace/format-only, import reordering
@@ -146234,7 +146296,7 @@ function computeModes(agentId) {
146234
146296
 
146235
146297
  Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
146236
146298
 
146237
- 5. **finalize**:
146299
+ 6. **finalize**:
146238
146300
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
146239
146301
  - create a PR via \`${t("create_pull_request")}\`
146240
146302
  - call \`${t("report_progress")}\` with the PR link or the exact error if push/PR failed
@@ -146248,23 +146310,25 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146248
146310
  description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
146249
146311
  prompt: `### Checklist
146250
146312
 
146251
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
146313
+ 1. **task list**: create your task list for this run as your first action.
146252
146314
 
146253
- 2. Fetch review comments via \`${t("get_review_comments")}\`.
146315
+ 2. Checkout the PR branch via \`${t("checkout_pr")}\`.
146254
146316
 
146255
- 3. For each comment:
146317
+ 3. Fetch review comments via \`${t("get_review_comments")}\`.
146318
+
146319
+ 4. For each comment:
146256
146320
  - understand the feedback
146257
146321
  - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
146258
146322
  - if the request stands, make the code change using your native tools; otherwise reply explaining why
146259
146323
  - record what was done (or why nothing was done)
146260
146324
 
146261
- 4. Quality check:
146325
+ 5. Quality check:
146262
146326
  - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
146263
146327
  - commit locally via shell (\`git add . && git commit -m "..."\`)
146264
146328
 
146265
- 5. Finalize:
146329
+ 6. Finalize:
146266
146330
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146267
- - reply to each comment using \`${t("reply_to_review_comment")}\`
146331
+ - reply to each comment **exactly once** using \`${t("reply_to_review_comment")}\` \u2014 do not re-emit the same call (the runtime dedupes identical bodies and the second call is wasted)
146268
146332
  - resolve addressed threads via \`${t("resolve_review_thread")}\`
146269
146333
  - call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)`
146270
146334
  },
@@ -146285,11 +146349,13 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146285
146349
  description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
146286
146350
  prompt: `### Checklist
146287
146351
 
146288
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
146352
+ 1. **task list**: create your task list for this run as your first action.
146353
+
146354
+ 2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
146289
146355
 
146290
- 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
146356
+ 3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
146291
146357
 
146292
- if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
146358
+ if the PR is **genuinely trivial**, skip steps 4\u20135 entirely and submit a \`No new issues found.\` review per step 6. there's no value in dispatching even one lens for a typo.
146293
146359
 
146294
146360
  "Genuinely trivial" (skip):
146295
146361
  - single-word doc typo, whitespace/format-only, comment-only across any number of files
@@ -146334,7 +146400,7 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146334
146400
  - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
146335
146401
  - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
146336
146402
 
146337
- 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
146403
+ 4. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
146338
146404
  - the diff path / target \u2014 reading the diff and the codebase is its job
146339
146405
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146340
146406
  - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
@@ -146349,20 +146415,33 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
146349
146415
  - do NOT pre-shape their output with a finding schema
146350
146416
  - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
146351
146417
 
146352
- 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
146418
+ 5. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
146353
146419
 
146354
146420
  for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
146355
146421
 
146356
- 5. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
146422
+ 6. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
146357
146423
 
146358
146424
  note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146359
146425
 
146360
146426
  The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
146361
146427
 
146362
- - **critical issues** (blocks merge \u2014 bugs, security, data loss):
146428
+ GitHub alert blockquotes render at four visual intensities \u2014 the callout is what the author sees first, so pick the one that matches what you want them to do:
146429
+
146430
+ - \`[!CAUTION]\` \u2014 large red banner. Reads as "this will break something."
146431
+ - \`[!IMPORTANT]\` \u2014 large purple banner. Reads as "you need to look at this before merging."
146432
+ - \`[!NOTE]\` \u2014 small blue inline callout. Reads as "FYI, here's something worth noting."
146433
+ - no callout \u2014 plain text. Reads as routine review output.
146434
+
146435
+ Two reinforcing levers: callout intensity (above) and \`approved\` (which gates the footer Fix-button affordance \u2014 Fix renders on every non-approving review, so \`approved: true\` suppresses it). Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing. Pick the tier the author's actual next action justifies.
146436
+
146437
+ - **critical issues** (blocks merge \u2014 bugs, security, data loss, broken core flows):
146363
146438
  \`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146364
- - **recommended changes** (non-critical):
146365
- \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
146439
+ - **must-address non-critical findings** (real consequences if shipped \u2014 incorrect behavior in non-critical paths, missing validation on user input, regressions the author should fix before merge):
146440
+ \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> ...\`, followed by the PR summary. Reserve this tier for findings with concrete fallout \u2014 do NOT use \`[!IMPORTANT]\` for nits, style preferences, or "consider also" suggestions. Include all inline comments via \`comments\`.
146441
+ - **minor suggestions only** (single-line nits, doc/comment polish, defer-able observations, "rough edges"):
146442
+ \`approved: false\`. NO alert blockquote. Body opens directly with the PR summary. Include all inline comments via \`comments\`.
146443
+ - **informational observations** (mergeable as-is, nothing actionable \u2014 e.g. prior feedback addressed cleanly, surfacing a minor stale doc reference, calling out something noteworthy without recommending a change):
146444
+ \`approved: true\`. Body opens with \`> [!NOTE]\\n> ...\`, followed by the PR summary. Do NOT include inline \`comments\` \u2014 \`[!NOTE]\` signals "no action needed", which contradicts an actionable anchor; if a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead.
146366
146445
  - **no actionable issues**:
146367
146446
  \`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
146368
146447
 
@@ -146371,7 +146450,7 @@ ${PR_SUMMARY_FORMAT}`
146371
146450
  // IncrementalReview shares Review's multi-lens orchestrator pattern but
146372
146451
  // scopes the target to the incremental diff. The "issues must be NEW
146373
146452
  // since the last Pullfrog review" filter lives at aggregation time
146374
- // (step 5), NOT in the subagent prompt — pushing the filter into
146453
+ // (step 6), NOT in the subagent prompt — pushing the filter into
146375
146454
  // subagents matches the canonical anneal anti-pattern of "list known
146376
146455
  // pre-existing failures — don't flag these" and suppresses signal on
146377
146456
  // regressions the new commits amplified. The review body is just
@@ -146384,15 +146463,17 @@ ${PR_SUMMARY_FORMAT}`
146384
146463
  description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
146385
146464
  prompt: `### Checklist
146386
146465
 
146387
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
146466
+ 1. **task list**: create your task list for this run as your first action.
146467
+
146468
+ 2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
146388
146469
 
146389
- 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
146470
+ 3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
146390
146471
 
146391
- 3. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
146472
+ 4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 6 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
146392
146473
 
146393
- 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
146474
+ 5. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
146394
146475
 
146395
- if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
146476
+ if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 8's non-substantive path (do NOT submit a review).
146396
146477
 
146397
146478
  "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
146398
146479
  "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
@@ -146400,8 +146481,8 @@ ${PR_SUMMARY_FORMAT}`
146400
146481
 
146401
146482
  otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
146402
146483
 
146403
- dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
146404
- - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
146484
+ dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 5 entirely on a single subagent failure. each subagent gets:
146485
+ - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 6), not in the subagent prompt
146405
146486
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
146406
146487
  - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
146407
146488
  - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
@@ -146415,15 +146496,21 @@ ${PR_SUMMARY_FORMAT}`
146415
146496
  - do NOT pre-shape their output with a finding schema
146416
146497
  - do NOT mention the other lenses (independence is the point)
146417
146498
 
146418
- 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
146499
+ 6. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
146419
146500
 
146420
- 6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
146501
+ 7. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
146421
146502
 
146422
- 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
146503
+ 8. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
146504
+
146505
+ Same callout-intensity ladder as Review mode \u2014 \`[!CAUTION]\` (large red, "will break") \u2192 \`[!IMPORTANT]\` (large purple, "must address before merging") \u2192 \`[!NOTE]\` (small blue, "FYI") \u2192 no callout (plain text). And the same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
146506
+
146507
+ Follow these rules:
146423
146508
  - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
146424
- - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
146425
- - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
146426
- - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
146509
+ - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Instead call \`${t("report_progress")}\` with a 1-2 sentence note explaining no review was warranted (e.g. "No new issues. Changes since last review are formatting-only."). this leaves a visible signal that the run completed.
146510
+ - ELSE IF NEW CRITICAL ISSUES (blocks merge \u2014 bugs, security, data loss, broken core flows): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, then the Reviewed-changes summary.
146511
+ - ELSE IF NEW MUST-ADDRESS NON-CRITICAL FINDINGS (real consequences if shipped \u2014 incorrect behavior, missing validation, regressions the author should fix before merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\`, then the Reviewed-changes summary. Do NOT use this tier for nits, style preferences, or "consider also" suggestions.
146512
+ - ELSE IF NEW MINOR SUGGESTIONS ONLY (single-line nits, doc/comment polish, defer-able observations, "rough edges"): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens directly with \`Reviewed the following changes:\\n\` (NO alert blockquote), then the Reviewed-changes summary.
146513
+ - ELSE IF INFORMATIONAL OBSERVATIONS (mergeable as-is, but worth surfacing \u2014 e.g. prior feedback addressed cleanly with one minor stale doc reference, or a noteworthy positive observation): call \`${t("create_pull_request_review")}\` with \`approved: true\`, NO inline comments, and the review body. body opens with \`> [!NOTE]\\n> ...\` alert, then the Reviewed-changes summary. If a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead \u2014 \`[!NOTE]\` and inline comments don't mix.
146427
146514
  - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
146428
146515
  },
146429
146516
  {
@@ -146431,33 +146518,37 @@ ${PR_SUMMARY_FORMAT}`
146431
146518
  description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
146432
146519
  prompt: `### Checklist
146433
146520
 
146434
- 1. Analyze the task and gather context:
146521
+ 1. **task list**: create your task list for this run as your first action.
146522
+
146523
+ 2. Analyze the task and gather context:
146435
146524
  - read AGENTS.md and relevant codebase files
146436
146525
  - understand the architecture and constraints
146437
146526
 
146438
- 2. Produce a structured, actionable plan with clear milestones.
146527
+ 3. Produce a structured, actionable plan with clear milestones.
146439
146528
 
146440
- 3. Call \`${t("report_progress")}\` with the plan.`
146529
+ 4. Call \`${t("report_progress")}\` with the plan.`
146441
146530
  },
146442
146531
  {
146443
146532
  name: "Fix",
146444
146533
  description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
146445
146534
  prompt: `### Checklist
146446
146535
 
146447
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
146536
+ 1. **task list**: create your task list for this run as your first action.
146448
146537
 
146449
- 2. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
146538
+ 2. Checkout the PR branch via \`${t("checkout_pr")}\`.
146450
146539
 
146451
- 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
146540
+ 3. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
146452
146541
 
146453
- 4. Diagnose and fix:
146542
+ 4. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
146543
+
146544
+ 5. Diagnose and fix:
146454
146545
  - read the workflow file, reproduce locally with the EXACT same commands CI runs
146455
146546
  - fix the issue using your native file and shell tools
146456
146547
  - verify the fix by re-running the exact CI command
146457
146548
  - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
146458
146549
  - commit locally via shell (\`git add . && git commit -m "..."\`)
146459
146550
 
146460
- 5. Finalize:
146551
+ 6. Finalize:
146461
146552
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
146462
146553
  - call \`${t("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)`
146463
146554
  },
@@ -146466,22 +146557,24 @@ ${PR_SUMMARY_FORMAT}`
146466
146557
  description: "Resolve merge conflicts in a PR branch against the base branch",
146467
146558
  prompt: `### Checklist
146468
146559
 
146469
- 1. **Setup**:
146560
+ 1. **task list**: create your task list for this run as your first action.
146561
+
146562
+ 2. **Setup**:
146470
146563
  - Call \`${t("checkout_pr")}\` to get the PR branch.
146471
146564
  - Call \`${t("get_pull_request")}\` to identify the base branch (e.g., 'main').
146472
146565
  - Call \`${t("git_fetch")}\` to fetch the base branch.
146473
146566
 
146474
- 2. **Merge Attempt**:
146567
+ 3. **Merge Attempt**:
146475
146568
  - Run \`git merge origin/<base_branch>\` via shell.
146476
- - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
146477
- - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
146569
+ - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 4\u20135.**
146570
+ - If it fails (conflicts), resolve them manually (continue to steps 4\u20135).
146478
146571
 
146479
- 3. **Resolve Conflicts**:
146572
+ 4. **Resolve Conflicts**:
146480
146573
  - Run \`git status\` or parse the merge output to find the list of conflicting files.
146481
146574
  - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
146482
146575
  - Verify the file syntax is correct after resolution.
146483
146576
 
146484
- 4. **Finalize**:
146577
+ 5. **Finalize**:
146485
146578
  - Run a final verification (build/test) to ensure the resolution works.
146486
146579
  - \`git add . && git commit -m "resolve merge conflicts"\`
146487
146580
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
@@ -146492,15 +146585,17 @@ ${PR_SUMMARY_FORMAT}`
146492
146585
  description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
146493
146586
  prompt: `### Checklist
146494
146587
 
146495
- 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
146588
+ 1. **task list**: create your task list for this run as your first action.
146496
146589
 
146497
- 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
146590
+ 2. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
146591
+
146592
+ 3. For substantial work \u2014 code changes across multiple files, multi-step investigations:
146498
146593
  - plan your approach before starting
146499
146594
  - use native file and shell tools for local operations
146500
146595
  - use ${pullfrogMcpName} MCP tools for GitHub/git operations
146501
146596
  - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
146502
146597
 
146503
- 3. Finalize:
146598
+ 4. Finalize:
146504
146599
  - if code changes were made, push to a pull request (new or existing) using \`${t("push_branch")}\` and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
146505
146600
  - call \`${t("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
146506
146601
  - if the task involved labeling, commenting, or other GitHub operations, perform those directly`
@@ -146508,6 +146603,25 @@ ${PR_SUMMARY_FORMAT}`
146508
146603
  ];
146509
146604
  }
146510
146605
  var modes = computeModes("opencode");
146606
+ var NON_COMMITTING_MODES = /* @__PURE__ */ new Set([
146607
+ "Review",
146608
+ "IncrementalReview",
146609
+ "Plan"
146610
+ ]);
146611
+
146612
+ // toolState.ts
146613
+ function initToolState(params) {
146614
+ const resolved = parseProgressComment(params.progressComment);
146615
+ if (resolved) {
146616
+ log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
146617
+ }
146618
+ return {
146619
+ progressComment: resolved,
146620
+ hadProgressComment: !!resolved,
146621
+ backgroundProcesses: /* @__PURE__ */ new Map(),
146622
+ usageEntries: []
146623
+ };
146624
+ }
146511
146625
 
146512
146626
  // agents/claude.ts
146513
146627
  import { execFileSync as execFileSync3 } from "node:child_process";
@@ -146754,6 +146868,13 @@ var ThinkingTimer = class {
146754
146868
 
146755
146869
  // agents/postRun.ts
146756
146870
  import { readFile } from "node:fs/promises";
146871
+ function getUnsubmittedReview(toolState) {
146872
+ const mode = toolState.selectedMode;
146873
+ if (mode !== "Review" && mode !== "IncrementalReview") return null;
146874
+ if (toolState.review || toolState.finalSummaryWritten) return null;
146875
+ if (!toolState.hadProgressComment) return null;
146876
+ return mode;
146877
+ }
146757
146878
  var MAX_HOOK_OUTPUT_CHARS = 4096;
146758
146879
  function truncateHookOutput(raw2) {
146759
146880
  if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
@@ -146815,23 +146936,57 @@ function buildSummaryStalePrompt(filePath) {
146815
146936
  "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is \u2014 but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context."
146816
146937
  ].join("\n");
146817
146938
  }
146818
- async function collectPostRunIssues(params) {
146939
+ function buildUnsubmittedReviewPrompt(mode) {
146940
+ if (mode === "Review") {
146941
+ return [
146942
+ `MISSING REVIEW OUTPUT \u2014 you selected Review mode but stopped without calling \`create_pull_request_review\`. the user has no visible signal that this run produced anything; the progress comment will be deleted on exit and no review will appear on the PR.`,
146943
+ "",
146944
+ "call `create_pull_request_review` now with your aggregated review (body + inline comments). pick the tier per the mode prompt \u2014 Review mode has no no-submit exit, so even informational `> [!NOTE]` reviews and `No new issues found.` reviews must be submitted (both use `approved: true`). the first call may error once with a diff-coverage nudge \u2014 retry the same call to proceed.",
146945
+ "",
146946
+ "do NOT stop again until `create_pull_request_review` has been called successfully."
146947
+ ].join("\n");
146948
+ }
146949
+ return [
146950
+ `MISSING REVIEW OUTPUT \u2014 you selected IncrementalReview mode but stopped without calling \`create_pull_request_review\` or \`report_progress\`. the user has no visible signal that this run produced anything; the progress comment will be deleted on exit and no review will appear on the PR.`,
146951
+ "",
146952
+ "do exactly one of:",
146953
+ "- if you have findings: call `create_pull_request_review` now with your aggregated review (body + inline comments). the first call may error once with a diff-coverage nudge \u2014 retry the same call to proceed.",
146954
+ "- if there are genuinely no actionable findings since the last review (e.g. only formatting / comment / lockfile changes): call `report_progress` with a 1-2 sentence summary explaining that no review was warranted.",
146955
+ "",
146956
+ "do NOT stop again until one of those tools has been called successfully."
146957
+ ].join("\n");
146958
+ }
146959
+ async function collectPostRunIssues(ctx, options = {}) {
146819
146960
  const issues = {};
146820
- if (params.stopScript) {
146821
- const failure = await executeStopHook(params.stopScript);
146961
+ if (ctx.stopScript) {
146962
+ const failure = await executeStopHook(ctx.stopScript);
146822
146963
  if (failure) issues.stopHook = failure;
146823
146964
  }
146824
146965
  const status = getGitStatus();
146825
- if (status) issues.dirtyTree = status;
146826
- if (params.summaryFilePath && params.summarySeed !== void 0) {
146827
- const stale = await isSummaryUnchanged(params.summaryFilePath, params.summarySeed);
146828
- if (stale) issues.summaryStale = { filePath: params.summaryFilePath };
146966
+ const mode = ctx.toolState.selectedMode;
146967
+ if (status) {
146968
+ if (mode && NON_COMMITTING_MODES.has(mode)) {
146969
+ log.info(`\xBB dirty-tree gate suppressed: mode \`${mode}\` does not commit`);
146970
+ } else {
146971
+ issues.dirtyTree = status;
146972
+ }
146973
+ }
146974
+ const summaryFilePath2 = ctx.toolState.summaryFilePath;
146975
+ const summarySeed = ctx.toolState.summarySeed;
146976
+ if (!options.skipSummaryStale && summaryFilePath2 && summarySeed !== void 0) {
146977
+ const stale = await isSummaryUnchanged(summaryFilePath2, summarySeed);
146978
+ if (stale) issues.summaryStale = { filePath: summaryFilePath2 };
146829
146979
  }
146980
+ const unsubmittedMode = getUnsubmittedReview(ctx.toolState);
146981
+ if (unsubmittedMode) issues.unsubmittedReview = unsubmittedMode;
146830
146982
  return issues;
146831
146983
  }
146832
146984
  function buildPostRunPrompt(issues) {
146833
146985
  const parts = [];
146834
146986
  if (issues.stopHook) parts.push(buildStopHookPrompt(issues.stopHook));
146987
+ if (issues.unsubmittedReview) {
146988
+ parts.push(buildUnsubmittedReviewPrompt(issues.unsubmittedReview));
146989
+ }
146835
146990
  if (issues.dirtyTree) parts.push(buildCommitPrompt(issues.dirtyTree));
146836
146991
  if (issues.summaryStale) parts.push(buildSummaryStalePrompt(issues.summaryStale.filePath));
146837
146992
  return parts.join("\n\n---\n\n");
@@ -146858,10 +147013,8 @@ async function runPostRunRetryLoop(params) {
146858
147013
  let summaryStaleNudged = false;
146859
147014
  while (gateResumeCount < MAX_POST_RUN_RETRIES) {
146860
147015
  if (!result.success) break;
146861
- const issues = await collectPostRunIssues({
146862
- stopScript: params.stopScript,
146863
- summaryFilePath: summaryStaleNudged ? void 0 : params.summaryFilePath,
146864
- summarySeed: summaryStaleNudged ? void 0 : params.summarySeed
147016
+ const issues = await collectPostRunIssues(params.ctx, {
147017
+ skipSummaryStale: summaryStaleNudged
146865
147018
  });
146866
147019
  if (issues.summaryStale) summaryStaleNudged = true;
146867
147020
  finalIssues = issues;
@@ -146909,7 +147062,7 @@ async function runPostRunRetryLoop(params) {
146909
147062
  gateResumeCount++;
146910
147063
  }
146911
147064
  if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
146912
- finalIssues = await collectPostRunIssues({ stopScript: params.stopScript });
147065
+ finalIssues = await collectPostRunIssues(params.ctx, { skipSummaryStale: true });
146913
147066
  }
146914
147067
  if (result.success && finalIssues.stopHook) {
146915
147068
  const retryNote = gateResumeCount > 0 ? ` after ${gateResumeCount} retry ${gateResumeCount === 1 ? "attempt" : "attempts"}` : "";
@@ -146920,6 +147073,16 @@ async function runPostRunRetryLoop(params) {
146920
147073
  usage: aggregatedUsage
146921
147074
  };
146922
147075
  }
147076
+ if (result.success && finalIssues.unsubmittedReview) {
147077
+ const retryNote = gateResumeCount > 0 ? ` after ${gateResumeCount} retry ${gateResumeCount === 1 ? "attempt" : "attempts"}` : "";
147078
+ const expected = finalIssues.unsubmittedReview === "Review" ? "create_pull_request_review" : "create_pull_request_review or report_progress";
147079
+ return {
147080
+ ...result,
147081
+ success: false,
147082
+ error: `${finalIssues.unsubmittedReview} mode finished without calling ${expected}${retryNote}`,
147083
+ usage: aggregatedUsage
147084
+ };
147085
+ }
146923
147086
  return { ...result, usage: aggregatedUsage };
146924
147087
  }
146925
147088
 
@@ -147036,6 +147199,12 @@ function resolveEffort(model) {
147036
147199
  if (model?.includes("opus")) return "max";
147037
147200
  return "high";
147038
147201
  }
147202
+ function tailLines(text, maxCodeUnits) {
147203
+ if (text.length <= maxCodeUnits) return text;
147204
+ const tail = text.slice(-maxCodeUnits);
147205
+ const firstNewline = tail.indexOf("\n");
147206
+ return firstNewline > 0 && firstNewline < tail.length - 1 ? tail.slice(firstNewline + 1) : tail;
147207
+ }
147039
147208
  async function runClaude(params) {
147040
147209
  const startTime = performance6.now();
147041
147210
  let eventCount = 0;
@@ -147043,6 +147212,8 @@ async function runClaude(params) {
147043
147212
  let finalOutput = "";
147044
147213
  let sessionId;
147045
147214
  let resultErrorSubtype = null;
147215
+ let lastResultError = null;
147216
+ let syntheticStopFailure = false;
147046
147217
  let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
147047
147218
  let accumulatedCostUsd = 0;
147048
147219
  let tokensLogged = false;
@@ -147125,6 +147296,16 @@ async function runClaude(params) {
147125
147296
  if (event.session_id) sessionId = event.session_id;
147126
147297
  const subtype = event.subtype || "unknown";
147127
147298
  const numTurns = event.num_turns || 0;
147299
+ if (event.is_error === true && subtype === "success") {
147300
+ const apiStatus = event.api_error_status;
147301
+ lastResultError = event.result?.trim() || `claude reported is_error=true with no result text (api_error_status=${apiStatus ?? "unknown"})`;
147302
+ resultErrorSubtype = subtype;
147303
+ syntheticStopFailure = true;
147304
+ log.info(
147305
+ `\xBB ${params.label} result error: subtype=${subtype}, api_error_status=${apiStatus ?? "unknown"}, message=${lastResultError}`
147306
+ );
147307
+ return;
147308
+ }
147128
147309
  if (subtype === "success") {
147129
147310
  const usage = event.usage;
147130
147311
  const inputTokens = usage?.input_tokens || 0;
@@ -147147,12 +147328,15 @@ async function runClaude(params) {
147147
147328
  }
147148
147329
  } else if (subtype === "error_max_turns") {
147149
147330
  resultErrorSubtype = subtype;
147331
+ lastResultError = event.errors?.join("\n").trim() || null;
147150
147332
  log.info(`\xBB ${params.label} max turns reached: ${JSON.stringify(event)}`);
147151
147333
  } else if (subtype === "error_during_execution") {
147152
147334
  resultErrorSubtype = subtype;
147335
+ lastResultError = event.errors?.join("\n").trim() || null;
147153
147336
  log.info(`\xBB ${params.label} execution error: ${JSON.stringify(event)}`);
147154
147337
  } else if (subtype.startsWith("error")) {
147155
147338
  resultErrorSubtype = subtype;
147339
+ lastResultError = event.errors?.join("\n").trim() || null;
147156
147340
  log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
147157
147341
  } else {
147158
147342
  log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
@@ -147260,14 +147444,15 @@ async function runClaude(params) {
147260
147444
  if (stderrContext) log.info(`\xBB last stderr output:
147261
147445
  ${stderrContext}`);
147262
147446
  }
147263
- if (!tokensLogged && (accumulatedTokens.input > 0 || accumulatedTokens.output > 0 || accumulatedTokens.cacheRead > 0 || accumulatedTokens.cacheWrite > 0)) {
147447
+ if (!tokensLogged && !syntheticStopFailure && (accumulatedTokens.input > 0 || accumulatedTokens.output > 0 || accumulatedTokens.cacheRead > 0 || accumulatedTokens.cacheWrite > 0)) {
147264
147448
  logTokenTable({ ...accumulatedTokens, costUsd: accumulatedCostUsd });
147265
147449
  tokensLogged = true;
147266
147450
  }
147267
147451
  const usage = buildUsage();
147268
147452
  if (result.exitCode !== 0) {
147269
147453
  const errorContext = lastProviderError ? ` (${lastProviderError})` : "";
147270
- const errorMessage = result.stderr || result.stdout || `unknown error - no output from Claude CLI${errorContext}`;
147454
+ const truncatedStdout = result.stdout ? tailLines(result.stdout, 2048) : "";
147455
+ const errorMessage = lastResultError || result.stderr || truncatedStdout || `unknown error - no output from Claude CLI${errorContext}`;
147271
147456
  log.error(
147272
147457
  `${params.label} exited with code ${result.exitCode}${errorContext}: ${errorMessage}`
147273
147458
  );
@@ -147294,7 +147479,7 @@ ${stderrContext}`);
147294
147479
  return {
147295
147480
  success: false,
147296
147481
  output: finalOutput || output,
147297
- error: `result subtype: ${resultErrorSubtype}`,
147482
+ error: lastResultError || `result subtype: ${resultErrorSubtype}`,
147298
147483
  usage,
147299
147484
  sessionId
147300
147485
  };
@@ -147424,12 +147609,10 @@ var claude = agent({
147424
147609
  args: [...baseArgs, "-p", ctx.instructions.full]
147425
147610
  });
147426
147611
  return runPostRunRetryLoop({
147612
+ ctx,
147427
147613
  initialResult: result,
147428
147614
  initialUsage: result.usage,
147429
- stopScript: ctx.stopScript,
147430
- summaryFilePath: ctx.summaryFilePath,
147431
- summarySeed: ctx.summarySeed,
147432
- reflectionPrompt: ctx.learningsFilePath ? buildLearningsReflectionPrompt(ctx.learningsFilePath) : void 0,
147615
+ reflectionPrompt: ctx.toolState.learningsFilePath ? buildLearningsReflectionPrompt(ctx.toolState.learningsFilePath) : void 0,
147433
147616
  canResume: (r) => Boolean(r.sessionId),
147434
147617
  resume: async (c) => {
147435
147618
  const sessionId = c.previousResult.sessionId;
@@ -147540,6 +147723,8 @@ async function installOpencodeCli() {
147540
147723
  });
147541
147724
  }
147542
147725
  var PULLFROG_OPENCODE_OUTPUT_LIMIT = 5e3;
147726
+ var GEMINI_3_DIRECT_THINKING_LEVEL = "medium";
147727
+ var GEMINI_3_DIRECT_API_IDS = ["gemini-3.1-pro-preview", "gemini-3-flash-preview"];
147543
147728
  function buildSecurityConfig(ctx, model) {
147544
147729
  const config3 = {
147545
147730
  permission: {
@@ -147553,7 +147738,21 @@ function buildSecurityConfig(ctx, model) {
147553
147738
  mcp: {
147554
147739
  [pullfrogMcpName]: { type: "remote", url: ctx.mcpServerUrl }
147555
147740
  },
147556
- agent: buildReviewerAgentConfig()
147741
+ agent: buildReviewerAgentConfig(),
147742
+ provider: {
147743
+ google: {
147744
+ models: Object.fromEntries(
147745
+ GEMINI_3_DIRECT_API_IDS.map((id) => [
147746
+ id,
147747
+ {
147748
+ options: {
147749
+ thinkingConfig: { thinkingLevel: GEMINI_3_DIRECT_THINKING_LEVEL }
147750
+ }
147751
+ }
147752
+ ])
147753
+ )
147754
+ }
147755
+ }
147557
147756
  };
147558
147757
  if (model) {
147559
147758
  config3.model = model;
@@ -148148,12 +148347,10 @@ var opencode = agent({
148148
148347
  args: [...baseArgs, ctx.instructions.full]
148149
148348
  });
148150
148349
  return runPostRunRetryLoop({
148350
+ ctx,
148151
148351
  initialResult: result,
148152
148352
  initialUsage: result.usage,
148153
- stopScript: ctx.stopScript,
148154
- summaryFilePath: ctx.summaryFilePath,
148155
- summarySeed: ctx.summarySeed,
148156
- reflectionPrompt: ctx.learningsFilePath ? buildLearningsReflectionPrompt(ctx.learningsFilePath) : void 0,
148353
+ reflectionPrompt: ctx.toolState.learningsFilePath ? buildLearningsReflectionPrompt(ctx.toolState.learningsFilePath) : void 0,
148157
148354
  resume: async (c) => runOpenCode({
148158
148355
  ...runParams,
148159
148356
  args: [...baseArgs, "--continue", c.prompt]
@@ -152079,8 +152276,10 @@ var checkRepositoryAccess = async (token, repoOwner, repoName) => {
152079
152276
  const response = await githubRequest("/installation/repositories", {
152080
152277
  headers: { Authorization: `token ${token}` }
152081
152278
  });
152279
+ const ownerLower = repoOwner.toLowerCase();
152280
+ const nameLower = repoName.toLowerCase();
152082
152281
  return response.repositories.some(
152083
- (repo) => repo.owner.login === repoOwner && repo.name === repoName
152282
+ (repo) => repo.owner.login.toLowerCase() === ownerLower && repo.name.toLowerCase() === nameLower
152084
152283
  );
152085
152284
  } catch {
152086
152285
  return false;
@@ -153772,9 +153971,10 @@ async function persistSummary(ctx) {
153772
153971
  log.debug(`pr summary persist failed: ${err instanceof Error ? err.message : String(err)}`);
153773
153972
  });
153774
153973
  }
153775
- async function writeJobSummary(toolState) {
153974
+ async function writeJobSummary(toolState, finalOutput) {
153776
153975
  const usageSummary = formatUsageSummary(toolState.usageEntries);
153777
- const summaryParts = [toolState.lastProgressBody, usageSummary].filter(Boolean);
153976
+ const body = toolState.lastProgressBody || finalOutput;
153977
+ const summaryParts = [body, usageSummary].filter(Boolean);
153778
153978
  if (summaryParts.length > 0) {
153779
153979
  await writeSummary(summaryParts.join("\n\n"));
153780
153980
  }
@@ -154060,9 +154260,7 @@ ${instructions.user}` : null,
154060
154260
  instructions,
154061
154261
  todoTracker,
154062
154262
  stopScript: runContext.repoSettings.stopScript,
154063
- summaryFilePath: toolState.summaryFilePath,
154064
- summarySeed: toolState.summarySeed,
154065
- learningsFilePath: toolState.learningsFilePath,
154263
+ toolState,
154066
154264
  onActivityTimeout: onInnerActivityTimeout,
154067
154265
  onToolUse: (event) => {
154068
154266
  const wasTracked = recordDiffReadFromToolUse({
@@ -154123,12 +154321,24 @@ ${instructions.user}` : null,
154123
154321
  if (toolContext) {
154124
154322
  await persistLearnings(toolContext);
154125
154323
  }
154126
- if (toolContext && toolState.progressComment && !toolState.finalSummaryWritten) {
154324
+ if (!result.success && toolContext && toolState.progressComment) {
154325
+ await reportErrorToComment({
154326
+ toolState,
154327
+ error: result.error || "agent run failed"
154328
+ }).catch((error49) => {
154329
+ log.debug(`failure error report failed: ${error49}`);
154330
+ });
154331
+ }
154332
+ if (toolContext && result.success && toolState.progressComment && !toolState.finalSummaryWritten) {
154127
154333
  await deleteProgressComment(toolContext).catch((error49) => {
154128
154334
  log.debug(`stranded progress comment cleanup failed: ${error49}`);
154129
154335
  });
154130
154336
  }
154131
- await writeJobSummary(toolState);
154337
+ try {
154338
+ await writeJobSummary(toolState, result.output);
154339
+ } catch (error49) {
154340
+ log.debug(`job summary write failed: ${error49}`);
154341
+ }
154132
154342
  if (toolState.output) {
154133
154343
  log.info(`::pullfrog-output::${Buffer.from(toolState.output).toString("base64")}`);
154134
154344
  core6.setOutput("result", toolState.output);