pullfrog 0.0.204 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -11
- package/dist/agents/postRun.d.ts +21 -2
- package/dist/agents/shared.d.ts +22 -0
- package/dist/cli.mjs +859 -772
- package/dist/external.d.ts +2 -0
- package/dist/index.js +808 -564
- package/dist/internal/index.d.ts +1 -0
- package/dist/internal.js +41 -43
- package/dist/mcp/comment.d.ts +4 -10
- package/dist/mcp/review.d.ts +15 -2
- package/dist/mcp/selectMode.d.ts +0 -6
- package/dist/mcp/server.d.ts +3 -1
- package/dist/utils/diffCoverage.d.ts +1 -0
- package/dist/utils/leapingComment.d.ts +11 -0
- package/dist/utils/patchWorkflowRunFields.d.ts +1 -1
- package/dist/utils/payload.d.ts +2 -0
- package/dist/utils/prSummary.d.ts +40 -0
- package/dist/utils/providerErrors.d.ts +1 -0
- package/dist/utils/retry.d.ts +6 -0
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -18415,7 +18415,7 @@ var require_summary = __commonJS({
|
|
|
18415
18415
|
exports.summary = exports.markdownSummary = exports.SUMMARY_DOCS_URL = exports.SUMMARY_ENV_VAR = void 0;
|
|
18416
18416
|
var os_1 = __require("os");
|
|
18417
18417
|
var fs_1 = __require("fs");
|
|
18418
|
-
var { access, appendFile, writeFile:
|
|
18418
|
+
var { access, appendFile, writeFile: writeFile3 } = fs_1.promises;
|
|
18419
18419
|
exports.SUMMARY_ENV_VAR = "GITHUB_STEP_SUMMARY";
|
|
18420
18420
|
exports.SUMMARY_DOCS_URL = "https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary";
|
|
18421
18421
|
var Summary = class {
|
|
@@ -18473,7 +18473,7 @@ var require_summary = __commonJS({
|
|
|
18473
18473
|
return __awaiter(this, void 0, void 0, function* () {
|
|
18474
18474
|
const overwrite = !!(options === null || options === void 0 ? void 0 : options.overwrite);
|
|
18475
18475
|
const filePath = yield this.filePath();
|
|
18476
|
-
const writeFunc = overwrite ?
|
|
18476
|
+
const writeFunc = overwrite ? writeFile3 : appendFile;
|
|
18477
18477
|
yield writeFunc(filePath, this._buffer, { encoding: "utf8" });
|
|
18478
18478
|
return this.emptyBuffer();
|
|
18479
18479
|
});
|
|
@@ -62879,8 +62879,8 @@ var require_snapshot_utils = __commonJS({
|
|
|
62879
62879
|
var require_snapshot_recorder = __commonJS({
|
|
62880
62880
|
"node_modules/.pnpm/undici@7.22.0/node_modules/undici/lib/mock/snapshot-recorder.js"(exports, module) {
|
|
62881
62881
|
"use strict";
|
|
62882
|
-
var { writeFile:
|
|
62883
|
-
var { dirname:
|
|
62882
|
+
var { writeFile: writeFile3, readFile: readFile4, mkdir: mkdir2 } = __require("node:fs/promises");
|
|
62883
|
+
var { dirname: dirname6, resolve: resolve3 } = __require("node:path");
|
|
62884
62884
|
var { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = __require("node:timers");
|
|
62885
62885
|
var { InvalidArgumentError, UndiciError } = require_errors4();
|
|
62886
62886
|
var { hashId, isUrlExcludedFactory, normalizeHeaders, createHeaderFilters } = require_snapshot_utils();
|
|
@@ -63081,7 +63081,7 @@ var require_snapshot_recorder = __commonJS({
|
|
|
63081
63081
|
throw new InvalidArgumentError("Snapshot path is required");
|
|
63082
63082
|
}
|
|
63083
63083
|
try {
|
|
63084
|
-
const data = await
|
|
63084
|
+
const data = await readFile4(resolve3(path3), "utf8");
|
|
63085
63085
|
const parsed2 = JSON.parse(data);
|
|
63086
63086
|
if (Array.isArray(parsed2)) {
|
|
63087
63087
|
this.#snapshots.clear();
|
|
@@ -63111,12 +63111,12 @@ var require_snapshot_recorder = __commonJS({
|
|
|
63111
63111
|
throw new InvalidArgumentError("Snapshot path is required");
|
|
63112
63112
|
}
|
|
63113
63113
|
const resolvedPath = resolve3(path3);
|
|
63114
|
-
await
|
|
63114
|
+
await mkdir2(dirname6(resolvedPath), { recursive: true });
|
|
63115
63115
|
const data = Array.from(this.#snapshots.entries()).map(([hash2, snapshot2]) => ({
|
|
63116
63116
|
hash: hash2,
|
|
63117
63117
|
snapshot: snapshot2
|
|
63118
63118
|
}));
|
|
63119
|
-
await
|
|
63119
|
+
await writeFile3(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
|
|
63120
63120
|
}
|
|
63121
63121
|
/**
|
|
63122
63122
|
* Clears all recorded snapshots
|
|
@@ -97692,14 +97692,14 @@ var require_turndown_cjs = __commonJS({
|
|
|
97692
97692
|
} else if (node2.nodeType === 1) {
|
|
97693
97693
|
replacement = replacementForNode.call(self2, node2);
|
|
97694
97694
|
}
|
|
97695
|
-
return
|
|
97695
|
+
return join17(output, replacement);
|
|
97696
97696
|
}, "");
|
|
97697
97697
|
}
|
|
97698
97698
|
function postProcess(output) {
|
|
97699
97699
|
var self2 = this;
|
|
97700
97700
|
this.rules.forEach(function(rule) {
|
|
97701
97701
|
if (typeof rule.append === "function") {
|
|
97702
|
-
output =
|
|
97702
|
+
output = join17(output, rule.append(self2.options));
|
|
97703
97703
|
}
|
|
97704
97704
|
});
|
|
97705
97705
|
return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
|
|
@@ -97711,7 +97711,7 @@ var require_turndown_cjs = __commonJS({
|
|
|
97711
97711
|
if (whitespace.leading || whitespace.trailing) content = content.trim();
|
|
97712
97712
|
return whitespace.leading + rule.replacement(content, node2, this.options) + whitespace.trailing;
|
|
97713
97713
|
}
|
|
97714
|
-
function
|
|
97714
|
+
function join17(output, replacement) {
|
|
97715
97715
|
var s1 = trimTrailingNewlines(output);
|
|
97716
97716
|
var s2 = trimLeadingNewlines(replacement);
|
|
97717
97717
|
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
|
|
@@ -99204,12 +99204,13 @@ import { basename as basename2 } from "node:path";
|
|
|
99204
99204
|
// commands/gha.ts
|
|
99205
99205
|
var core7 = __toESM(require_core(), 1);
|
|
99206
99206
|
var import_arg = __toESM(require_arg(), 1);
|
|
99207
|
-
import { dirname as
|
|
99207
|
+
import { dirname as dirname5 } from "node:path";
|
|
99208
99208
|
|
|
99209
99209
|
// main.ts
|
|
99210
99210
|
var core6 = __toESM(require_core(), 1);
|
|
99211
99211
|
import { existsSync as existsSync7, readdirSync } from "node:fs";
|
|
99212
|
-
import {
|
|
99212
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
99213
|
+
import { join as join16 } from "node:path";
|
|
99213
99214
|
|
|
99214
99215
|
// node_modules/.pnpm/@ark+util@0.56.0/node_modules/@ark/util/out/arrays.js
|
|
99215
99216
|
var liftArray = (data) => Array.isArray(data) ? data : [data];
|
|
@@ -107705,7 +107706,7 @@ function buildCommitPrompt(status) {
|
|
|
107705
107706
|
].join("\n");
|
|
107706
107707
|
}
|
|
107707
107708
|
function hasPostRunIssues(issues) {
|
|
107708
|
-
return issues.stopHook !== void 0 || issues.dirtyTree !== void 0;
|
|
107709
|
+
return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0;
|
|
107709
107710
|
}
|
|
107710
107711
|
var agent = (input) => {
|
|
107711
107712
|
return {
|
|
@@ -108398,7 +108399,11 @@ function resolveCliModel(slug2) {
|
|
|
108398
108399
|
var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
|
|
108399
108400
|
var FROG_LOGO = `<a href="https://pullfrog.com"><picture><source media="(prefers-color-scheme: dark)" srcset="https://pullfrog.com/logos/frog-white-full-18px.png"><img src="https://pullfrog.com/logos/frog-green-full-18px.png" width="9px" height="9px" style="vertical-align: middle; " alt="Pullfrog"></picture></a>`;
|
|
108400
108401
|
function formatModelLabel(slug2) {
|
|
108401
|
-
const alias = resolveDisplayAlias(slug2)
|
|
108402
|
+
const alias = resolveDisplayAlias(slug2) ?? // reverse-lookup: when the caller passes an effective model (proxy or
|
|
108403
|
+
// resolved target like "openrouter/anthropic/claude-opus-4.7") instead of
|
|
108404
|
+
// a stored alias slug, find the alias whose resolve target matches so we
|
|
108405
|
+
// still render a friendly display name.
|
|
108406
|
+
modelAliases.find((a) => a.resolve === slug2 || a.openRouterResolve === slug2);
|
|
108402
108407
|
if (!alias) return `\`${slug2}\``;
|
|
108403
108408
|
return alias.isFree ? `\`${alias.displayName}\` (free)` : `\`${alias.displayName}\``;
|
|
108404
108409
|
}
|
|
@@ -108473,10 +108478,13 @@ var defaultShouldRetry = (error49) => {
|
|
|
108473
108478
|
return error49.name === "AbortError" || error49.message.includes("fetch failed") || error49.message.includes("ECONNRESET") || error49.message.includes("ETIMEDOUT");
|
|
108474
108479
|
};
|
|
108475
108480
|
async function retry(fn2, options = {}) {
|
|
108476
|
-
const maxAttempts = options.maxAttempts ?? 3;
|
|
108477
|
-
const delayMs = options.delayMs ?? 1e3;
|
|
108478
108481
|
const shouldRetry = options.shouldRetry ?? defaultShouldRetry;
|
|
108479
108482
|
const label = options.label ?? "operation";
|
|
108483
|
+
const delays = options.delaysMs ? Array.from(options.delaysMs) : Array.from(
|
|
108484
|
+
{ length: (options.maxAttempts ?? 3) - 1 },
|
|
108485
|
+
(_2, i) => (options.delayMs ?? 1e3) * (i + 1)
|
|
108486
|
+
);
|
|
108487
|
+
const maxAttempts = delays.length + 1;
|
|
108480
108488
|
let lastError;
|
|
108481
108489
|
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
108482
108490
|
try {
|
|
@@ -108486,7 +108494,7 @@ async function retry(fn2, options = {}) {
|
|
|
108486
108494
|
if (attempt === maxAttempts || !shouldRetry(error49)) {
|
|
108487
108495
|
throw error49;
|
|
108488
108496
|
}
|
|
108489
|
-
const delay2 =
|
|
108497
|
+
const delay2 = delays[attempt - 1];
|
|
108490
108498
|
log.info(`\xBB ${label} failed (attempt ${attempt}/${maxAttempts}), retrying in ${delay2}ms...`);
|
|
108491
108499
|
await sleep(delay2);
|
|
108492
108500
|
}
|
|
@@ -108500,7 +108508,7 @@ var STRING_KEYS = [
|
|
|
108500
108508
|
"issueNodeId",
|
|
108501
108509
|
"reviewNodeId",
|
|
108502
108510
|
"planCommentNodeId",
|
|
108503
|
-
"
|
|
108511
|
+
"summarySnapshot"
|
|
108504
108512
|
];
|
|
108505
108513
|
var NUMBER_KEYS = [
|
|
108506
108514
|
"inputTokens",
|
|
@@ -108590,22 +108598,6 @@ function parseProgressComment(raw2) {
|
|
|
108590
108598
|
if (Number.isNaN(id) || id <= 0) return void 0;
|
|
108591
108599
|
return { id, type: raw2.type };
|
|
108592
108600
|
}
|
|
108593
|
-
async function getProgressComment(ctx, comment) {
|
|
108594
|
-
const result = await (comment.type === "review" ? ctx.octokit.rest.pulls.getReviewComment({
|
|
108595
|
-
owner: ctx.owner,
|
|
108596
|
-
repo: ctx.repo,
|
|
108597
|
-
comment_id: comment.id
|
|
108598
|
-
}) : ctx.octokit.rest.issues.getComment({
|
|
108599
|
-
owner: ctx.owner,
|
|
108600
|
-
repo: ctx.repo,
|
|
108601
|
-
comment_id: comment.id
|
|
108602
|
-
}));
|
|
108603
|
-
return {
|
|
108604
|
-
id: result.data.id,
|
|
108605
|
-
body: result.data.body ?? void 0,
|
|
108606
|
-
html_url: result.data.html_url
|
|
108607
|
-
};
|
|
108608
|
-
}
|
|
108609
108601
|
async function updateProgressComment(ctx, comment, body) {
|
|
108610
108602
|
const result = await (comment.type === "review" ? ctx.octokit.rest.pulls.updateReviewComment({
|
|
108611
108603
|
owner: ctx.owner,
|
|
@@ -109205,12 +109197,6 @@ var addTools = (ctx, server, tools) => {
|
|
|
109205
109197
|
};
|
|
109206
109198
|
|
|
109207
109199
|
// mcp/comment.ts
|
|
109208
|
-
var LEAPING_INTO_ACTION_PREFIX = "Leaping into action";
|
|
109209
|
-
function isLeapingIntoActionCommentBody(body) {
|
|
109210
|
-
const content = stripExistingFooter(body).trimStart();
|
|
109211
|
-
const firstLine = content.split(/\r?\n/, 1)[0]?.trimEnd() ?? "";
|
|
109212
|
-
return new RegExp(`(^|\\s)${LEAPING_INTO_ACTION_PREFIX}(\\.\\.\\.)?$`).test(firstLine);
|
|
109213
|
-
}
|
|
109214
109200
|
function buildCommentFooter(ctx, customParts) {
|
|
109215
109201
|
const runId = ctx.runId;
|
|
109216
109202
|
return buildPullfrogFooter({
|
|
@@ -109242,43 +109228,22 @@ function addFooter(ctx, body) {
|
|
|
109242
109228
|
var Comment = type({
|
|
109243
109229
|
issueNumber: type.number.describe("the issue number to comment on"),
|
|
109244
109230
|
body: type.string.describe("the comment body content"),
|
|
109245
|
-
type: type.enumerated("Plan", "
|
|
109246
|
-
"Plan: record as the plan for this run. Summary: record as the PR summary comment (one per PR, updated in place). Comment: regular comment (default)."
|
|
109247
|
-
).optional()
|
|
109231
|
+
type: type.enumerated("Plan", "Comment").describe("Plan: record as the plan for this run. Comment: regular comment (default).").optional()
|
|
109248
109232
|
});
|
|
109249
109233
|
function CreateCommentTool(ctx) {
|
|
109250
109234
|
return tool({
|
|
109251
109235
|
name: "create_issue_comment",
|
|
109252
|
-
description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments
|
|
109236
|
+
description: "Create a comment on a GitHub issue or PR. For progress/plan updates on the current run use report_progress instead. Use type: 'Plan' for plan comments.",
|
|
109253
109237
|
parameters: Comment,
|
|
109254
109238
|
execute: execute(async ({ issueNumber, body, type: commentType }) => {
|
|
109255
109239
|
const bodyWithFooter = addFooter(ctx, body);
|
|
109256
|
-
if (commentType === "Summary" && ctx.toolState.existingSummaryCommentId) {
|
|
109257
|
-
log.info(
|
|
109258
|
-
`\xBB redirecting create_issue_comment(Summary) to update existing comment ${ctx.toolState.existingSummaryCommentId}`
|
|
109259
|
-
);
|
|
109260
|
-
const result2 = await ctx.octokit.rest.issues.updateComment({
|
|
109261
|
-
owner: ctx.repo.owner,
|
|
109262
|
-
repo: ctx.repo.name,
|
|
109263
|
-
comment_id: ctx.toolState.existingSummaryCommentId,
|
|
109264
|
-
body: bodyWithFooter
|
|
109265
|
-
});
|
|
109266
|
-
if (result2.data.node_id) {
|
|
109267
|
-
await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result2.data.node_id });
|
|
109268
|
-
}
|
|
109269
|
-
return {
|
|
109270
|
-
success: true,
|
|
109271
|
-
commentId: result2.data.id,
|
|
109272
|
-
url: result2.data.html_url,
|
|
109273
|
-
body: result2.data.body
|
|
109274
|
-
};
|
|
109275
|
-
}
|
|
109276
109240
|
const result = await ctx.octokit.rest.issues.createComment({
|
|
109277
109241
|
owner: ctx.repo.owner,
|
|
109278
109242
|
repo: ctx.repo.name,
|
|
109279
109243
|
issue_number: issueNumber,
|
|
109280
109244
|
body: bodyWithFooter
|
|
109281
109245
|
});
|
|
109246
|
+
ctx.toolState.wasUpdated = true;
|
|
109282
109247
|
if (commentType === "Plan") {
|
|
109283
109248
|
if (result.data.node_id) {
|
|
109284
109249
|
await patchWorkflowRunFields(ctx, { planCommentNodeId: result.data.node_id });
|
|
@@ -109299,9 +109264,6 @@ function CreateCommentTool(ctx) {
|
|
|
109299
109264
|
body: updateResult.data.body
|
|
109300
109265
|
};
|
|
109301
109266
|
}
|
|
109302
|
-
if (commentType === "Summary" && result.data.node_id) {
|
|
109303
|
-
await patchWorkflowRunFields(ctx, { summaryCommentNodeId: result.data.node_id });
|
|
109304
|
-
}
|
|
109305
109267
|
return {
|
|
109306
109268
|
success: true,
|
|
109307
109269
|
commentId: result.data.id,
|
|
@@ -109457,15 +109419,15 @@ ${collapsible}`;
|
|
|
109457
109419
|
reportParams.target_plan_comment = params.target_plan_comment;
|
|
109458
109420
|
}
|
|
109459
109421
|
const result = await reportProgress(ctx, reportParams);
|
|
109460
|
-
if (!params.target_plan_comment) {
|
|
109461
|
-
ctx.toolState.finalSummaryWritten = true;
|
|
109462
|
-
}
|
|
109463
109422
|
if (result.action === "skipped") {
|
|
109464
109423
|
return {
|
|
109465
109424
|
success: true,
|
|
109466
109425
|
message: "progress recorded (no GitHub comment created - this may occur for workflow_dispatch events or when there is no associated issue/PR)"
|
|
109467
109426
|
};
|
|
109468
109427
|
}
|
|
109428
|
+
if (!params.target_plan_comment) {
|
|
109429
|
+
ctx.toolState.finalSummaryWritten = true;
|
|
109430
|
+
}
|
|
109469
109431
|
return {
|
|
109470
109432
|
success: true,
|
|
109471
109433
|
...result
|
|
@@ -142570,7 +142532,7 @@ var import_semver = __toESM(require_semver2(), 1);
|
|
|
142570
142532
|
// package.json
|
|
142571
142533
|
var package_default = {
|
|
142572
142534
|
name: "pullfrog",
|
|
142573
|
-
version: "0.0
|
|
142535
|
+
version: "0.1.0",
|
|
142574
142536
|
type: "module",
|
|
142575
142537
|
bin: {
|
|
142576
142538
|
pullfrog: "dist/cli.mjs",
|
|
@@ -142767,7 +142729,7 @@ function closeBrowserDaemon(toolState) {
|
|
|
142767
142729
|
|
|
142768
142730
|
// mcp/checkout.ts
|
|
142769
142731
|
import { createHash as createHash2 } from "node:crypto";
|
|
142770
|
-
import { writeFileSync } from "node:fs";
|
|
142732
|
+
import { statSync, unlinkSync as unlinkSync2, writeFileSync } from "node:fs";
|
|
142771
142733
|
import { join as join3 } from "node:path";
|
|
142772
142734
|
|
|
142773
142735
|
// utils/diffCoverage.ts
|
|
@@ -142796,7 +142758,10 @@ function createDiffCoverageState(params) {
|
|
|
142796
142758
|
totalLines: params.totalLines,
|
|
142797
142759
|
tocEntries: parseDiffTocEntries({ toc: params.toc }),
|
|
142798
142760
|
coveredRanges: [],
|
|
142799
|
-
|
|
142761
|
+
// carry forward across checkout_pr refreshes so the nudge stays "once per
|
|
142762
|
+
// review session". coveredRanges are intentionally not carried because
|
|
142763
|
+
// line numbers are tied to the previous diff's content.
|
|
142764
|
+
coveragePreflightRan: params.previous?.coveragePreflightRan ?? false
|
|
142800
142765
|
};
|
|
142801
142766
|
}
|
|
142802
142767
|
function recordDiffReadFromToolUse(params) {
|
|
@@ -143600,6 +143565,11 @@ var GitFetch = type({
|
|
|
143600
143565
|
ref: type.string.describe("Ref to fetch: branch name, tag, or 'pull/N/head' for PRs"),
|
|
143601
143566
|
depth: type.number.describe("Fetch depth (for shallow clones)").optional()
|
|
143602
143567
|
});
|
|
143568
|
+
var SHALLOW_UNREACHABLE_PATTERNS = [
|
|
143569
|
+
/Could not read [a-f0-9]{40,64}/,
|
|
143570
|
+
/remote did not send all necessary objects/
|
|
143571
|
+
];
|
|
143572
|
+
var DEEPEN_RETRY_DEPTH = 1e3;
|
|
143603
143573
|
function GitFetchTool(ctx) {
|
|
143604
143574
|
return tool({
|
|
143605
143575
|
name: "git_fetch",
|
|
@@ -143611,9 +143581,20 @@ function GitFetchTool(ctx) {
|
|
|
143611
143581
|
if (params.depth !== void 0) {
|
|
143612
143582
|
fetchArgs.push(`--depth=${params.depth}`);
|
|
143613
143583
|
}
|
|
143614
|
-
|
|
143615
|
-
token: ctx.gitToken
|
|
143616
|
-
})
|
|
143584
|
+
try {
|
|
143585
|
+
await $git("fetch", fetchArgs, { token: ctx.gitToken });
|
|
143586
|
+
} catch (err) {
|
|
143587
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
143588
|
+
const isShallowUnreachable = SHALLOW_UNREACHABLE_PATTERNS.some((p2) => p2.test(msg));
|
|
143589
|
+
const isShallow = isShallowUnreachable && $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
|
|
143590
|
+
if (!isShallow) throw err;
|
|
143591
|
+
log.info(
|
|
143592
|
+
`\xBB git_fetch hit shallow-unreachable error, retrying with --deepen=${DEEPEN_RETRY_DEPTH}`
|
|
143593
|
+
);
|
|
143594
|
+
await $git("fetch", [`--deepen=${DEEPEN_RETRY_DEPTH}`, "--no-tags", "origin", params.ref], {
|
|
143595
|
+
token: ctx.gitToken
|
|
143596
|
+
});
|
|
143597
|
+
}
|
|
143617
143598
|
return { success: true, ref: params.ref };
|
|
143618
143599
|
})
|
|
143619
143600
|
});
|
|
@@ -143679,6 +143660,12 @@ function getHttpStatus(err) {
|
|
|
143679
143660
|
const status = err.status;
|
|
143680
143661
|
return typeof status === "number" ? status : void 0;
|
|
143681
143662
|
}
|
|
143663
|
+
function isTransientReviewError(err) {
|
|
143664
|
+
if (getHttpStatus(err) !== 422) return false;
|
|
143665
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
143666
|
+
return /internal error occurred, please try again/i.test(msg);
|
|
143667
|
+
}
|
|
143668
|
+
var TRANSIENT_REVIEW_RETRY_DELAYS_MS = [1e3, 3e3];
|
|
143682
143669
|
function commentableLinesForFile(patch) {
|
|
143683
143670
|
const right = /* @__PURE__ */ new Set();
|
|
143684
143671
|
const left = /* @__PURE__ */ new Set();
|
|
@@ -143946,12 +143933,26 @@ function CreatePullRequestReviewTool(ctx) {
|
|
|
143946
143933
|
}
|
|
143947
143934
|
let result;
|
|
143948
143935
|
try {
|
|
143949
|
-
result =
|
|
143950
|
-
body,
|
|
143951
|
-
|
|
143952
|
-
|
|
143953
|
-
|
|
143936
|
+
result = await retry(
|
|
143937
|
+
() => body ? createAndSubmitWithFooter(ctx, params, {
|
|
143938
|
+
body,
|
|
143939
|
+
approved: approved ?? false,
|
|
143940
|
+
hasComments: (params.comments?.length ?? 0) > 0
|
|
143941
|
+
}) : createReviewWithStrandedRecovery(ctx, params),
|
|
143942
|
+
{
|
|
143943
|
+
delaysMs: TRANSIENT_REVIEW_RETRY_DELAYS_MS,
|
|
143944
|
+
shouldRetry: isTransientReviewError,
|
|
143945
|
+
label: "review submission"
|
|
143946
|
+
}
|
|
143947
|
+
);
|
|
143954
143948
|
} catch (err) {
|
|
143949
|
+
if (isTransientReviewError(err)) {
|
|
143950
|
+
const rawMsg2 = err instanceof Error ? err.message : String(err);
|
|
143951
|
+
throw new Error(
|
|
143952
|
+
`GitHub returned a transient 422 "internal error" on the reviews endpoint after ${TRANSIENT_REVIEW_RETRY_DELAYS_MS.length + 1} attempts. This is a GitHub-side issue, not a problem with your review content. Do NOT modify or drop inline comments \u2014 their content is not the cause. Wait ~30 seconds and call this tool once more with the SAME arguments. If it still fails, submit a body-only review (move all inline feedback into \`body\` as text) so nothing is lost. GitHub said: ${rawMsg2}`,
|
|
143953
|
+
{ cause: err }
|
|
143954
|
+
);
|
|
143955
|
+
}
|
|
143955
143956
|
if (getHttpStatus(err) !== 422 || !params.comments?.length) throw err;
|
|
143956
143957
|
const details = params.comments.map((c2) => {
|
|
143957
143958
|
const line = c2.line ?? 0;
|
|
@@ -143978,6 +143979,7 @@ function CreatePullRequestReviewTool(ctx) {
|
|
|
143978
143979
|
nodeId: reviewNodeId,
|
|
143979
143980
|
reviewedSha: actuallyReviewedSha
|
|
143980
143981
|
};
|
|
143982
|
+
ctx.toolState.wasUpdated = true;
|
|
143981
143983
|
await deleteProgressComment(ctx).catch((err) => {
|
|
143982
143984
|
log.debug(`progress comment cleanup after review failed: ${err}`);
|
|
143983
143985
|
});
|
|
@@ -144329,11 +144331,38 @@ async function ensureBeforeShaReachable(params) {
|
|
|
144329
144331
|
return false;
|
|
144330
144332
|
}
|
|
144331
144333
|
}
|
|
144334
|
+
var STALE_LOCK_AGE_MS = 3e4;
|
|
144335
|
+
var GIT_LOCK_PATHS = [
|
|
144336
|
+
".git/shallow.lock",
|
|
144337
|
+
".git/index.lock",
|
|
144338
|
+
".git/objects/maintenance.lock"
|
|
144339
|
+
];
|
|
144340
|
+
function cleanupStaleGitLocks() {
|
|
144341
|
+
const now = Date.now();
|
|
144342
|
+
for (const relPath of GIT_LOCK_PATHS) {
|
|
144343
|
+
let mtimeMs;
|
|
144344
|
+
try {
|
|
144345
|
+
mtimeMs = statSync(relPath).mtimeMs;
|
|
144346
|
+
} catch {
|
|
144347
|
+
continue;
|
|
144348
|
+
}
|
|
144349
|
+
if (now - mtimeMs < STALE_LOCK_AGE_MS) continue;
|
|
144350
|
+
try {
|
|
144351
|
+
unlinkSync2(relPath);
|
|
144352
|
+
log.warning(`\xBB removed stale ${relPath} from prior run`);
|
|
144353
|
+
} catch (e) {
|
|
144354
|
+
log.debug(
|
|
144355
|
+
`\xBB failed to remove stale ${relPath}: ${e instanceof Error ? e.message : String(e)}`
|
|
144356
|
+
);
|
|
144357
|
+
}
|
|
144358
|
+
}
|
|
144359
|
+
}
|
|
144332
144360
|
async function checkoutPrBranch(pr, params) {
|
|
144333
144361
|
const { octokit, owner, name, gitToken, toolState, beforeSha } = params;
|
|
144334
144362
|
log.info(`\xBB checking out PR #${pr.number}...`);
|
|
144335
144363
|
rejectIfLeadingDash(pr.baseRef, "PR base ref");
|
|
144336
144364
|
rejectIfLeadingDash(pr.headRef, "PR head ref");
|
|
144365
|
+
cleanupStaleGitLocks();
|
|
144337
144366
|
const isFork = pr.headRepoFullName !== pr.baseRepoFullName;
|
|
144338
144367
|
const localBranch = `pr-${pr.number}`;
|
|
144339
144368
|
const isShallow = $("git", ["rev-parse", "--is-shallow-repository"], { log: false }).trim() === "true";
|
|
@@ -144503,7 +144532,8 @@ ${diffPreview}`);
|
|
|
144503
144532
|
ctx.toolState.diffCoverage = createDiffCoverageState({
|
|
144504
144533
|
diffPath,
|
|
144505
144534
|
totalLines: countLines({ content: formatResult.content }),
|
|
144506
|
-
toc: formatResult.toc
|
|
144535
|
+
toc: formatResult.toc,
|
|
144536
|
+
previous: ctx.toolState.diffCoverage
|
|
144507
144537
|
});
|
|
144508
144538
|
log.debug(
|
|
144509
144539
|
`\xBB diff coverage initialized: diffPath=${diffPath}, totalLines=${ctx.toolState.diffCoverage.totalLines}, tocEntries=${ctx.toolState.diffCoverage.tocEntries.length}`
|
|
@@ -145133,6 +145163,7 @@ function UpdatePullRequestBodyTool(ctx) {
|
|
|
145133
145163
|
pull_number: params.pull_number,
|
|
145134
145164
|
body: bodyWithFooter
|
|
145135
145165
|
});
|
|
145166
|
+
ctx.toolState.wasUpdated = true;
|
|
145136
145167
|
return {
|
|
145137
145168
|
success: true,
|
|
145138
145169
|
number: result.data.number,
|
|
@@ -145733,425 +145764,10 @@ function ResolveReviewThreadTool(ctx) {
|
|
|
145733
145764
|
});
|
|
145734
145765
|
}
|
|
145735
145766
|
|
|
145736
|
-
// agents/reviewer.ts
|
|
145737
|
-
var REVIEWER_AGENT_NAME = "reviewfrog";
|
|
145738
|
-
var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
|
|
145739
|
-
|
|
145740
|
-
HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
|
|
145741
|
-
- Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
|
|
145742
|
-
- Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
|
|
145743
|
-
- Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
|
|
145744
|
-
- Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
|
|
145745
|
-
|
|
145746
|
-
Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`;
|
|
145747
|
-
|
|
145748
|
-
// modes.ts
|
|
145749
|
-
var PR_SUMMARY_FORMAT = `### Default format
|
|
145750
|
-
|
|
145751
|
-
Follow this structure exactly:
|
|
145752
|
-
|
|
145753
|
-
<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
|
|
145754
|
-
NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
|
|
145755
|
-
|
|
145756
|
-
### Key changes
|
|
145757
|
-
|
|
145758
|
-
- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
|
|
145759
|
-
|
|
145760
|
-
<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
|
|
145761
|
-
NOTE: the metadata line goes AFTER the bullet list, not before it.
|
|
145762
|
-
|
|
145763
|
-
Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
|
|
145764
|
-
|
|
145765
|
-
<br/>
|
|
145766
|
-
|
|
145767
|
-
## Example readable section title
|
|
145768
|
-
|
|
145769
|
-
> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
|
|
145770
|
-
IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
|
|
145771
|
-
|
|
145772
|
-
1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
|
|
145773
|
-
|
|
145774
|
-
If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
|
|
145775
|
-
> <details><summary>How does X work?</summary>
|
|
145776
|
-
> Extended explanation here.
|
|
145777
|
-
> </details>
|
|
145778
|
-
|
|
145779
|
-
End each section with a file links trail (3-4 key files max):
|
|
145780
|
-
[\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
|
|
145781
|
-
|
|
145782
|
-
Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
|
|
145783
|
-
|
|
145784
|
-
CRITICAL \u2014 GitHub markdown rendering rule:
|
|
145785
|
-
GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
|
|
145786
|
-
|
|
145787
|
-
Rules:
|
|
145788
|
-
- \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
|
|
145789
|
-
- ALL variable names, identifiers, and file names in body text must be in backticks
|
|
145790
|
-
- ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
|
|
145791
|
-
- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
|
|
145792
|
-
- Do NOT include raw diff stats like '+123 / -45' or line counts
|
|
145793
|
-
- Do NOT include code blocks or repeat diff contents
|
|
145794
|
-
- Do NOT include a changelog section \u2014 the key changes list serves this purpose
|
|
145795
|
-
- Focus on *intent*, not *what* \u2014 the diff already shows what changed
|
|
145796
|
-
- Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
|
|
145797
|
-
function learningsStep(t2, n) {
|
|
145798
|
-
return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t2("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
|
|
145799
|
-
}
|
|
145800
|
-
function computeModes(agentId) {
|
|
145801
|
-
const t2 = (toolName) => formatMcpToolRef(agentId, toolName);
|
|
145802
|
-
return [
|
|
145803
|
-
{
|
|
145804
|
-
name: "Build",
|
|
145805
|
-
description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
|
|
145806
|
-
prompt: `### Checklist
|
|
145807
|
-
|
|
145808
|
-
1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
|
|
145809
|
-
|
|
145810
|
-
2. **setup**: checkout or create the branch:
|
|
145811
|
-
- **PR event, modifying the existing PR**: call \`${t2("checkout_pr")}\`
|
|
145812
|
-
- **new branch**: use \`${t2("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
|
|
145813
|
-
|
|
145814
|
-
3. **build**: implement changes using your native file and shell tools:
|
|
145815
|
-
- follow the plan (if you ran a plan phase)
|
|
145816
|
-
- plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
|
|
145817
|
-
- run relevant tests/lints before committing
|
|
145818
|
-
|
|
145819
|
-
4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
|
|
145820
|
-
|
|
145821
|
-
Skip self-review (commit directly) when the diff is **genuinely trivial**:
|
|
145822
|
-
- doc typos, comment-only edits, whitespace/format-only, import reordering
|
|
145823
|
-
- lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
|
|
145824
|
-
- low-risk dep patch bump from a trusted source
|
|
145825
|
-
|
|
145826
|
-
Run self-review when the diff has **any behavioral surface, however small**:
|
|
145827
|
-
- 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
|
|
145828
|
-
- any change to money / tax / currency / billing / fee / refund / payout calculations or constants
|
|
145829
|
-
- any change to auth / permissions / roles / sessions / tokens / signature verification
|
|
145830
|
-
- any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
|
|
145831
|
-
- new endpoints, new code paths, new error branches \u2014 even small ones
|
|
145832
|
-
- mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
|
|
145833
|
-
- anything you're uncertain about
|
|
145834
|
-
|
|
145835
|
-
Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
|
|
145836
|
-
|
|
145837
|
-
Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
|
|
145838
|
-
|
|
145839
|
-
Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
|
|
145840
|
-
|
|
145841
|
-
Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
|
|
145842
|
-
- Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
|
|
145843
|
-
- Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
|
|
145844
|
-
- Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
|
|
145845
|
-
- Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
|
|
145846
|
-
- For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
|
|
145847
|
-
|
|
145848
|
-
Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is not enough \u2014 a fix that improves correctness while degrading elegance still degrades the codebase. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
|
|
145849
|
-
|
|
145850
|
-
5. **finalize**:
|
|
145851
|
-
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
|
|
145852
|
-
- create a PR via \`${t2("create_pull_request")}\`
|
|
145853
|
-
- call \`${t2("report_progress")}\` with the PR link or the exact error if push/PR failed
|
|
145854
|
-
|
|
145855
|
-
${learningsStep(t2, 6)}
|
|
145856
|
-
|
|
145857
|
-
### Notes
|
|
145858
|
-
|
|
145859
|
-
For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
145860
|
-
},
|
|
145861
|
-
{
|
|
145862
|
-
name: "AddressReviews",
|
|
145863
|
-
description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
|
|
145864
|
-
prompt: `### Checklist
|
|
145865
|
-
|
|
145866
|
-
1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
|
|
145867
|
-
|
|
145868
|
-
2. Fetch review comments via \`${t2("get_review_comments")}\`.
|
|
145869
|
-
|
|
145870
|
-
3. For each comment:
|
|
145871
|
-
- understand the feedback
|
|
145872
|
-
- evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is not enough; improving correctness while degrading elegance still degrades the code.
|
|
145873
|
-
- if the request stands, make the code change using your native tools; otherwise reply explaining why
|
|
145874
|
-
- record what was done (or why nothing was done)
|
|
145875
|
-
|
|
145876
|
-
4. Quality check:
|
|
145877
|
-
- test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
|
|
145878
|
-
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
145879
|
-
|
|
145880
|
-
5. Finalize:
|
|
145881
|
-
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
145882
|
-
- reply to each comment using \`${t2("reply_to_review_comment")}\`
|
|
145883
|
-
- resolve addressed threads via \`${t2("resolve_review_thread")}\`
|
|
145884
|
-
- call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)
|
|
145885
|
-
|
|
145886
|
-
${learningsStep(t2, 6)}`
|
|
145887
|
-
},
|
|
145888
|
-
// Review and IncrementalReview use the multi-lens orchestrator pattern
|
|
145889
|
-
// (canonical source: .claude/commands/anneal.md). The orchestrator does
|
|
145890
|
-
// triage → parallel read-only subagent fan-out → aggregate → draft comments
|
|
145891
|
-
// → submit. For someone else's PR, parallel lenses (correctness, security,
|
|
145892
|
-
// research-validated claims, user-journey, etc.) provide breadth across
|
|
145893
|
-
// angles that a single subagent can't carry coherently. Build mode keeps
|
|
145894
|
-
// a single fresh-eyes subagent (different problem shape — orchestrator
|
|
145895
|
-
// wrote the code and bias-mitigation comes from delegating to one
|
|
145896
|
-
// subagent that doesn't share the implementation context).
|
|
145897
|
-
// Deliberate omission vs canonical /anneal: severity categorization in the
|
|
145898
|
-
// final message (the review body has its own CAUTION/IMPORTANT framing
|
|
145899
|
-
// instead of a severity table).
|
|
145900
|
-
{
|
|
145901
|
-
name: "Review",
|
|
145902
|
-
description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
|
|
145903
|
-
prompt: `### Checklist
|
|
145904
|
-
|
|
145905
|
-
1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
|
|
145906
|
-
|
|
145907
|
-
2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
|
|
145908
|
-
|
|
145909
|
-
if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit \`Reviewed \u2014 no issues found.\` per step 5. there's no value in dispatching even one lens for a typo.
|
|
145910
|
-
|
|
145911
|
-
"Genuinely trivial" (skip):
|
|
145912
|
-
- single-word doc typo, whitespace/format-only, comment-only across any number of files
|
|
145913
|
-
- lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
|
|
145914
|
-
- mechanical rename whose only effect is import-path updates
|
|
145915
|
-
- low-risk dep patch bump
|
|
145916
|
-
|
|
145917
|
-
"Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
|
|
145918
|
-
- any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
|
|
145919
|
-
- flipping a feature-flag default, default config value, or retry/timeout constant
|
|
145920
|
-
- changing a money/tax/currency/fee constant by any amount
|
|
145921
|
-
- changing an HTTP method, redirect URL, response code, or status enum
|
|
145922
|
-
- tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
|
|
145923
|
-
- renaming a public API surface (still trivial in shape, but needs an impact lens)
|
|
145924
|
-
- adding a new direct dependency (supply-chain surface)
|
|
145925
|
-
- any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
|
|
145926
|
-
- mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
|
|
145927
|
-
|
|
145928
|
-
When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
|
|
145929
|
-
|
|
145930
|
-
otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
|
|
145931
|
-
|
|
145932
|
-
- **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
|
|
145933
|
-
- **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
|
|
145934
|
-
- **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
|
|
145935
|
-
- **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
|
|
145936
|
-
|
|
145937
|
-
lenses come in two flavors, and you can mix them:
|
|
145938
|
-
- **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
|
|
145939
|
-
- **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
|
|
145940
|
-
|
|
145941
|
-
starter menu (combine, omit, or invent your own):
|
|
145942
|
-
- **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
|
|
145943
|
-
- **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
|
|
145944
|
-
- **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
|
|
145945
|
-
- **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
|
|
145946
|
-
- **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
|
|
145947
|
-
- **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
|
|
145948
|
-
- **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
|
|
145949
|
-
- **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
|
|
145950
|
-
- **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
|
|
145951
|
-
- **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
|
|
145952
|
-
- **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
|
|
145953
|
-
|
|
145954
|
-
3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
|
|
145955
|
-
- the diff path / target \u2014 reading the diff and the codebase is its job
|
|
145956
|
-
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
145957
|
-
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
145958
|
-
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
145959
|
-
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
|
|
145960
|
-
- ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
|
|
145961
|
-
|
|
145962
|
-
delegation discipline:
|
|
145963
|
-
- do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
|
|
145964
|
-
- do NOT summarize the PR for them (biases toward a validation frame)
|
|
145965
|
-
- do NOT hand them a curated reading list (let them discover scope)
|
|
145966
|
-
- do NOT pre-shape their output with a finding schema
|
|
145967
|
-
- do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
|
|
145968
|
-
|
|
145969
|
-
4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
|
|
145970
|
-
|
|
145971
|
-
for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
|
|
145972
|
-
|
|
145973
|
-
5. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
|
|
145974
|
-
|
|
145975
|
-
note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
145976
|
-
|
|
145977
|
-
- **critical issues** (blocks merge \u2014 bugs, security, data loss):
|
|
145978
|
-
\`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
|
|
145979
|
-
\`> [!CAUTION]\\n> This PR introduces a race condition in ...\`
|
|
145980
|
-
Follow with a brief summary if needed. Include all inline comments.
|
|
145981
|
-
- **recommended changes** (non-critical):
|
|
145982
|
-
\`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
|
|
145983
|
-
\`> [!IMPORTANT]\\n> Consider adding input validation for ...\`
|
|
145984
|
-
Follow with a brief summary if needed. Include all inline comments.
|
|
145985
|
-
- **no actionable issues**:
|
|
145986
|
-
\`approved: true\`, body: "Reviewed \u2014 no issues found."`
|
|
145987
|
-
},
|
|
145988
|
-
// IncrementalReview shares Review's multi-lens orchestrator pattern but
|
|
145989
|
-
// scopes the target to the incremental diff and adds prior-review-feedback
|
|
145990
|
-
// tracking. The "issues must be NEW since the last Pullfrog review" filter
|
|
145991
|
-
// lives at aggregation time (step 5), NOT in the subagent prompt — pushing
|
|
145992
|
-
// the filter into subagents matches the canonical anneal anti-pattern of
|
|
145993
|
-
// "list known pre-existing failures — don't flag these" and suppresses
|
|
145994
|
-
// signal on regressions the new commits amplified. The body-format rules
|
|
145995
|
-
// (Reviewed changes / Prior review feedback) are unchanged from the prior
|
|
145996
|
-
// version. Same severity-table omission as Review.
|
|
145997
|
-
{
|
|
145998
|
-
name: "IncrementalReview",
|
|
145999
|
-
description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
|
|
146000
|
-
prompt: `### Checklist
|
|
146001
|
-
|
|
146002
|
-
1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
|
|
146003
|
-
|
|
146004
|
-
2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
|
|
146005
|
-
|
|
146006
|
-
3. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll need this in step 6 to track which prior comments were addressed.
|
|
146007
|
-
|
|
146008
|
-
4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
|
|
146009
|
-
|
|
146010
|
-
if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
|
|
146011
|
-
|
|
146012
|
-
"Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
|
|
146013
|
-
"Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
|
|
146014
|
-
When unsure, treat as non-trivial.
|
|
146015
|
-
|
|
146016
|
-
otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
|
|
146017
|
-
|
|
146018
|
-
dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
|
|
146019
|
-
- the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
|
|
146020
|
-
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
146021
|
-
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
146022
|
-
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
146023
|
-
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
|
|
146024
|
-
- ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
|
|
146025
|
-
|
|
146026
|
-
delegation discipline:
|
|
146027
|
-
- do NOT lens-review the diff yourself in parallel with the subagents
|
|
146028
|
-
- do NOT summarize the changes for them (biases toward validation frame)
|
|
146029
|
-
- do NOT hand them a curated reading list (let them discover scope)
|
|
146030
|
-
- do NOT pre-shape their output with a finding schema
|
|
146031
|
-
- do NOT mention the other lenses (independence is the point)
|
|
146032
|
-
|
|
146033
|
-
5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
|
|
146034
|
-
|
|
146035
|
-
then check: which prior review comments were addressed by the new commits? track the addressed ones for step 6b.
|
|
146036
|
-
|
|
146037
|
-
6. **build the review body** \u2014 two distinct sections:
|
|
146038
|
-
a. **Reviewed changes**: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed.
|
|
146039
|
-
b. **Prior review feedback** (only if any were addressed): list only the prior review comments that WERE addressed by the new commits (\`- [x] safeParse instead of parse \u2014 addressed\`). omit unaddressed comments. omit this entire section if nothing was addressed. a change can appear in both sections.
|
|
146040
|
-
- no headings, no tables, no prose paragraphs in either section \u2014 just bullets
|
|
146041
|
-
- in some cases you may receive a complete diff for the whole pull request instead of an incremental one. when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
|
|
146042
|
-
|
|
146043
|
-
7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. the review body always includes the reviewed changes from step 6a. append \`Prior review feedback:\\n\` with the checklist from step 6b only if any prior comments were addressed. Follow these rules:
|
|
146044
|
-
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
146045
|
-
- IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
|
|
146046
|
-
- ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the reviewed changes summary and prior feedback (if any).
|
|
146047
|
-
- ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the reviewed changes summary and prior feedback (if any).
|
|
146048
|
-
- ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t2("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the reviewed changes summary and prior feedback (if any).`
|
|
146049
|
-
},
|
|
146050
|
-
{
|
|
146051
|
-
name: "Plan",
|
|
146052
|
-
description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
|
|
146053
|
-
prompt: `### Checklist
|
|
146054
|
-
|
|
146055
|
-
1. Analyze the task and gather context:
|
|
146056
|
-
- read AGENTS.md and relevant codebase files
|
|
146057
|
-
- understand the architecture and constraints
|
|
146058
|
-
|
|
146059
|
-
2. Produce a structured, actionable plan with clear milestones.
|
|
146060
|
-
|
|
146061
|
-
3. Call \`${t2("report_progress")}\` with the plan.
|
|
146062
|
-
|
|
146063
|
-
${learningsStep(t2, 4)}`
|
|
146064
|
-
},
|
|
146065
|
-
{
|
|
146066
|
-
name: "Fix",
|
|
146067
|
-
description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
|
|
146068
|
-
prompt: `### Checklist
|
|
146069
|
-
|
|
146070
|
-
1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
|
|
146071
|
-
|
|
146072
|
-
2. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
|
|
146073
|
-
|
|
146074
|
-
3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
|
|
146075
|
-
|
|
146076
|
-
4. Diagnose and fix:
|
|
146077
|
-
- read the workflow file, reproduce locally with the EXACT same commands CI runs
|
|
146078
|
-
- fix the issue using your native file and shell tools
|
|
146079
|
-
- verify the fix by re-running the exact CI command
|
|
146080
|
-
- review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
|
|
146081
|
-
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
146082
|
-
|
|
146083
|
-
5. Finalize:
|
|
146084
|
-
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
146085
|
-
- call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
|
|
146086
|
-
|
|
146087
|
-
${learningsStep(t2, 6)}`
|
|
146088
|
-
},
|
|
146089
|
-
{
|
|
146090
|
-
name: "ResolveConflicts",
|
|
146091
|
-
description: "Resolve merge conflicts in a PR branch against the base branch",
|
|
146092
|
-
prompt: `### Checklist
|
|
146093
|
-
|
|
146094
|
-
1. **Setup**:
|
|
146095
|
-
- Call \`${t2("checkout_pr")}\` to get the PR branch.
|
|
146096
|
-
- Call \`${t2("get_pull_request")}\` to identify the base branch (e.g., 'main').
|
|
146097
|
-
- Call \`${t2("git_fetch")}\` to fetch the base branch.
|
|
146098
|
-
|
|
146099
|
-
2. **Merge Attempt**:
|
|
146100
|
-
- Run \`git merge origin/<base_branch>\` via shell.
|
|
146101
|
-
- If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
|
|
146102
|
-
- If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
|
|
146103
|
-
|
|
146104
|
-
3. **Resolve Conflicts**:
|
|
146105
|
-
- Run \`git status\` or parse the merge output to find the list of conflicting files.
|
|
146106
|
-
- For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
|
|
146107
|
-
- Verify the file syntax is correct after resolution.
|
|
146108
|
-
|
|
146109
|
-
4. **Finalize**:
|
|
146110
|
-
- Run a final verification (build/test) to ensure the resolution works.
|
|
146111
|
-
- \`git add . && git commit -m "resolve merge conflicts"\`
|
|
146112
|
-
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
146113
|
-
- Call \`${t2("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
|
|
146114
|
-
},
|
|
146115
|
-
{
|
|
146116
|
-
name: "Task",
|
|
146117
|
-
description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
|
|
146118
|
-
prompt: `### Checklist
|
|
146119
|
-
|
|
146120
|
-
1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
|
|
146121
|
-
|
|
146122
|
-
2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
|
|
146123
|
-
- plan your approach before starting
|
|
146124
|
-
- use native file and shell tools for local operations
|
|
146125
|
-
- use ${pullfrogMcpName} MCP tools for GitHub/git operations
|
|
146126
|
-
- if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
|
|
146127
|
-
|
|
146128
|
-
3. Finalize:
|
|
146129
|
-
- if code changes were made, push to a pull request (new or existing) using \`${t2("push_branch")}\` and \`${t2("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
|
|
146130
|
-
- call \`${t2("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
|
|
146131
|
-
- if the task involved labeling, commenting, or other GitHub operations, perform those directly
|
|
146132
|
-
|
|
146133
|
-
${learningsStep(t2, 4)}`
|
|
146134
|
-
},
|
|
146135
|
-
{
|
|
146136
|
-
name: "Summarize",
|
|
146137
|
-
description: "Summarize a PR with a structured comment that is updated in place on subsequent pushes",
|
|
146138
|
-
prompt: `### Checklist
|
|
146139
|
-
|
|
146140
|
-
1. Checkout the PR via \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
|
|
146141
|
-
2. Read the diff using the TOC to selectively read relevant sections (not the entire file). Produce a structured summary. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
|
|
146142
|
-
3. Call \`${t2("create_issue_comment")}\` with \`type: "Summary"\` and the summary body.
|
|
146143
|
-
4. Call \`${t2("report_progress")}\` with a brief note (e.g., "Posted PR summary.").
|
|
146144
|
-
|
|
146145
|
-
${PR_SUMMARY_FORMAT}`
|
|
146146
|
-
}
|
|
146147
|
-
];
|
|
146148
|
-
}
|
|
146149
|
-
var modes = computeModes("opencode");
|
|
146150
|
-
|
|
146151
145767
|
// mcp/selectMode.ts
|
|
146152
145768
|
var SelectModeParams = type({
|
|
146153
145769
|
mode: type.string.describe(
|
|
146154
|
-
"the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts'
|
|
145770
|
+
"the name of the mode to select (e.g., 'Build', 'Plan', 'Review', 'IncrementalReview', 'Fix', 'AddressReviews', 'Task', 'ResolveConflicts')"
|
|
146155
145771
|
),
|
|
146156
145772
|
"issue_number?": type("number").describe(
|
|
146157
145773
|
"optional issue number; when provided with Plan mode, used to look up an existing plan comment for this issue (edit vs create)"
|
|
@@ -146172,18 +145788,7 @@ An existing plan comment was found for this issue. Update that comment with the
|
|
|
146172
145788
|
- gather relevant codebase context (file paths, architecture notes from AGENTS.md)
|
|
146173
145789
|
- produce a structured plan with clear milestones
|
|
146174
145790
|
3. Call \`${t2("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
|
|
146175
|
-
4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping..."
|
|
146176
|
-
SummaryUpdate: `### Checklist (updating existing summary)
|
|
146177
|
-
|
|
146178
|
-
An existing summary comment was found for this PR. Update it rather than creating a new one.
|
|
146179
|
-
|
|
146180
|
-
1. Use \`previousSummaryBody\` from this response as the current summary to revise.
|
|
146181
|
-
2. Checkout the PR via \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`.
|
|
146182
|
-
3. Read the diff using the TOC to selectively read relevant sections. Produce an updated summary reflecting the current state of the PR, using the existing summary (\`previousSummaryBody\`) as a starting point. If EVENT INSTRUCTIONS specify a custom format, follow that instead of the default format below.
|
|
146183
|
-
4. Call \`${t2("edit_issue_comment")}\` with \`commentId: existingSummaryCommentId\` (from this response) and the updated summary body.
|
|
146184
|
-
5. Call \`${t2("report_progress")}\` with a brief note (e.g., "Updated PR summary.").
|
|
146185
|
-
|
|
146186
|
-
${PR_SUMMARY_FORMAT}`
|
|
145791
|
+
4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping...".`
|
|
146187
145792
|
};
|
|
146188
145793
|
}
|
|
146189
145794
|
var modeInstructionParent = {
|
|
@@ -146216,30 +145821,22 @@ async function fetchExistingPlanComment(ctx, issueNumber) {
|
|
|
146216
145821
|
return null;
|
|
146217
145822
|
}
|
|
146218
145823
|
}
|
|
146219
|
-
|
|
146220
|
-
|
|
146221
|
-
|
|
146222
|
-
|
|
146223
|
-
|
|
146224
|
-
|
|
146225
|
-
|
|
146226
|
-
|
|
146227
|
-
|
|
146228
|
-
|
|
146229
|
-
|
|
146230
|
-
|
|
146231
|
-
|
|
146232
|
-
|
|
146233
|
-
|
|
146234
|
-
|
|
146235
|
-
}
|
|
146236
|
-
const errMsg = "error" in data ? data.error : "(no error body)";
|
|
146237
|
-
log.warning(`fetchExistingSummaryComment: ${response.status} ${path3} \u2014 ${errMsg}`);
|
|
146238
|
-
return null;
|
|
146239
|
-
} catch (error49) {
|
|
146240
|
-
log.warning("fetchExistingSummaryComment failed:", error49);
|
|
146241
|
-
return null;
|
|
146242
|
-
}
|
|
145824
|
+
var SUMMARY_MODES = /* @__PURE__ */ new Set(["Review", "IncrementalReview", "Task"]);
|
|
145825
|
+
function buildSummaryAddendum(t2, ctx) {
|
|
145826
|
+
const filePath = ctx.toolState.summaryFilePath;
|
|
145827
|
+
if (!filePath) return "";
|
|
145828
|
+
return `### PR summary snapshot \u2014 required step
|
|
145829
|
+
|
|
145830
|
+
A rolling PR summary lives at \`${filePath}\`. It is your durable cross-run agent context \u2014 a functional summary of what this PR does, the subsystems and files it touches, the material behavior of its changes, and any risks or open questions worth carrying forward. It is NOT a chronological log of past review runs; commit-level history can already be reconstructed from \`${t2("list_pull_request_reviews")}\`.
|
|
145831
|
+
|
|
145832
|
+
How to use it:
|
|
145833
|
+
|
|
145834
|
+
- read \`${filePath}\` at the START of the run, alongside the diff. it represents what previous agent runs already understood about this PR \u2014 absorb it before picking lenses or crafting subagent dispatch prompts. if it's a fresh seed (file is one or two lines), this is a first review and you'll be filling it in from the diff.
|
|
145835
|
+
- let the snapshot inform triage and dispatch. when it already tracks a risk, your lens prompts to subagents are stronger when they reference that context (e.g. "the JSDoc explicitly scopes to code points \u2014 do not flag grapheme-cluster issues" if the snapshot already documents that contract). when something the snapshot tracks is now resolved by new commits, note that. when new commits introduce something the snapshot doesn't yet describe, that's exactly where your fan-out should focus.
|
|
145836
|
+
- update the file in place to reflect the PR's CURRENT state. revise stale claims, drop resolved risks, add new behavior or risks. accuracy over breadth \u2014 every claim must be grounded in the diff. write for the next agent run, not for a human.
|
|
145837
|
+
- structure however serves THIS PR. there is no required section template. a refactor might organize by renamed export and call-site impact; a feature by capability; a billing change by money path. a compact note of which commit ranges have been reviewed should always be present so future runs scope correctly, but the rest is your call. when the structure works across runs, keep it stable so range-diffs are clean; when the PR's character changes (e.g. scope expands), reshape.
|
|
145838
|
+
|
|
145839
|
+
Do NOT call \`${t2("create_issue_comment")}\` for the summary \u2014 the server reads this file at end-of-run and persists it. The file edit is mandatory regardless of whether a review is submitted; the snapshot feeds the next run.`;
|
|
146243
145840
|
}
|
|
146244
145841
|
function SelectModeTool(ctx) {
|
|
146245
145842
|
const t2 = (name) => formatMcpToolRef(ctx.agentId, name);
|
|
@@ -146281,21 +145878,18 @@ function SelectModeTool(ctx) {
|
|
|
146281
145878
|
}
|
|
146282
145879
|
}
|
|
146283
145880
|
}
|
|
146284
|
-
|
|
146285
|
-
|
|
146286
|
-
|
|
146287
|
-
|
|
146288
|
-
|
|
146289
|
-
|
|
146290
|
-
|
|
146291
|
-
|
|
146292
|
-
|
|
146293
|
-
|
|
146294
|
-
};
|
|
146295
|
-
}
|
|
146296
|
-
}
|
|
145881
|
+
const summaryAddendum = SUMMARY_MODES.has(selectedMode.name) ? buildSummaryAddendum(t2, ctx) : "";
|
|
145882
|
+
const base = buildOrchestratorGuidance(ctx, selectedMode);
|
|
145883
|
+
if (summaryAddendum.length > 0) {
|
|
145884
|
+
return {
|
|
145885
|
+
...base,
|
|
145886
|
+
orchestratorGuidance: `${base.orchestratorGuidance}
|
|
145887
|
+
|
|
145888
|
+
${summaryAddendum}`,
|
|
145889
|
+
summaryFilePath: ctx.toolState.summaryFilePath
|
|
145890
|
+
};
|
|
146297
145891
|
}
|
|
146298
|
-
return
|
|
145892
|
+
return base;
|
|
146299
145893
|
})
|
|
146300
145894
|
});
|
|
146301
145895
|
}
|
|
@@ -146809,6 +146403,405 @@ async function startMcpHttpServer(ctx, options) {
|
|
|
146809
146403
|
};
|
|
146810
146404
|
}
|
|
146811
146405
|
|
|
146406
|
+
// agents/reviewer.ts
|
|
146407
|
+
var REVIEWER_AGENT_NAME = "reviewfrog";
|
|
146408
|
+
var REVIEWER_SYSTEM_PROMPT = `You are a read-only review subagent. Your role is to find flaws in code or artifacts provided by the orchestrator and report findings \u2014 never to modify state.
|
|
146409
|
+
|
|
146410
|
+
HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):
|
|
146411
|
+
- Read-only tools only. Do NOT write or edit files. Do NOT run shell commands that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` are fine; anything that mutates the working tree, the remote, the filesystem, or external state is prohibited).
|
|
146412
|
+
- Do NOT call any state-changing MCP tool. State-changing means: posts a comment, pushes a branch, creates/updates a PR or issue, changes labels, resolves review threads, persists learnings, sets workflow output, installs dependencies, uploads files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, log inspection, diff retrieval) are fine.
|
|
146413
|
+
- Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch pre-aggregates findings through an intermediate model and defeats the design.
|
|
146414
|
+
- Test for any tool call before invoking it: would this still be a no-op if reverted? If not, do not call it. Apply this test to tools added after this prompt was written \u2014 the rule is the invariant, not the enumeration.
|
|
146415
|
+
|
|
146416
|
+
Report findings clearly with file:line references and quoted evidence where possible. Flag uncertainty explicitly \u2014 if you cannot verify a claim, say so rather than guess.`;
|
|
146417
|
+
|
|
146418
|
+
// modes.ts
|
|
146419
|
+
var PR_SUMMARY_FORMAT = `### Default format
|
|
146420
|
+
|
|
146421
|
+
Follow this structure exactly:
|
|
146422
|
+
|
|
146423
|
+
<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.
|
|
146424
|
+
NOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.
|
|
146425
|
+
|
|
146426
|
+
### Key changes
|
|
146427
|
+
|
|
146428
|
+
- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add \`TodoTracker\` for live checklists**). A reviewer should understand the full PR from this list alone.
|
|
146429
|
+
|
|
146430
|
+
<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: \`{base}\` \u2190 \`{head}\`</sub>
|
|
146431
|
+
NOTE: the metadata line goes AFTER the bullet list, not before it.
|
|
146432
|
+
|
|
146433
|
+
Then for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).
|
|
146434
|
+
|
|
146435
|
+
<br/>
|
|
146436
|
+
|
|
146437
|
+
## Example readable section title
|
|
146438
|
+
|
|
146439
|
+
> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]
|
|
146440
|
+
IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
|
|
146441
|
+
|
|
146442
|
+
1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.
|
|
146443
|
+
|
|
146444
|
+
If a change warrants deeper explanation, use a blockquoted details/summary framed as a question:
|
|
146445
|
+
> <details><summary>How does X work?</summary>
|
|
146446
|
+
> Extended explanation here.
|
|
146447
|
+
> </details>
|
|
146448
|
+
|
|
146449
|
+
End each section with a file links trail (3-4 key files max):
|
|
146450
|
+
[\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
|
|
146451
|
+
|
|
146452
|
+
Single-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.
|
|
146453
|
+
|
|
146454
|
+
CRITICAL \u2014 GitHub markdown rendering rule:
|
|
146455
|
+
GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
|
|
146456
|
+
|
|
146457
|
+
Rules:
|
|
146458
|
+
- \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
|
|
146459
|
+
- ALL variable names, identifiers, and file names in body text must be in backticks
|
|
146460
|
+
- ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
|
|
146461
|
+
- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
|
|
146462
|
+
- Do NOT include raw diff stats like '+123 / -45' or line counts
|
|
146463
|
+
- Do NOT include code blocks or repeat diff contents
|
|
146464
|
+
- Do NOT include a changelog section \u2014 the key changes list serves this purpose
|
|
146465
|
+
- Focus on *intent*, not *what* \u2014 the diff already shows what changed
|
|
146466
|
+
- Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
|
|
146467
|
+
function learningsStep(t2, n) {
|
|
146468
|
+
return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t2("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
|
|
146469
|
+
}
|
|
146470
|
+
function computeModes(agentId) {
|
|
146471
|
+
const t2 = (toolName) => formatMcpToolRef(agentId, toolName);
|
|
146472
|
+
return [
|
|
146473
|
+
{
|
|
146474
|
+
name: "Build",
|
|
146475
|
+
description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
|
|
146476
|
+
prompt: `### Checklist
|
|
146477
|
+
|
|
146478
|
+
1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
|
|
146479
|
+
|
|
146480
|
+
2. **setup**: checkout or create the branch:
|
|
146481
|
+
- **PR event, modifying the existing PR**: call \`${t2("checkout_pr")}\`
|
|
146482
|
+
- **new branch**: use \`${t2("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
|
|
146483
|
+
|
|
146484
|
+
3. **build**: implement changes using your native file and shell tools:
|
|
146485
|
+
- follow the plan (if you ran a plan phase)
|
|
146486
|
+
- plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
|
|
146487
|
+
- run relevant tests/lints before committing
|
|
146488
|
+
|
|
146489
|
+
4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
|
|
146490
|
+
|
|
146491
|
+
Skip self-review (commit directly) when the diff is **genuinely trivial**:
|
|
146492
|
+
- doc typos, comment-only edits, whitespace/format-only, import reordering
|
|
146493
|
+
- lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
|
|
146494
|
+
- low-risk dep patch bump from a trusted source
|
|
146495
|
+
|
|
146496
|
+
Run self-review when the diff has **any behavioral surface, however small**:
|
|
146497
|
+
- 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
|
|
146498
|
+
- any change to money / tax / currency / billing / fee / refund / payout calculations or constants
|
|
146499
|
+
- any change to auth / permissions / roles / sessions / tokens / signature verification
|
|
146500
|
+
- any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
|
|
146501
|
+
- new endpoints, new code paths, new error branches \u2014 even small ones
|
|
146502
|
+
- mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
|
|
146503
|
+
- anything you're uncertain about
|
|
146504
|
+
|
|
146505
|
+
Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
|
|
146506
|
+
|
|
146507
|
+
Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
|
|
146508
|
+
|
|
146509
|
+
Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
|
|
146510
|
+
|
|
146511
|
+
Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
|
|
146512
|
+
- Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
|
|
146513
|
+
- Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
|
|
146514
|
+
- Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
|
|
146515
|
+
- Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
|
|
146516
|
+
- For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
|
|
146517
|
+
|
|
146518
|
+
Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
|
|
146519
|
+
|
|
146520
|
+
5. **finalize**:
|
|
146521
|
+
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
|
|
146522
|
+
- create a PR via \`${t2("create_pull_request")}\`
|
|
146523
|
+
- call \`${t2("report_progress")}\` with the PR link or the exact error if push/PR failed
|
|
146524
|
+
|
|
146525
|
+
${learningsStep(t2, 6)}
|
|
146526
|
+
|
|
146527
|
+
### Notes
|
|
146528
|
+
|
|
146529
|
+
For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
146530
|
+
},
|
|
146531
|
+
{
|
|
146532
|
+
name: "AddressReviews",
|
|
146533
|
+
description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
|
|
146534
|
+
prompt: `### Checklist
|
|
146535
|
+
|
|
146536
|
+
1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
|
|
146537
|
+
|
|
146538
|
+
2. Fetch review comments via \`${t2("get_review_comments")}\`.
|
|
146539
|
+
|
|
146540
|
+
3. For each comment:
|
|
146541
|
+
- understand the feedback
|
|
146542
|
+
- evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
|
|
146543
|
+
- if the request stands, make the code change using your native tools; otherwise reply explaining why
|
|
146544
|
+
- record what was done (or why nothing was done)
|
|
146545
|
+
|
|
146546
|
+
4. Quality check:
|
|
146547
|
+
- test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
|
|
146548
|
+
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
146549
|
+
|
|
146550
|
+
5. Finalize:
|
|
146551
|
+
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
146552
|
+
- reply to each comment using \`${t2("reply_to_review_comment")}\`
|
|
146553
|
+
- resolve addressed threads via \`${t2("resolve_review_thread")}\`
|
|
146554
|
+
- call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)
|
|
146555
|
+
|
|
146556
|
+
${learningsStep(t2, 6)}`
|
|
146557
|
+
},
|
|
146558
|
+
// Review and IncrementalReview use the multi-lens orchestrator pattern
|
|
146559
|
+
// (canonical source: .claude/commands/anneal.md). The orchestrator does
|
|
146560
|
+
// triage → parallel read-only subagent fan-out → aggregate → draft comments
|
|
146561
|
+
// → submit. For someone else's PR, parallel lenses (correctness, security,
|
|
146562
|
+
// research-validated claims, user-journey, etc.) provide breadth across
|
|
146563
|
+
// angles that a single subagent can't carry coherently. Build mode keeps
|
|
146564
|
+
// a single fresh-eyes subagent (different problem shape — orchestrator
|
|
146565
|
+
// wrote the code and bias-mitigation comes from delegating to one
|
|
146566
|
+
// subagent that doesn't share the implementation context).
|
|
146567
|
+
// Deliberate omission vs canonical /anneal: severity categorization in the
|
|
146568
|
+
// final message (the review body has its own CAUTION/IMPORTANT framing
|
|
146569
|
+
// instead of a severity table).
|
|
146570
|
+
{
|
|
146571
|
+
name: "Review",
|
|
146572
|
+
description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
|
|
146573
|
+
prompt: `### Checklist
|
|
146574
|
+
|
|
146575
|
+
1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
|
|
146576
|
+
|
|
146577
|
+
2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
|
|
146578
|
+
|
|
146579
|
+
if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
|
|
146580
|
+
|
|
146581
|
+
"Genuinely trivial" (skip):
|
|
146582
|
+
- single-word doc typo, whitespace/format-only, comment-only across any number of files
|
|
146583
|
+
- lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
|
|
146584
|
+
- mechanical rename whose only effect is import-path updates
|
|
146585
|
+
- low-risk dep patch bump
|
|
146586
|
+
|
|
146587
|
+
"Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
|
|
146588
|
+
- any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
|
|
146589
|
+
- flipping a feature-flag default, default config value, or retry/timeout constant
|
|
146590
|
+
- changing a money/tax/currency/fee constant by any amount
|
|
146591
|
+
- changing an HTTP method, redirect URL, response code, or status enum
|
|
146592
|
+
- tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
|
|
146593
|
+
- renaming a public API surface (still trivial in shape, but needs an impact lens)
|
|
146594
|
+
- adding a new direct dependency (supply-chain surface)
|
|
146595
|
+
- any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
|
|
146596
|
+
- mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
|
|
146597
|
+
|
|
146598
|
+
When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
|
|
146599
|
+
|
|
146600
|
+
otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
|
|
146601
|
+
|
|
146602
|
+
- **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
|
|
146603
|
+
- **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
|
|
146604
|
+
- **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
|
|
146605
|
+
- **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
|
|
146606
|
+
|
|
146607
|
+
lenses come in two flavors, and you can mix them:
|
|
146608
|
+
- **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
|
|
146609
|
+
- **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
|
|
146610
|
+
|
|
146611
|
+
starter menu (combine, omit, or invent your own):
|
|
146612
|
+
- **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
|
|
146613
|
+
- **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
|
|
146614
|
+
- **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
|
|
146615
|
+
- **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
|
|
146616
|
+
- **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
|
|
146617
|
+
- **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
|
|
146618
|
+
- **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
|
|
146619
|
+
- **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
|
|
146620
|
+
- **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
|
|
146621
|
+
- **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
|
|
146622
|
+
- **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
|
|
146623
|
+
|
|
146624
|
+
3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
|
|
146625
|
+
- the diff path / target \u2014 reading the diff and the codebase is its job
|
|
146626
|
+
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
146627
|
+
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
146628
|
+
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
146629
|
+
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
|
|
146630
|
+
- ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
|
|
146631
|
+
|
|
146632
|
+
delegation discipline:
|
|
146633
|
+
- do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
|
|
146634
|
+
- do NOT summarize the PR for them (biases toward a validation frame)
|
|
146635
|
+
- do NOT hand them a curated reading list (let them discover scope)
|
|
146636
|
+
- do NOT pre-shape their output with a finding schema
|
|
146637
|
+
- do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
|
|
146638
|
+
|
|
146639
|
+
4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
|
|
146640
|
+
|
|
146641
|
+
for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
|
|
146642
|
+
|
|
146643
|
+
5. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
|
|
146644
|
+
|
|
146645
|
+
note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
146646
|
+
|
|
146647
|
+
The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
|
|
146648
|
+
|
|
146649
|
+
- **critical issues** (blocks merge \u2014 bugs, security, data loss):
|
|
146650
|
+
\`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
|
|
146651
|
+
- **recommended changes** (non-critical):
|
|
146652
|
+
\`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
|
|
146653
|
+
- **no actionable issues**:
|
|
146654
|
+
\`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
|
|
146655
|
+
|
|
146656
|
+
${PR_SUMMARY_FORMAT}`
|
|
146657
|
+
},
|
|
146658
|
+
// IncrementalReview shares Review's multi-lens orchestrator pattern but
|
|
146659
|
+
// scopes the target to the incremental diff. The "issues must be NEW
|
|
146660
|
+
// since the last Pullfrog review" filter lives at aggregation time
|
|
146661
|
+
// (step 5), NOT in the subagent prompt — pushing the filter into
|
|
146662
|
+
// subagents matches the canonical anneal anti-pattern of "list known
|
|
146663
|
+
// pre-existing failures — don't flag these" and suppresses signal on
|
|
146664
|
+
// regressions the new commits amplified. The review body is just
|
|
146665
|
+
// "Reviewed changes" — a separate "Prior review feedback" checklist
|
|
146666
|
+
// would duplicate the rolling PR summary snapshot's record of what
|
|
146667
|
+
// earlier runs already addressed and add noise to the user-facing
|
|
146668
|
+
// body. Same severity-table omission as Review.
|
|
146669
|
+
{
|
|
146670
|
+
name: "IncrementalReview",
|
|
146671
|
+
description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
|
|
146672
|
+
prompt: `### Checklist
|
|
146673
|
+
|
|
146674
|
+
1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
|
|
146675
|
+
|
|
146676
|
+
2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
|
|
146677
|
+
|
|
146678
|
+
3. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
|
|
146679
|
+
|
|
146680
|
+
4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
|
|
146681
|
+
|
|
146682
|
+
if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
|
|
146683
|
+
|
|
146684
|
+
"Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
|
|
146685
|
+
"Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
|
|
146686
|
+
When unsure, treat as non-trivial.
|
|
146687
|
+
|
|
146688
|
+
otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
|
|
146689
|
+
|
|
146690
|
+
dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
|
|
146691
|
+
- the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
|
|
146692
|
+
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
146693
|
+
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
146694
|
+
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
146695
|
+
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
|
|
146696
|
+
- ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
|
|
146697
|
+
|
|
146698
|
+
delegation discipline:
|
|
146699
|
+
- do NOT lens-review the diff yourself in parallel with the subagents
|
|
146700
|
+
- do NOT summarize the changes for them (biases toward validation frame)
|
|
146701
|
+
- do NOT hand them a curated reading list (let them discover scope)
|
|
146702
|
+
- do NOT pre-shape their output with a finding schema
|
|
146703
|
+
- do NOT mention the other lenses (independence is the point)
|
|
146704
|
+
|
|
146705
|
+
5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
|
|
146706
|
+
|
|
146707
|
+
6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
|
|
146708
|
+
|
|
146709
|
+
7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
|
|
146710
|
+
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
146711
|
+
- IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
|
|
146712
|
+
- ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
|
|
146713
|
+
- ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
|
|
146714
|
+
- ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t2("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
|
|
146715
|
+
},
|
|
146716
|
+
{
|
|
146717
|
+
name: "Plan",
|
|
146718
|
+
description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
|
|
146719
|
+
prompt: `### Checklist
|
|
146720
|
+
|
|
146721
|
+
1. Analyze the task and gather context:
|
|
146722
|
+
- read AGENTS.md and relevant codebase files
|
|
146723
|
+
- understand the architecture and constraints
|
|
146724
|
+
|
|
146725
|
+
2. Produce a structured, actionable plan with clear milestones.
|
|
146726
|
+
|
|
146727
|
+
3. Call \`${t2("report_progress")}\` with the plan.
|
|
146728
|
+
|
|
146729
|
+
${learningsStep(t2, 4)}`
|
|
146730
|
+
},
|
|
146731
|
+
{
|
|
146732
|
+
name: "Fix",
|
|
146733
|
+
description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
|
|
146734
|
+
prompt: `### Checklist
|
|
146735
|
+
|
|
146736
|
+
1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
|
|
146737
|
+
|
|
146738
|
+
2. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
|
|
146739
|
+
|
|
146740
|
+
3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
|
|
146741
|
+
|
|
146742
|
+
4. Diagnose and fix:
|
|
146743
|
+
- read the workflow file, reproduce locally with the EXACT same commands CI runs
|
|
146744
|
+
- fix the issue using your native file and shell tools
|
|
146745
|
+
- verify the fix by re-running the exact CI command
|
|
146746
|
+
- review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
|
|
146747
|
+
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
146748
|
+
|
|
146749
|
+
5. Finalize:
|
|
146750
|
+
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
146751
|
+
- call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
|
|
146752
|
+
|
|
146753
|
+
${learningsStep(t2, 6)}`
|
|
146754
|
+
},
|
|
146755
|
+
{
|
|
146756
|
+
name: "ResolveConflicts",
|
|
146757
|
+
description: "Resolve merge conflicts in a PR branch against the base branch",
|
|
146758
|
+
prompt: `### Checklist
|
|
146759
|
+
|
|
146760
|
+
1. **Setup**:
|
|
146761
|
+
- Call \`${t2("checkout_pr")}\` to get the PR branch.
|
|
146762
|
+
- Call \`${t2("get_pull_request")}\` to identify the base branch (e.g., 'main').
|
|
146763
|
+
- Call \`${t2("git_fetch")}\` to fetch the base branch.
|
|
146764
|
+
|
|
146765
|
+
2. **Merge Attempt**:
|
|
146766
|
+
- Run \`git merge origin/<base_branch>\` via shell.
|
|
146767
|
+
- If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
|
|
146768
|
+
- If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
|
|
146769
|
+
|
|
146770
|
+
3. **Resolve Conflicts**:
|
|
146771
|
+
- Run \`git status\` or parse the merge output to find the list of conflicting files.
|
|
146772
|
+
- For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
|
|
146773
|
+
- Verify the file syntax is correct after resolution.
|
|
146774
|
+
|
|
146775
|
+
4. **Finalize**:
|
|
146776
|
+
- Run a final verification (build/test) to ensure the resolution works.
|
|
146777
|
+
- \`git add . && git commit -m "resolve merge conflicts"\`
|
|
146778
|
+
- confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
146779
|
+
- Call \`${t2("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
|
|
146780
|
+
},
|
|
146781
|
+
{
|
|
146782
|
+
name: "Task",
|
|
146783
|
+
description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
|
|
146784
|
+
prompt: `### Checklist
|
|
146785
|
+
|
|
146786
|
+
1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
|
|
146787
|
+
|
|
146788
|
+
2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
|
|
146789
|
+
- plan your approach before starting
|
|
146790
|
+
- use native file and shell tools for local operations
|
|
146791
|
+
- use ${pullfrogMcpName} MCP tools for GitHub/git operations
|
|
146792
|
+
- if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
|
|
146793
|
+
|
|
146794
|
+
3. Finalize:
|
|
146795
|
+
- if code changes were made, push to a pull request (new or existing) using \`${t2("push_branch")}\` and \`${t2("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
|
|
146796
|
+
- call \`${t2("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
|
|
146797
|
+
- if the task involved labeling, commenting, or other GitHub operations, perform those directly
|
|
146798
|
+
|
|
146799
|
+
${learningsStep(t2, 4)}`
|
|
146800
|
+
}
|
|
146801
|
+
];
|
|
146802
|
+
}
|
|
146803
|
+
var modes = computeModes("opencode");
|
|
146804
|
+
|
|
146812
146805
|
// agents/claude.ts
|
|
146813
146806
|
import { execFileSync as execFileSync3 } from "node:child_process";
|
|
146814
146807
|
import { mkdirSync as mkdirSync4, writeFileSync as writeFileSync7 } from "node:fs";
|
|
@@ -146901,23 +146894,41 @@ async function installFromNpmTarball(params) {
|
|
|
146901
146894
|
}
|
|
146902
146895
|
|
|
146903
146896
|
// utils/providerErrors.ts
|
|
146897
|
+
var statusKey = `\\b(?:status[_ ]?code|http[_ ]?status|status)["']?\\s*[:=]\\s*["']?`;
|
|
146904
146898
|
var PROVIDER_ERROR_PATTERNS = [
|
|
146905
|
-
{
|
|
146906
|
-
{
|
|
146907
|
-
{
|
|
146908
|
-
|
|
146909
|
-
|
|
146910
|
-
|
|
146911
|
-
|
|
146912
|
-
|
|
146913
|
-
{
|
|
146899
|
+
{ regex: new RegExp(`${statusKey}429\\b`, "i"), label: "rate limited (429)" },
|
|
146900
|
+
{ regex: new RegExp(`${statusKey}500\\b`, "i"), label: "provider 500 error" },
|
|
146901
|
+
{ regex: new RegExp(`${statusKey}503\\b`, "i"), label: "provider unavailable (503)" },
|
|
146902
|
+
// matches `rate limit`, `rate limited`, `rate limits exceeded`,
|
|
146903
|
+
// `rate_limit_error`, `rate_limit_exceeded`. the leading `\b` + `[_ ]`
|
|
146904
|
+
// separator rejects `x-ratelimit-*` / `anthropic-ratelimit-*` response
|
|
146905
|
+
// headers (no separator between "rate" and "limit") which routinely
|
|
146906
|
+
// appear in dumped 401 / 4xx error JSON.
|
|
146907
|
+
{ regex: /\brate[_ ]limit/i, label: "rate limited" },
|
|
146908
|
+
{ regex: /\bRESOURCE_EXHAUSTED\b/, label: "quota exhausted" },
|
|
146909
|
+
// Google gRPC `INTERNAL` status. word-boundary anchors reject
|
|
146910
|
+
// `INTERNAL_SERVER_ERROR` (HTTP 500 message that may appear in unrelated
|
|
146911
|
+
// log lines) and identifiers like `INTERNALS`.
|
|
146912
|
+
{ regex: /\bINTERNAL\b/, label: "provider internal error" },
|
|
146913
|
+
{ regex: /\bUNAVAILABLE\b/, label: "provider unavailable" },
|
|
146914
|
+
// matches `quota`, `insufficient_quota`, `quota_exceeded`, `quotaExceeded`.
|
|
146915
|
+
// word-character lookarounds would reject `_quota` / `quotaX`; `quota` is
|
|
146916
|
+
// specific enough that a plain substring match is safe.
|
|
146917
|
+
{ regex: /quota/i, label: "quota error" },
|
|
146918
|
+
// explicit zero-quota response, e.g. `{"limit": 0}`. the `\b` anchor
|
|
146919
|
+
// around `limit` rejects keys like `time_limit` or `field_limit`.
|
|
146920
|
+
{ regex: /["']?\blimit\b["']?\s*:\s*0\b/, label: "zero quota" }
|
|
146914
146921
|
];
|
|
146915
146922
|
function detectProviderError(text) {
|
|
146916
146923
|
for (const entry of PROVIDER_ERROR_PATTERNS) {
|
|
146917
|
-
if (
|
|
146924
|
+
if (entry.regex.test(text)) return entry.label;
|
|
146918
146925
|
}
|
|
146919
146926
|
return null;
|
|
146920
146927
|
}
|
|
146928
|
+
// Two alternative message shapes, matched case-insensitively (`i`) and with
// `.` spanning newlines (`s`) so the two halves may sit on different lines.
var ROUTER_KEYLIMIT_EXHAUSTED_PATTERN = new RegExp(
  [
    "requires more credits.*?fewer max_tokens",
    "requested up to \\d+ tokens.*?can only afford"
  ].join("|"),
  "is"
);
/**
 * Report whether `text` matches ROUTER_KEYLIMIT_EXHAUSTED_PATTERN.
 *
 * @param {string} text - error text to inspect.
 * @returns {boolean} true when either message shape is present.
 */
function isRouterKeylimitExhaustedError(text) {
  const matched = ROUTER_KEYLIMIT_EXHAUSTED_PATTERN.test(text);
  return matched;
}
|
|
146921
146932
|
|
|
146922
146933
|
// utils/skills.ts
|
|
146923
146934
|
import { spawnSync as spawnSync5 } from "node:child_process";
|
|
@@ -147024,6 +147035,7 @@ var ThinkingTimer = class {
|
|
|
147024
147035
|
};
|
|
147025
147036
|
|
|
147026
147037
|
// agents/postRun.ts
|
|
147038
|
+
import { readFile } from "node:fs/promises";
|
|
147027
147039
|
var MAX_HOOK_OUTPUT_CHARS = 4096;
|
|
147028
147040
|
function truncateHookOutput(raw2) {
|
|
147029
147041
|
if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
|
|
@@ -147068,6 +147080,23 @@ function buildStopHookPrompt(failure) {
|
|
|
147068
147080
|
"```"
|
|
147069
147081
|
].join("\n");
|
|
147070
147082
|
}
|
|
147083
|
+
/**
 * Check whether the summary file still holds exactly the seed text.
 *
 * @param {string} filePath - path of the summary file to read as UTF-8.
 * @param {string} seed - content the file was originally seeded with.
 * @returns {Promise<boolean>} true only when the file reads back strictly
 *   equal to `seed`; false when it differs or cannot be read.
 */
async function isSummaryUnchanged(filePath, seed) {
  let onDisk;
  try {
    onDisk = await readFile(filePath, "utf8");
  } catch {
    // an unreadable file is reported as "not unchanged" rather than thrown
    return false;
  }
  return onDisk === seed;
}
|
|
147091
|
+
/**
 * Build the follow-up prompt sent when the rolling PR summary file was left
 * identical to its seed.
 *
 * @param {string} filePath - summary file path, interpolated into the headline.
 * @returns {string} three paragraphs separated by blank lines.
 */
function buildSummaryStalePrompt(filePath) {
  const headline = `PR SUMMARY UNTOUCHED \u2014 the rolling PR summary file at \`${filePath}\` is byte-identical to its seed; this run did not edit it.`;
  const guidance = "review the diff and update the file in place to reflect what changed in the PR. update intent, key changes, and any risks worth flagging \u2014 keep the existing section headings stable so incremental runs produce clean diffs.";
  const fallback = "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is \u2014 but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context.";
  // a blank line between paragraphs equals joining on a double newline
  return [headline, guidance, fallback].join("\n\n");
}
|
|
147071
147100
|
async function collectPostRunIssues(params) {
|
|
147072
147101
|
const issues = {};
|
|
147073
147102
|
if (params.stopScript) {
|
|
@@ -147076,12 +147105,17 @@ async function collectPostRunIssues(params) {
|
|
|
147076
147105
|
}
|
|
147077
147106
|
const status = getGitStatus();
|
|
147078
147107
|
if (status) issues.dirtyTree = status;
|
|
147108
|
+
if (params.summaryFilePath && params.summarySeed !== void 0) {
|
|
147109
|
+
const stale = await isSummaryUnchanged(params.summaryFilePath, params.summarySeed);
|
|
147110
|
+
if (stale) issues.summaryStale = { filePath: params.summaryFilePath };
|
|
147111
|
+
}
|
|
147079
147112
|
return issues;
|
|
147080
147113
|
}
|
|
147081
147114
|
/**
 * Assemble the resume prompt for whichever post-run issues are present.
 *
 * Sections appear in a fixed order (stop hook, dirty tree, stale summary) and
 * are separated by a horizontal rule.
 *
 * @param {object} issues - may carry `stopHook`, `dirtyTree`, `summaryStale`.
 * @returns {string} the joined prompt; empty when no issue is set.
 */
function buildPostRunPrompt(issues) {
  const { stopHook, dirtyTree, summaryStale } = issues;
  const sections = [
    ...(stopHook ? [buildStopHookPrompt(stopHook)] : []),
    ...(dirtyTree ? [buildCommitPrompt(dirtyTree)] : []),
    ...(summaryStale ? [buildSummaryStalePrompt(summaryStale.filePath)] : [])
  ];
  return sections.join("\n\n---\n\n");
}
|
|
147087
147121
|
function buildLearningsReflectionPrompt(agentId) {
|
|
@@ -147104,9 +147138,15 @@ async function runPostRunRetryLoop(params) {
|
|
|
147104
147138
|
let finalIssues = {};
|
|
147105
147139
|
let gateResumeCount = 0;
|
|
147106
147140
|
let pendingReflection = params.reflectionPrompt;
|
|
147141
|
+
let summaryStaleNudged = false;
|
|
147107
147142
|
while (gateResumeCount < MAX_POST_RUN_RETRIES) {
|
|
147108
147143
|
if (!result.success) break;
|
|
147109
|
-
const issues = await collectPostRunIssues({
|
|
147144
|
+
const issues = await collectPostRunIssues({
|
|
147145
|
+
stopScript: params.stopScript,
|
|
147146
|
+
summaryFilePath: summaryStaleNudged ? void 0 : params.summaryFilePath,
|
|
147147
|
+
summarySeed: summaryStaleNudged ? void 0 : params.summarySeed
|
|
147148
|
+
});
|
|
147149
|
+
if (issues.summaryStale) summaryStaleNudged = true;
|
|
147110
147150
|
finalIssues = issues;
|
|
147111
147151
|
if (!hasPostRunIssues(issues)) {
|
|
147112
147152
|
if (!pendingReflection) break;
|
|
@@ -147138,8 +147178,17 @@ async function runPostRunRetryLoop(params) {
|
|
|
147138
147178
|
}
|
|
147139
147179
|
log.info(`\xBB post-run retry (attempt ${gateResumeCount + 1}/${MAX_POST_RUN_RETRIES})`);
|
|
147140
147180
|
const prompt = buildPostRunPrompt(issues);
|
|
147181
|
+
const onlySummaryStale = issues.summaryStale !== void 0 && issues.stopHook === void 0 && issues.dirtyTree === void 0;
|
|
147182
|
+
const preResume = result;
|
|
147141
147183
|
result = await params.resume({ prompt, previousResult: result });
|
|
147142
147184
|
aggregatedUsage = mergeAgentUsage(aggregatedUsage, result.usage);
|
|
147185
|
+
if (!result.success && onlySummaryStale) {
|
|
147186
|
+
log.warning(
|
|
147187
|
+
`\xBB summary-stale resume turn failed (${result.error ?? "unknown error"}), preserving prior successful result`
|
|
147188
|
+
);
|
|
147189
|
+
result = preResume;
|
|
147190
|
+
break;
|
|
147191
|
+
}
|
|
147143
147192
|
gateResumeCount++;
|
|
147144
147193
|
}
|
|
147145
147194
|
if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
|
|
@@ -147276,6 +147325,7 @@ async function runClaude(params) {
|
|
|
147276
147325
|
const thinkingTimer = new ThinkingTimer();
|
|
147277
147326
|
let finalOutput = "";
|
|
147278
147327
|
let sessionId;
|
|
147328
|
+
let resultErrorSubtype = null;
|
|
147279
147329
|
let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
|
|
147280
147330
|
let accumulatedCostUsd = 0;
|
|
147281
147331
|
let tokensLogged = false;
|
|
@@ -147379,9 +147429,14 @@ async function runClaude(params) {
|
|
|
147379
147429
|
tokensLogged = true;
|
|
147380
147430
|
}
|
|
147381
147431
|
} else if (subtype === "error_max_turns") {
|
|
147432
|
+
resultErrorSubtype = subtype;
|
|
147382
147433
|
log.info(`\xBB ${params.label} max turns reached: ${JSON.stringify(event)}`);
|
|
147383
147434
|
} else if (subtype === "error_during_execution") {
|
|
147435
|
+
resultErrorSubtype = subtype;
|
|
147384
147436
|
log.info(`\xBB ${params.label} execution error: ${JSON.stringify(event)}`);
|
|
147437
|
+
} else if (subtype.startsWith("error")) {
|
|
147438
|
+
resultErrorSubtype = subtype;
|
|
147439
|
+
log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
|
|
147385
147440
|
} else {
|
|
147386
147441
|
log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
|
|
147387
147442
|
}
|
|
@@ -147512,6 +147567,15 @@ ${stderrContext}`);
|
|
|
147512
147567
|
sessionId
|
|
147513
147568
|
};
|
|
147514
147569
|
}
|
|
147570
|
+
if (resultErrorSubtype) {
|
|
147571
|
+
return {
|
|
147572
|
+
success: false,
|
|
147573
|
+
output: finalOutput || output,
|
|
147574
|
+
error: `result subtype: ${resultErrorSubtype}`,
|
|
147575
|
+
usage,
|
|
147576
|
+
sessionId
|
|
147577
|
+
};
|
|
147578
|
+
}
|
|
147515
147579
|
return { success: true, output: finalOutput || output, usage, sessionId };
|
|
147516
147580
|
} catch (error49) {
|
|
147517
147581
|
params.todoTracker?.cancel();
|
|
@@ -147640,6 +147704,8 @@ var claude = agent({
|
|
|
147640
147704
|
initialResult: result,
|
|
147641
147705
|
initialUsage: result.usage,
|
|
147642
147706
|
stopScript: ctx.stopScript,
|
|
147707
|
+
summaryFilePath: ctx.summaryFilePath,
|
|
147708
|
+
summarySeed: ctx.summarySeed,
|
|
147643
147709
|
reflectionPrompt: buildLearningsReflectionPrompt("claude"),
|
|
147644
147710
|
canResume: (r) => Boolean(r.sessionId),
|
|
147645
147711
|
resume: async (c2) => {
|
|
@@ -147667,6 +147733,7 @@ async function installOpencodeCli() {
|
|
|
147667
147733
|
installDependencies: true
|
|
147668
147734
|
});
|
|
147669
147735
|
}
|
|
147736
|
+
var PULLFROG_OPENCODE_OUTPUT_LIMIT = 5e3;
|
|
147670
147737
|
function buildSecurityConfig(ctx, model) {
|
|
147671
147738
|
const config3 = {
|
|
147672
147739
|
permission: {
|
|
@@ -147973,6 +148040,12 @@ async function runOpenCode(params) {
|
|
|
147973
148040
|
log.debug(withLabel(label, `tool output: ${outputStr}`));
|
|
147974
148041
|
}
|
|
147975
148042
|
},
|
|
148043
|
+
error: (event) => {
|
|
148044
|
+
agentErrorEvent = event;
|
|
148045
|
+
const errorName = event.error?.name || "unknown";
|
|
148046
|
+
const errorMessage = event.error?.data?.message || event.error?.name || JSON.stringify(event);
|
|
148047
|
+
log.info(`\xBB ${params.label} error event: ${errorName}: ${errorMessage}`);
|
|
148048
|
+
},
|
|
147976
148049
|
result: async (event) => {
|
|
147977
148050
|
const status = event.status || "unknown";
|
|
147978
148051
|
const duration4 = event.stats?.duration_ms || 0;
|
|
@@ -147993,6 +148066,7 @@ async function runOpenCode(params) {
|
|
|
147993
148066
|
};
|
|
147994
148067
|
const recentStderr = [];
|
|
147995
148068
|
let lastProviderError = null;
|
|
148069
|
+
let agentErrorEvent = null;
|
|
147996
148070
|
let output = "";
|
|
147997
148071
|
let stdoutBuffer = "";
|
|
147998
148072
|
try {
|
|
@@ -148111,6 +148185,17 @@ ${stderrContext}`);
|
|
|
148111
148185
|
usage
|
|
148112
148186
|
};
|
|
148113
148187
|
}
|
|
148188
|
+
if (agentErrorEvent) {
|
|
148189
|
+
const errorEvent = agentErrorEvent;
|
|
148190
|
+
const errorName = errorEvent.error?.name || "agent error";
|
|
148191
|
+
const errorMessage = errorEvent.error?.data?.message || errorEvent.error?.name || JSON.stringify(errorEvent);
|
|
148192
|
+
return {
|
|
148193
|
+
success: false,
|
|
148194
|
+
output: finalOutput || output,
|
|
148195
|
+
error: `${errorName}: ${errorMessage}`,
|
|
148196
|
+
usage
|
|
148197
|
+
};
|
|
148198
|
+
}
|
|
148114
148199
|
return { success: true, output: finalOutput || output, usage };
|
|
148115
148200
|
} catch (error49) {
|
|
148116
148201
|
params.todoTracker?.cancel();
|
|
@@ -148164,6 +148249,7 @@ var opencode = agent({
|
|
|
148164
148249
|
...homeEnv,
|
|
148165
148250
|
OPENCODE_CONFIG_CONTENT: buildSecurityConfig(ctx, model),
|
|
148166
148251
|
OPENCODE_PERMISSION: permissionOverride,
|
|
148252
|
+
OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX: PULLFROG_OPENCODE_OUTPUT_LIMIT.toString(),
|
|
148167
148253
|
GOOGLE_GENERATIVE_AI_API_KEY: process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GEMINI_API_KEY
|
|
148168
148254
|
};
|
|
148169
148255
|
const repoDir = process.cwd();
|
|
@@ -148186,6 +148272,8 @@ var opencode = agent({
|
|
|
148186
148272
|
initialResult: result,
|
|
148187
148273
|
initialUsage: result.usage,
|
|
148188
148274
|
stopScript: ctx.stopScript,
|
|
148275
|
+
summaryFilePath: ctx.summaryFilePath,
|
|
148276
|
+
summarySeed: ctx.summarySeed,
|
|
148189
148277
|
reflectionPrompt: buildLearningsReflectionPrompt("opencode"),
|
|
148190
148278
|
resume: async (c2) => runOpenCode({
|
|
148191
148279
|
...runParams,
|
|
@@ -152732,7 +152820,7 @@ When embedding images (e.g. uploaded screenshots) in comments or PR bodies, alwa
|
|
|
152732
152820
|
|
|
152733
152821
|
**\`report_progress\`**: call this exactly once at the end of every run with a brief final summary (1-3 sentences) unless the mode guidance instructs otherwise. Never call it for intermediate status updates (e.g., "Checking for changes...", "Starting review...") \u2014 the task list handles live progress automatically. Calling \`report_progress\` replaces the task list with your summary and preserves the current task list in a collapsible section. Keep the summary concise \u2014 do not repeat what the task list already shows. Focus on the outcome (what was accomplished, links to artifacts) rather than listing individual steps. If something failed, include the tool's error text even when that makes the summary longer.
|
|
152734
152822
|
|
|
152735
|
-
Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments
|
|
152823
|
+
Never use \`create_issue_comment\` for task progress \u2014 that creates duplicate comments and leaves the progress comment stuck in its initial state. \`create_issue_comment\` is only for standalone comments unrelated to your current task (e.g., Plan comments).
|
|
152736
152824
|
|
|
152737
152825
|
### If you get stuck
|
|
152738
152826
|
|
|
@@ -152896,7 +152984,8 @@ var JsonPayload = type({
|
|
|
152896
152984
|
"progressComment?": type({
|
|
152897
152985
|
id: "string",
|
|
152898
152986
|
type: "'issue' | 'review'"
|
|
152899
|
-
}).or("undefined")
|
|
152987
|
+
}).or("undefined"),
|
|
152988
|
+
"generateSummary?": "boolean | undefined"
|
|
152900
152989
|
});
|
|
152901
152990
|
var COLLABORATOR_PERMISSIONS = ["admin", "maintain", "write"];
|
|
152902
152991
|
function isCollaborator(event) {
|
|
@@ -152979,6 +153068,7 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
|
|
|
152979
153068
|
timeout: inputs.timeout ?? jsonPayload?.timeout,
|
|
152980
153069
|
cwd: resolveCwd(inputs.cwd),
|
|
152981
153070
|
progressComment: jsonPayload?.progressComment,
|
|
153071
|
+
generateSummary: jsonPayload?.generateSummary,
|
|
152982
153072
|
// permissions: inputs > repoSettings > fallbacks
|
|
152983
153073
|
push: inputs.push ?? repoSettings.push ?? "restricted",
|
|
152984
153074
|
shell: resolvedShell,
|
|
@@ -152987,6 +153077,40 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
|
|
|
152987
153077
|
};
|
|
152988
153078
|
}
|
|
152989
153079
|
|
|
153080
|
+
// utils/prSummary.ts
|
|
153081
|
+
import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
|
|
153082
|
+
import { dirname as dirname4, join as join14 } from "node:path";
|
|
153083
|
+
var SUMMARY_FILE_NAME = "pullfrog-summary.md";
|
|
153084
|
+
var SUMMARY_SCAFFOLD = `# PR summary
|
|
153085
|
+
|
|
153086
|
+
<!-- durable cross-run context. edit in place; the next agent run reads this
|
|
153087
|
+
before reviewing new commits. structure however serves the PR best. -->
|
|
153088
|
+
`;
|
|
153089
|
+
var MIN_SNAPSHOT_LENGTH = 60;
|
|
153090
|
+
var MAX_SNAPSHOT_LENGTH = 32768;
|
|
153091
|
+
function summaryFilePath(tmpdir3) {
|
|
153092
|
+
return join14(tmpdir3, SUMMARY_FILE_NAME);
|
|
153093
|
+
}
|
|
153094
|
+
async function seedSummaryFile(params) {
|
|
153095
|
+
const path3 = summaryFilePath(params.tmpdir);
|
|
153096
|
+
await mkdir(dirname4(path3), { recursive: true });
|
|
153097
|
+
const seed = params.previousSnapshot && params.previousSnapshot.trim().length >= MIN_SNAPSHOT_LENGTH ? params.previousSnapshot : SUMMARY_SCAFFOLD;
|
|
153098
|
+
await writeFile2(path3, seed, "utf8");
|
|
153099
|
+
return path3;
|
|
153100
|
+
}
|
|
153101
|
+
async function readSummaryFile(path3) {
|
|
153102
|
+
let raw2;
|
|
153103
|
+
try {
|
|
153104
|
+
raw2 = await readFile2(path3, "utf8");
|
|
153105
|
+
} catch {
|
|
153106
|
+
return null;
|
|
153107
|
+
}
|
|
153108
|
+
const trimmed = raw2.trim();
|
|
153109
|
+
if (trimmed.length < MIN_SNAPSHOT_LENGTH) return null;
|
|
153110
|
+
if (trimmed.length > MAX_SNAPSHOT_LENGTH) return trimmed.slice(0, MAX_SNAPSHOT_LENGTH);
|
|
153111
|
+
return trimmed;
|
|
153112
|
+
}
|
|
153113
|
+
|
|
152990
153114
|
// utils/reviewCleanup.ts
|
|
152991
153115
|
var RE_REVIEW_PREAMBLE = "Incrementally re-review the new commits on this pull request. Use the IncrementalReview mode.";
|
|
152992
153116
|
async function postReviewCleanup(ctx) {
|
|
@@ -153046,11 +153170,16 @@ async function dispatchFollowUpReReview(ctx, reviewedSha) {
|
|
|
153046
153170
|
await ctx.octokit.rest.actions.createWorkflowDispatch({
|
|
153047
153171
|
owner: ctx.repo.owner,
|
|
153048
153172
|
repo: ctx.repo.name,
|
|
153049
|
-
workflow_id:
|
|
153173
|
+
workflow_id: getCurrentWorkflowFilename(),
|
|
153050
153174
|
ref: pr.data.base.repo.default_branch,
|
|
153051
153175
|
inputs: { prompt: JSON.stringify(payload) }
|
|
153052
153176
|
});
|
|
153053
153177
|
}
|
|
153178
|
+
function getCurrentWorkflowFilename() {
|
|
153179
|
+
const ref = process.env.GITHUB_WORKFLOW_REF ?? "";
|
|
153180
|
+
const match3 = ref.match(/\/([^/]+)@/);
|
|
153181
|
+
return match3?.[1] ?? "pullfrog.yml";
|
|
153182
|
+
}
|
|
153054
153183
|
|
|
153055
153184
|
// utils/run.ts
|
|
153056
153185
|
async function handleAgentResult(ctx) {
|
|
@@ -153190,9 +153319,9 @@ async function resolveRunContextData(params) {
|
|
|
153190
153319
|
import { execFileSync as execFileSync5, execSync as execSync3 } from "node:child_process";
|
|
153191
153320
|
import { mkdtempSync } from "node:fs";
|
|
153192
153321
|
import { tmpdir as tmpdir2 } from "node:os";
|
|
153193
|
-
import { join as
|
|
153322
|
+
import { join as join15 } from "node:path";
|
|
153194
153323
|
function createTempDirectory() {
|
|
153195
|
-
const sharedTempDir = mkdtempSync(
|
|
153324
|
+
const sharedTempDir = mkdtempSync(join15(tmpdir2(), "pullfrog-"));
|
|
153196
153325
|
process.env.PULLFROG_TEMP_DIR = sharedTempDir;
|
|
153197
153326
|
log.info(`\xBB created temp dir at ${sharedTempDir}`);
|
|
153198
153327
|
return sharedTempDir;
|
|
@@ -153525,39 +153654,71 @@ var TransientError = class extends Error {
|
|
|
153525
153654
|
this.name = "TransientError";
|
|
153526
153655
|
}
|
|
153527
153656
|
};
|
|
153528
|
-
function
|
|
153657
|
+
function billingConsoleUrl(owner, anchor) {
|
|
153658
|
+
return `https://pullfrog.com/console/${encodeURIComponent(owner)}#${anchor}`;
|
|
153659
|
+
}
|
|
153660
|
+
function formatBillingErrorSummary(error49, owner) {
|
|
153529
153661
|
if (error49.code === "router_requires_card") {
|
|
153530
153662
|
return [
|
|
153531
|
-
"
|
|
153663
|
+
"**Add a card to start using Pullfrog Router.**",
|
|
153532
153664
|
"",
|
|
153533
|
-
"
|
|
153665
|
+
"Router proxies OpenRouter at raw cost \u2014 no platform markup, and your first $20 of usage is on us.",
|
|
153534
153666
|
"",
|
|
153535
|
-
|
|
153667
|
+
`[Add a card \u2192](${billingConsoleUrl(owner, "model-access")})`
|
|
153668
|
+
].join("\n");
|
|
153669
|
+
}
|
|
153670
|
+
if (error49.code === "router_balance_exhausted") {
|
|
153671
|
+
return [
|
|
153672
|
+
"**Your Pullfrog Router balance is exhausted.**",
|
|
153673
|
+
"",
|
|
153674
|
+
"You have a card on file but auto-reload is disabled, so runs paused once your balance went past the overdraft buffer.",
|
|
153675
|
+
"",
|
|
153676
|
+
`[Top up balance \u2192](${billingConsoleUrl(owner, "billing")}) \xB7 [Enable auto-reload \u2192](${billingConsoleUrl(owner, "model-access")})`
|
|
153677
|
+
].join("\n");
|
|
153678
|
+
}
|
|
153679
|
+
if (error49.code === "router_keylimit_exhausted") {
|
|
153680
|
+
return [
|
|
153681
|
+
"**This run was cut short \u2014 your Pullfrog Router balance ran out mid-run.**",
|
|
153682
|
+
"",
|
|
153683
|
+
"OpenRouter stopped the agent because the per-run budget was exhausted. Your wallet is now negative; top up or enable auto-reload to keep runs flowing.",
|
|
153684
|
+
"",
|
|
153685
|
+
`[Top up balance \u2192](${billingConsoleUrl(owner, "billing")}) \xB7 [Enable auto-reload \u2192](${billingConsoleUrl(owner, "model-access")})`
|
|
153536
153686
|
].join("\n");
|
|
153537
153687
|
}
|
|
153538
153688
|
if (error49.needsReauthentication) {
|
|
153689
|
+
const code = error49.declineCode ?? "authentication_required";
|
|
153539
153690
|
return [
|
|
153540
|
-
|
|
153691
|
+
`**Your card issuer requires 3D Secure on every charge** (\`${code}\`).`,
|
|
153541
153692
|
"",
|
|
153542
|
-
|
|
153693
|
+
"Pullfrog can't complete a 3DS challenge from inside a workflow. Top up your Router balance once in Stripe Checkout \u2014 subsequent runs draw from the prepaid balance without re-triggering 3DS.",
|
|
153543
153694
|
"",
|
|
153544
|
-
|
|
153695
|
+
`[Top up balance \u2192](${billingConsoleUrl(owner, "billing")})`
|
|
153545
153696
|
].join("\n");
|
|
153546
153697
|
}
|
|
153547
|
-
|
|
153548
|
-
|
|
153549
|
-
|
|
153550
|
-
|
|
153551
|
-
|
|
153552
|
-
|
|
153698
|
+
if (error49.declineCode) {
|
|
153699
|
+
return [
|
|
153700
|
+
`**Your card was declined** (\`${error49.declineCode}\`).`,
|
|
153701
|
+
"",
|
|
153702
|
+
"Update your payment method and Pullfrog will retry on the next run.",
|
|
153703
|
+
"",
|
|
153704
|
+
`[Update payment method \u2192](${billingConsoleUrl(owner, "billing")})`
|
|
153705
|
+
].join("\n");
|
|
153706
|
+
}
|
|
153707
|
+
return [
|
|
153708
|
+
"**Your Pullfrog balance is empty.**",
|
|
153709
|
+
"",
|
|
153710
|
+
"Top up your balance or enable auto-reload to keep runs flowing.",
|
|
153711
|
+
"",
|
|
153712
|
+
`[Manage billing \u2192](${billingConsoleUrl(owner, "billing")})`
|
|
153713
|
+
].join("\n");
|
|
153553
153714
|
}
|
|
153554
|
-
function formatTransientErrorSummary(error49) {
|
|
153715
|
+
function formatTransientErrorSummary(error49, owner) {
|
|
153555
153716
|
return [
|
|
153556
|
-
"
|
|
153717
|
+
"**Pullfrog billing is temporarily unavailable.**",
|
|
153557
153718
|
"",
|
|
153558
153719
|
error49.message,
|
|
153559
153720
|
"",
|
|
153560
|
-
|
|
153721
|
+
`Usually transient \u2014 the next dispatch should succeed. If it persists, check [status.pullfrog.com](https://status.pullfrog.com) or [your console](${billingConsoleUrl(owner, "billing")}).`
|
|
153561
153722
|
].join("\n");
|
|
153562
153723
|
}
|
|
153563
153724
|
async function mintProxyKey(ctx) {
|
|
@@ -153618,6 +153779,43 @@ async function resolveProxyModel(ctx) {
|
|
|
153618
153779
|
const label = ctx.oss ? "oss" : "router";
|
|
153619
153780
|
log.info(`\xBB proxy: ${label} \u2192 ${ctx.proxyModel}`);
|
|
153620
153781
|
}
|
|
153782
|
+
async function fetchPreviousSnapshot(ctx, prNumber) {
|
|
153783
|
+
if (!ctx.githubInstallationToken) return null;
|
|
153784
|
+
try {
|
|
153785
|
+
const response = await apiFetch({
|
|
153786
|
+
path: `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/pr/${prNumber}/summary-comment`,
|
|
153787
|
+
method: "GET",
|
|
153788
|
+
headers: { authorization: `Bearer ${ctx.githubInstallationToken}` },
|
|
153789
|
+
signal: AbortSignal.timeout(1e4)
|
|
153790
|
+
});
|
|
153791
|
+
if (!response.ok) return null;
|
|
153792
|
+
const data = await response.json();
|
|
153793
|
+
return typeof data.snapshot === "string" && data.snapshot.length > 0 ? data.snapshot : null;
|
|
153794
|
+
} catch {
|
|
153795
|
+
return null;
|
|
153796
|
+
}
|
|
153797
|
+
}
|
|
153798
|
+
async function persistSummary(ctx) {
|
|
153799
|
+
const filePath = ctx.toolState.summaryFilePath;
|
|
153800
|
+
if (!filePath) return;
|
|
153801
|
+
if (ctx.toolState.summaryPersistAttempted) return;
|
|
153802
|
+
ctx.toolState.summaryPersistAttempted = true;
|
|
153803
|
+
const snapshot2 = await readSummaryFile(filePath);
|
|
153804
|
+
if (!snapshot2) {
|
|
153805
|
+
log.debug(`pr summary tmpfile missing or invalid at ${filePath} \u2014 skipping persist`);
|
|
153806
|
+
return;
|
|
153807
|
+
}
|
|
153808
|
+
const seed = ctx.toolState.summarySeed?.trim();
|
|
153809
|
+
if (seed !== void 0 && snapshot2 === seed) {
|
|
153810
|
+
log.warning(
|
|
153811
|
+
"\xBB pr summary tmpfile unchanged from seed \u2014 skipping persist (agent did not edit it)"
|
|
153812
|
+
);
|
|
153813
|
+
return;
|
|
153814
|
+
}
|
|
153815
|
+
await patchWorkflowRunFields(ctx, { summarySnapshot: snapshot2 }).catch((err) => {
|
|
153816
|
+
log.debug(`pr summary persist failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
153817
|
+
});
|
|
153818
|
+
}
|
|
153621
153819
|
async function writeJobSummary(toolState) {
|
|
153622
153820
|
const usageSummary = formatUsageSummary(toolState.usageEntries);
|
|
153623
153821
|
const summaryParts = [toolState.lastProgressBody, usageSummary].filter(Boolean);
|
|
@@ -153682,7 +153880,7 @@ async function main() {
|
|
|
153682
153880
|
});
|
|
153683
153881
|
} catch (error49) {
|
|
153684
153882
|
if (error49 instanceof BillingError) {
|
|
153685
|
-
const summary2 = formatBillingErrorSummary(error49);
|
|
153883
|
+
const summary2 = formatBillingErrorSummary(error49, runContext.repo.owner);
|
|
153686
153884
|
await writeSummary(summary2).catch(() => {
|
|
153687
153885
|
});
|
|
153688
153886
|
await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
|
|
@@ -153690,7 +153888,7 @@ async function main() {
|
|
|
153690
153888
|
throw error49;
|
|
153691
153889
|
}
|
|
153692
153890
|
if (error49 instanceof TransientError) {
|
|
153693
|
-
const summary2 = formatTransientErrorSummary(error49);
|
|
153891
|
+
const summary2 = formatTransientErrorSummary(error49, runContext.repo.owner);
|
|
153694
153892
|
await writeSummary(summary2).catch(() => {
|
|
153695
153893
|
});
|
|
153696
153894
|
await reportErrorToComment({ toolState, error: summary2 }).catch(() => {
|
|
@@ -153727,6 +153925,7 @@ async function main() {
|
|
|
153727
153925
|
setGitAuthServer(gitAuthServer);
|
|
153728
153926
|
const resolvedModel = payload.proxyModel ? void 0 : resolveModel({ slug: payload.model });
|
|
153729
153927
|
const agent2 = resolveAgent({ model: resolvedModel });
|
|
153928
|
+
toolState.model = payload.proxyModel ?? resolvedModel ?? payload.model;
|
|
153730
153929
|
validateAgentApiKey({
|
|
153731
153930
|
agent: agent2,
|
|
153732
153931
|
model: payload.proxyModel ?? resolvedModel ?? payload.model,
|
|
@@ -153780,6 +153979,20 @@ async function main() {
|
|
|
153780
153979
|
toolContext.mcpServerUrl = mcpHttpServer.url;
|
|
153781
153980
|
log.info(`\xBB MCP server started at ${mcpHttpServer.url}`);
|
|
153782
153981
|
timer.checkpoint("mcpServer");
|
|
153982
|
+
if (payload.generateSummary && payload.event.is_pr && payload.event.issue_number) {
|
|
153983
|
+
const previousSnapshot = await fetchPreviousSnapshot(toolContext, payload.event.issue_number);
|
|
153984
|
+
const filePath = await seedSummaryFile({ tmpdir: tmpdir3, previousSnapshot });
|
|
153985
|
+
toolState.summaryFilePath = filePath;
|
|
153986
|
+
try {
|
|
153987
|
+
toolState.summarySeed = await readFile3(filePath, "utf8");
|
|
153988
|
+
} catch {
|
|
153989
|
+
}
|
|
153990
|
+
log.info(
|
|
153991
|
+
`\xBB summary snapshot seeded at ${filePath} (previous=${previousSnapshot ? "yes" : "no"})`
|
|
153992
|
+
);
|
|
153993
|
+
const ctxForExit = toolContext;
|
|
153994
|
+
onExitSignal(() => persistSummary(ctxForExit));
|
|
153995
|
+
}
|
|
153783
153996
|
startInstallation(toolContext);
|
|
153784
153997
|
const modelForLog = resolveModelForLog({ payload, resolvedModel });
|
|
153785
153998
|
const agentForLog = resolveAgentForLog({ agentName: agent2.name, resolvedModel });
|
|
@@ -153811,7 +154024,7 @@ ${instructions.user}` : null,
|
|
|
153811
154024
|
log.info(instructions.full);
|
|
153812
154025
|
});
|
|
153813
154026
|
if (agentId === "opencode") {
|
|
153814
|
-
const pluginDir =
|
|
154027
|
+
const pluginDir = join16(process.cwd(), ".opencode", "plugin");
|
|
153815
154028
|
const hasPlugins = existsSync7(pluginDir) && readdirSync(pluginDir).some((f) => /\.[jt]sx?$/.test(f));
|
|
153816
154029
|
if (hasPlugins && toolState.dependencyInstallation?.promise) {
|
|
153817
154030
|
log.info(
|
|
@@ -153870,6 +154083,8 @@ ${instructions.user}` : null,
|
|
|
153870
154083
|
instructions,
|
|
153871
154084
|
todoTracker,
|
|
153872
154085
|
stopScript: runContext.repoSettings.stopScript,
|
|
154086
|
+
summaryFilePath: toolState.summaryFilePath,
|
|
154087
|
+
summarySeed: toolState.summarySeed,
|
|
153873
154088
|
onActivityTimeout: onInnerActivityTimeout,
|
|
153874
154089
|
onToolUse: (event) => {
|
|
153875
154090
|
const wasTracked = recordDiffReadFromToolUse({
|
|
@@ -153924,8 +154139,10 @@ ${instructions.user}` : null,
|
|
|
153924
154139
|
log.debug(`post-review cleanup failed: ${error49}`);
|
|
153925
154140
|
});
|
|
153926
154141
|
}
|
|
153927
|
-
|
|
153928
|
-
|
|
154142
|
+
if (toolContext) {
|
|
154143
|
+
await persistSummary(toolContext);
|
|
154144
|
+
}
|
|
154145
|
+
if (toolContext && toolState.progressComment && !toolState.finalSummaryWritten) {
|
|
153929
154146
|
await deleteProgressComment(toolContext).catch((error49) => {
|
|
153930
154147
|
log.debug(`stranded progress comment cleanup failed: ${error49}`);
|
|
153931
154148
|
});
|
|
@@ -153952,8 +154169,9 @@ ${instructions.user}` : null,
|
|
|
153952
154169
|
todoTracker?.cancel();
|
|
153953
154170
|
killTrackedChildren();
|
|
153954
154171
|
log.error(errorMessage);
|
|
154172
|
+
const billingError = isRouterKeylimitExhaustedError(errorMessage) ? new BillingError(errorMessage, { code: "router_keylimit_exhausted" }) : null;
|
|
153955
154173
|
try {
|
|
153956
|
-
const errorSummary = `### \u274C Pullfrog failed
|
|
154174
|
+
const errorSummary = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : `### \u274C Pullfrog failed
|
|
153957
154175
|
|
|
153958
154176
|
\`\`\`
|
|
153959
154177
|
${errorMessage}
|
|
@@ -153964,7 +154182,8 @@ ${errorMessage}
|
|
|
153964
154182
|
} catch {
|
|
153965
154183
|
}
|
|
153966
154184
|
try {
|
|
153967
|
-
|
|
154185
|
+
const commentBody = billingError ? formatBillingErrorSummary(billingError, runContext.repo.owner) : errorMessage;
|
|
154186
|
+
await reportErrorToComment({ toolState, error: commentBody });
|
|
153968
154187
|
} catch {
|
|
153969
154188
|
}
|
|
153970
154189
|
if (toolContext) {
|
|
@@ -153972,6 +154191,9 @@ ${errorMessage}
|
|
|
153972
154191
|
log.debug(`post-review cleanup failed: ${error50}`);
|
|
153973
154192
|
});
|
|
153974
154193
|
}
|
|
154194
|
+
if (toolContext) {
|
|
154195
|
+
await persistSummary(toolContext);
|
|
154196
|
+
}
|
|
153975
154197
|
return {
|
|
153976
154198
|
success: false,
|
|
153977
154199
|
error: errorMessage
|
|
@@ -154003,169 +154225,8 @@ ${errorMessage}
|
|
|
154003
154225
|
}
|
|
154004
154226
|
}
|
|
154005
154227
|
|
|
154006
|
-
// utils/postCleanup.ts
|
|
154007
|
-
var SHOULD_CHECK_REASON = true;
|
|
154008
|
-
function buildErrorCommentBody(ctx, isCancellation) {
|
|
154009
|
-
let errorMessage = isCancellation ? `This run was cancelled \u{1F6D1}
|
|
154010
|
-
|
|
154011
|
-
The workflow was cancelled before completion.` : `This run croaked \u{1F635}
|
|
154012
|
-
|
|
154013
|
-
The workflow encountered an error before any progress could be reported.`;
|
|
154014
|
-
if (ctx.runId) {
|
|
154015
|
-
errorMessage += " Please check the link below for details.";
|
|
154016
|
-
}
|
|
154017
|
-
const customParts = [];
|
|
154018
|
-
if (!isCancellation && ctx.runId) {
|
|
154019
|
-
const apiUrl = getApiUrl();
|
|
154020
|
-
customParts.push(
|
|
154021
|
-
`[Rerun failed job \u2794](${apiUrl}/trigger/${ctx.repoContext.owner}/${ctx.repoContext.name}/${ctx.runId}?action=rerun)`
|
|
154022
|
-
);
|
|
154023
|
-
}
|
|
154024
|
-
const footer = buildPullfrogFooter({
|
|
154025
|
-
triggeredBy: true,
|
|
154026
|
-
workflowRun: ctx.runId ? {
|
|
154027
|
-
owner: ctx.repoContext.owner,
|
|
154028
|
-
repo: ctx.repoContext.name,
|
|
154029
|
-
runId: ctx.runId
|
|
154030
|
-
} : void 0,
|
|
154031
|
-
customParts
|
|
154032
|
-
});
|
|
154033
|
-
return `${errorMessage}${footer}`;
|
|
154034
|
-
}
|
|
154035
|
-
async function validateStuckProgressComment(ctx) {
|
|
154036
|
-
const promptComment = ctx.promptInput?.progressComment;
|
|
154037
|
-
if (!promptComment) {
|
|
154038
|
-
log.info("[post] no progressComment in prompt input, skipping cleanup");
|
|
154039
|
-
return null;
|
|
154040
|
-
}
|
|
154041
|
-
const comment = parseProgressComment(promptComment);
|
|
154042
|
-
if (!comment) {
|
|
154043
|
-
log.info(`[post] progressComment.id is not a positive integer: ${promptComment.id}`);
|
|
154044
|
-
return null;
|
|
154045
|
-
}
|
|
154046
|
-
log.info(`[post] validating progressComment from prompt input: ${comment.id} (${comment.type})`);
|
|
154047
|
-
try {
|
|
154048
|
-
const fetched = await getProgressComment(
|
|
154049
|
-
{ octokit: ctx.octokit, owner: ctx.repoContext.owner, repo: ctx.repoContext.name },
|
|
154050
|
-
comment
|
|
154051
|
-
);
|
|
154052
|
-
const body = fetched.body ?? "";
|
|
154053
|
-
if (isLeapingIntoActionCommentBody(body)) {
|
|
154054
|
-
log.info(`[post] comment ${comment.id} is stuck on "Leaping into action"`);
|
|
154055
|
-
return comment;
|
|
154056
|
-
}
|
|
154057
|
-
if (/^- \[[ x]\] |^- \*\*→\*\* |^- ~~/.test(body)) {
|
|
154058
|
-
log.info(`[post] comment ${comment.id} is stuck on a todo checklist`);
|
|
154059
|
-
return comment;
|
|
154060
|
-
}
|
|
154061
|
-
log.info(`[post] comment ${comment.id} is not stuck (already updated or different content)`);
|
|
154062
|
-
return null;
|
|
154063
|
-
} catch (error49) {
|
|
154064
|
-
const errorMessage = error49 instanceof Error ? error49.message : String(error49);
|
|
154065
|
-
log.info(`[post] failed to get comment ${comment.id}: ${errorMessage}`);
|
|
154066
|
-
return null;
|
|
154067
|
-
}
|
|
154068
|
-
}
|
|
154069
|
-
async function getIsCancelled(ctx) {
|
|
154070
|
-
if (!ctx.runId) return false;
|
|
154071
|
-
try {
|
|
154072
|
-
const jobsResult = await ctx.octokit.rest.actions.listJobsForWorkflowRun({
|
|
154073
|
-
owner: ctx.repoContext.owner,
|
|
154074
|
-
repo: ctx.repoContext.name,
|
|
154075
|
-
run_id: ctx.runId
|
|
154076
|
-
});
|
|
154077
|
-
const currentJobName = process.env.GITHUB_JOB;
|
|
154078
|
-
const currentJob = currentJobName ? jobsResult.data.jobs.find(
|
|
154079
|
-
(j2) => j2.name === currentJobName || j2.name.startsWith(`${currentJobName} (`)
|
|
154080
|
-
) : jobsResult.data.jobs[0];
|
|
154081
|
-
if (!currentJob) {
|
|
154082
|
-
log.warning("[post] could not find current job");
|
|
154083
|
-
return false;
|
|
154084
|
-
}
|
|
154085
|
-
log.info(`[post] job status: ${currentJob.status}, conclusion: ${currentJob.conclusion}`);
|
|
154086
|
-
if (currentJob.conclusion === "cancelled") return true;
|
|
154087
|
-
const cancelledStep = currentJob.steps?.find((step) => step.conclusion === "cancelled");
|
|
154088
|
-
if (cancelledStep) {
|
|
154089
|
-
log.info(`[post] found cancelled step: ${cancelledStep.name}`);
|
|
154090
|
-
return true;
|
|
154091
|
-
}
|
|
154092
|
-
log.info("[post] no cancellation found, assuming failure");
|
|
154093
|
-
} catch (error49) {
|
|
154094
|
-
log.info(
|
|
154095
|
-
`[post] failed to get job status: ${error49 instanceof Error ? error49.message : String(error49)}`
|
|
154096
|
-
);
|
|
154097
|
-
}
|
|
154098
|
-
return false;
|
|
154099
|
-
}
|
|
154100
|
-
async function runPostCleanup() {
|
|
154101
|
-
log.info("\xBB [post] starting post cleanup");
|
|
154102
|
-
const runId = process.env.GITHUB_RUN_ID ? Number.parseInt(process.env.GITHUB_RUN_ID, 10) : void 0;
|
|
154103
|
-
let promptInput = null;
|
|
154104
|
-
try {
|
|
154105
|
-
const resolved = resolvePromptInput();
|
|
154106
|
-
if (typeof resolved !== "string") promptInput = resolved;
|
|
154107
|
-
} catch (error49) {
|
|
154108
|
-
log.info(
|
|
154109
|
-
`[post] failed to resolve prompt input: ${error49 instanceof Error ? error49.message : String(error49)}`
|
|
154110
|
-
);
|
|
154111
|
-
}
|
|
154112
|
-
const token = getJobToken();
|
|
154113
|
-
const repoContext = parseRepoContext();
|
|
154114
|
-
const octokit = createOctokit(token);
|
|
154115
|
-
const ctx = { repoContext, octokit, runId, promptInput };
|
|
154116
|
-
const stuck = await validateStuckProgressComment(ctx);
|
|
154117
|
-
if (!stuck) return log.info("\xBB [post] no stuck progress comment to update, skipping cleanup");
|
|
154118
|
-
log.info(
|
|
154119
|
-
`\xBB [post] validated stuck comment: ${stuck.id} (${stuck.type}), updating with error message`
|
|
154120
|
-
);
|
|
154121
|
-
try {
|
|
154122
|
-
const body = buildErrorCommentBody(
|
|
154123
|
-
ctx,
|
|
154124
|
-
SHOULD_CHECK_REASON ? await getIsCancelled(ctx) : false
|
|
154125
|
-
);
|
|
154126
|
-
await writeAndVerify(ctx, stuck, body);
|
|
154127
|
-
} catch (error49) {
|
|
154128
|
-
const errorMessage = error49 instanceof Error ? error49.message : String(error49);
|
|
154129
|
-
log.info(`[post] failed to update comment: ${errorMessage}`);
|
|
154130
|
-
}
|
|
154131
|
-
}
|
|
154132
|
-
var VERIFY_DELAY_MS = 3e3;
|
|
154133
|
-
var MAX_WRITE_ATTEMPTS = 3;
|
|
154134
|
-
async function writeAndVerify(ctx, comment, body) {
|
|
154135
|
-
const apiCtx = {
|
|
154136
|
-
octokit: ctx.octokit,
|
|
154137
|
-
owner: ctx.repoContext.owner,
|
|
154138
|
-
repo: ctx.repoContext.name
|
|
154139
|
-
};
|
|
154140
|
-
for (let attempt = 1; attempt <= MAX_WRITE_ATTEMPTS; attempt++) {
|
|
154141
|
-
await updateProgressComment(apiCtx, comment, body);
|
|
154142
|
-
await new Promise((resolve3) => setTimeout(resolve3, VERIFY_DELAY_MS));
|
|
154143
|
-
let fetched;
|
|
154144
|
-
try {
|
|
154145
|
-
fetched = await getProgressComment(apiCtx, comment);
|
|
154146
|
-
} catch (error49) {
|
|
154147
|
-
log.warning(
|
|
154148
|
-
`[post] verify GET failed after attempt ${attempt} \u2014 trusting our PUT landed: ${error49 instanceof Error ? error49.message : String(error49)}`
|
|
154149
|
-
);
|
|
154150
|
-
return;
|
|
154151
|
-
}
|
|
154152
|
-
if (fetched.body === body) {
|
|
154153
|
-
log.info(
|
|
154154
|
-
`\xBB [post] successfully updated progress comment (attempt ${attempt}/${MAX_WRITE_ATTEMPTS})`
|
|
154155
|
-
);
|
|
154156
|
-
return;
|
|
154157
|
-
}
|
|
154158
|
-
log.info(
|
|
154159
|
-
`[post] body was overwritten after our write (attempt ${attempt}/${MAX_WRITE_ATTEMPTS}), retrying`
|
|
154160
|
-
);
|
|
154161
|
-
}
|
|
154162
|
-
log.warning(
|
|
154163
|
-
`[post] gave up after ${MAX_WRITE_ATTEMPTS} attempts \u2014 comment may be stale (in-flight writes from the cancelled run kept clobbering us)`
|
|
154164
|
-
);
|
|
154165
|
-
}
|
|
154166
|
-
|
|
154167
154228
|
// commands/gha.ts
|
|
154168
|
-
process.env.PATH = `${
|
|
154229
|
+
process.env.PATH = `${dirname5(process.execPath)}:${process.env.PATH}`;
|
|
154169
154230
|
var STATE_TOKEN = "token";
|
|
154170
154231
|
async function runMain() {
|
|
154171
154232
|
try {
|
|
@@ -154178,15 +154239,6 @@ async function runMain() {
|
|
|
154178
154239
|
core7.setFailed(`action failed: ${errorMessage}`);
|
|
154179
154240
|
}
|
|
154180
154241
|
}
|
|
154181
|
-
async function runPost() {
|
|
154182
|
-
log.debug(`[post] script started at ${(/* @__PURE__ */ new Date()).toISOString()}`);
|
|
154183
|
-
try {
|
|
154184
|
-
await runPostCleanup();
|
|
154185
|
-
} catch (error49) {
|
|
154186
|
-
const message = error49 instanceof Error ? error49.message : String(error49);
|
|
154187
|
-
log.error(`[post] unexpected error: ${message}`);
|
|
154188
|
-
}
|
|
154189
|
-
}
|
|
154190
154242
|
async function tokenMain() {
|
|
154191
154243
|
const reposInput = core7.getInput("repos");
|
|
154192
154244
|
const additionalRepos = reposInput ? reposInput.split(",").map((r) => r.trim()).filter(Boolean) : [];
|
|
@@ -154207,7 +154259,7 @@ async function tokenPost() {
|
|
|
154207
154259
|
core7.info("\xBB installation token revoked");
|
|
154208
154260
|
}
|
|
154209
154261
|
function printGhaUsage(params) {
|
|
154210
|
-
params.stream(`usage: ${params.prog} gha [
|
|
154262
|
+
params.stream(`usage: ${params.prog} gha [subcommand]
|
|
154211
154263
|
`);
|
|
154212
154264
|
params.stream("run the github action runtime flow.");
|
|
154213
154265
|
params.stream("");
|
|
@@ -154216,9 +154268,29 @@ function printGhaUsage(params) {
|
|
|
154216
154268
|
params.stream("");
|
|
154217
154269
|
params.stream("options:");
|
|
154218
154270
|
params.stream(" -h, --help show help");
|
|
154219
|
-
|
|
154271
|
+
}
|
|
154272
|
+
function printGhaTokenUsage(params) {
|
|
154273
|
+
params.stream(`usage: ${params.prog} gha token [--post]
|
|
154274
|
+
`);
|
|
154275
|
+
params.stream("acquire a github app installation token, or revoke it in the post step.");
|
|
154276
|
+
params.stream("");
|
|
154277
|
+
params.stream("options:");
|
|
154278
|
+
params.stream(" -h, --help show help");
|
|
154279
|
+
params.stream(" --post revoke the previously-acquired token (post-step usage only)");
|
|
154220
154280
|
}
|
|
154221
154281
|
function parseGhaArgs(args2) {
|
|
154282
|
+
return (0, import_arg.default)(
|
|
154283
|
+
{
|
|
154284
|
+
"--help": Boolean,
|
|
154285
|
+
"-h": "--help"
|
|
154286
|
+
},
|
|
154287
|
+
{
|
|
154288
|
+
argv: args2,
|
|
154289
|
+
stopAtPositional: true
|
|
154290
|
+
}
|
|
154291
|
+
);
|
|
154292
|
+
}
|
|
154293
|
+
function parseGhaTokenArgs(args2) {
|
|
154222
154294
|
return (0, import_arg.default)(
|
|
154223
154295
|
{
|
|
154224
154296
|
"--help": Boolean,
|
|
@@ -154249,23 +154321,40 @@ async function runCli(params) {
|
|
|
154249
154321
|
printGhaUsage({ stream: console.log, prog: params.prog });
|
|
154250
154322
|
return;
|
|
154251
154323
|
}
|
|
154252
|
-
const normalizedArgs = ["gha"];
|
|
154253
154324
|
const positional = parsed2._;
|
|
154254
|
-
|
|
154255
|
-
|
|
154325
|
+
const subcommand = positional[0];
|
|
154326
|
+
if (!subcommand) {
|
|
154327
|
+
await run(["gha"]);
|
|
154328
|
+
return;
|
|
154329
|
+
}
|
|
154330
|
+
if (subcommand !== "token") {
|
|
154331
|
+
console.error(`unknown gha subcommand: ${subcommand}
|
|
154256
154332
|
`);
|
|
154257
154333
|
printGhaUsage({ stream: console.error, prog: params.prog });
|
|
154258
154334
|
process.exit(1);
|
|
154259
154335
|
}
|
|
154260
|
-
|
|
154261
|
-
|
|
154262
|
-
|
|
154263
|
-
|
|
154336
|
+
let tokenParsed;
|
|
154337
|
+
try {
|
|
154338
|
+
tokenParsed = parseGhaTokenArgs(positional.slice(1));
|
|
154339
|
+
} catch (error49) {
|
|
154340
|
+
const message = error49 instanceof Error ? error49.message : String(error49);
|
|
154341
|
+
console.error(`${message}
|
|
154342
|
+
`);
|
|
154343
|
+
printGhaTokenUsage({ stream: console.error, prog: params.prog });
|
|
154344
|
+
process.exit(1);
|
|
154345
|
+
}
|
|
154346
|
+
if (tokenParsed["--help"]) {
|
|
154347
|
+
printGhaTokenUsage({ stream: console.log, prog: params.prog });
|
|
154348
|
+
return;
|
|
154349
|
+
}
|
|
154350
|
+
if (tokenParsed._.length > 0) {
|
|
154351
|
+
console.error(`unexpected positional arguments for gha token: ${tokenParsed._.join(" ")}
|
|
154264
154352
|
`);
|
|
154265
|
-
|
|
154353
|
+
printGhaTokenUsage({ stream: console.error, prog: params.prog });
|
|
154266
154354
|
process.exit(1);
|
|
154267
154355
|
}
|
|
154268
|
-
|
|
154356
|
+
const normalizedArgs = ["gha", "token"];
|
|
154357
|
+
if (tokenParsed["--post"]) {
|
|
154269
154358
|
normalizedArgs.push("--post");
|
|
154270
154359
|
}
|
|
154271
154360
|
await run(normalizedArgs);
|
|
@@ -154278,8 +154367,6 @@ async function run(args2) {
|
|
|
154278
154367
|
} else {
|
|
154279
154368
|
await tokenMain();
|
|
154280
154369
|
}
|
|
154281
|
-
} else if (args2.includes("--post")) {
|
|
154282
|
-
await runPost();
|
|
154283
154370
|
} else {
|
|
154284
154371
|
await runMain();
|
|
154285
154372
|
}
|
|
@@ -155949,7 +156036,7 @@ async function run2() {
|
|
|
155949
156036
|
}
|
|
155950
156037
|
|
|
155951
156038
|
// cli.ts
|
|
155952
|
-
var VERSION10 = "0.0
|
|
156039
|
+
var VERSION10 = "0.1.0";
|
|
155953
156040
|
var bin = basename2(process.argv[1] || "");
|
|
155954
156041
|
var PROG = bin === "pf" || bin === "pullfrog" ? bin : "pullfrog";
|
|
155955
156042
|
var rawArgs = process.argv.slice(2);
|