pullfrog 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/opencodeShared.d.ts +40 -0
- package/dist/agents/postRun.d.ts +11 -3
- package/dist/agents/shared.d.ts +7 -0
- package/dist/cli.mjs +4609 -3445
- package/dist/external.d.ts +1 -1
- package/dist/index.js +2048 -1416
- package/dist/internal/index.d.ts +2 -1
- package/dist/internal.js +245 -85
- package/dist/mcp/shell.d.ts +5 -0
- package/dist/models.d.ts +10 -0
- package/dist/modes.d.ts +1 -1
- package/dist/toolState.d.ts +6 -0
- package/dist/utils/activity.d.ts +31 -1
- package/dist/utils/agentHangReport.d.ts +38 -0
- package/dist/utils/apiKeys.d.ts +5 -1
- package/dist/utils/billingErrors.d.ts +85 -0
- package/dist/utils/buildPullfrogFooter.d.ts +7 -0
- package/dist/utils/byokFallback.d.ts +50 -0
- package/dist/utils/codexHome.d.ts +23 -0
- package/dist/utils/errorReport.d.ts +9 -0
- package/dist/utils/gitAuth.d.ts +27 -0
- package/dist/utils/learnings.d.ts +20 -0
- package/dist/utils/learningsTruncate.d.ts +25 -0
- package/dist/utils/lifecycle.d.ts +23 -3
- package/dist/utils/overrides.d.ts +40 -0
- package/dist/utils/payload.d.ts +7 -0
- package/dist/utils/prSummary.d.ts +21 -0
- package/dist/utils/providerErrors.d.ts +11 -0
- package/dist/utils/proxy.d.ts +47 -0
- package/dist/utils/runContext.d.ts +0 -9
- package/dist/utils/runErrorRenderer.d.ts +41 -0
- package/dist/utils/runLifecycle.d.ts +75 -0
- package/dist/utils/runStartupLog.d.ts +15 -0
- package/dist/utils/subprocess.d.ts +1 -0
- package/package.json +3 -2
- /package/dist/agents/{opencode.d.ts → opencode_v2.d.ts} +0 -0
package/dist/internal.js
CHANGED
|
@@ -31,6 +31,7 @@ var providers = {
|
|
|
31
31
|
openai: provider({
|
|
32
32
|
displayName: "OpenAI",
|
|
33
33
|
envVars: ["OPENAI_API_KEY"],
|
|
34
|
+
managedCredentials: ["CODEX_AUTH_JSON"],
|
|
34
35
|
models: {
|
|
35
36
|
gpt: {
|
|
36
37
|
displayName: "GPT",
|
|
@@ -90,12 +91,16 @@ var providers = {
|
|
|
90
91
|
displayName: "Gemini Pro",
|
|
91
92
|
resolve: "google/gemini-3.1-pro-preview",
|
|
92
93
|
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
93
|
-
preferred: true
|
|
94
|
-
|
|
94
|
+
preferred: true
|
|
95
|
+
// Inherit (subagents stay on Pro). Google has no in-between tier;
|
|
96
|
+
// dropping to Flash for review work was a meaningful capability cliff
|
|
97
|
+
// (Flash missed the catastrophic camelCase/snake_case mismatch in
|
|
98
|
+
// the v4 e2e test). Pro is cost-effective enough to use for both
|
|
99
|
+
// orchestrator and lenses.
|
|
95
100
|
},
|
|
96
101
|
"gemini-flash": {
|
|
97
102
|
displayName: "Gemini Flash",
|
|
98
|
-
resolve: "google/gemini-3-flash-preview",
|
|
103
|
+
resolve: "google/gemini-3.5-flash",
|
|
99
104
|
openRouterResolve: "openrouter/google/gemini-3-flash-preview"
|
|
100
105
|
}
|
|
101
106
|
}
|
|
@@ -110,15 +115,22 @@ var providers = {
|
|
|
110
115
|
openRouterResolve: "openrouter/x-ai/grok-4.3",
|
|
111
116
|
preferred: true
|
|
112
117
|
},
|
|
118
|
+
// legacy aliases — xAI retired the entire fast/code-fast line on
|
|
119
|
+
// 2026-05-15 (https://docs.x.ai/developers/migration/may-15-deprecation)
|
|
120
|
+
// and now redirects every deprecated text-model slug to grok-4.3 at
|
|
121
|
+
// standard pricing. fall back to the live `xai/grok` so the alias
|
|
122
|
+
// chain resolves to grok-4.3 for both direct-key and OpenRouter users.
|
|
113
123
|
"grok-fast": {
|
|
114
124
|
displayName: "Grok Fast",
|
|
115
125
|
resolve: "xai/grok-4-1-fast",
|
|
116
|
-
openRouterResolve: "openrouter/x-ai/grok-4.1-fast"
|
|
126
|
+
openRouterResolve: "openrouter/x-ai/grok-4.3",
|
|
127
|
+
fallback: "xai/grok"
|
|
117
128
|
},
|
|
118
129
|
"grok-code-fast": {
|
|
119
130
|
displayName: "Grok Code Fast",
|
|
120
131
|
resolve: "xai/grok-code-fast-1",
|
|
121
|
-
openRouterResolve: "openrouter/x-ai/grok-code-fast-1"
|
|
132
|
+
openRouterResolve: "openrouter/x-ai/grok-4.3",
|
|
133
|
+
fallback: "xai/grok"
|
|
122
134
|
}
|
|
123
135
|
}
|
|
124
136
|
}),
|
|
@@ -232,8 +244,8 @@ var providers = {
|
|
|
232
244
|
"gemini-pro": {
|
|
233
245
|
displayName: "Gemini Pro",
|
|
234
246
|
resolve: "opencode/gemini-3.1-pro",
|
|
235
|
-
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
236
|
-
|
|
247
|
+
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
248
|
+
// Inherit — see google/gemini-pro for rationale.
|
|
237
249
|
},
|
|
238
250
|
"gemini-flash": {
|
|
239
251
|
displayName: "Gemini Flash",
|
|
@@ -345,8 +357,8 @@ var providers = {
|
|
|
345
357
|
"gemini-pro": {
|
|
346
358
|
displayName: "Gemini Pro",
|
|
347
359
|
resolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
348
|
-
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
349
|
-
|
|
360
|
+
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
361
|
+
// Inherit — see google/gemini-pro for rationale.
|
|
350
362
|
},
|
|
351
363
|
"gemini-flash": {
|
|
352
364
|
displayName: "Gemini Flash",
|
|
@@ -410,6 +422,11 @@ function getModelEnvVars(slug) {
|
|
|
410
422
|
}
|
|
411
423
|
return providerConfig.envVars.slice();
|
|
412
424
|
}
|
|
425
|
+
function getModelManagedCredentials(slug) {
|
|
426
|
+
const parsed = parseModel(slug);
|
|
427
|
+
const providerConfig = providers[parsed.provider];
|
|
428
|
+
return providerConfig?.managedCredentials?.slice() ?? [];
|
|
429
|
+
}
|
|
413
430
|
var modelAliases = Object.entries(providers).flatMap(
|
|
414
431
|
([providerKey, config]) => Object.entries(config.models).map(([modelId, def]) => ({
|
|
415
432
|
slug: `${providerKey}/${modelId}`,
|
|
@@ -471,52 +488,145 @@ var REVIEWER_AGENT_NAME = "reviewfrog";
|
|
|
471
488
|
// modes.ts
|
|
472
489
|
var PR_SUMMARY_FORMAT = `### Default format
|
|
473
490
|
|
|
474
|
-
|
|
491
|
+
The body has at most three parts in this exact order:
|
|
492
|
+
|
|
493
|
+
1. **Reviewed changes preamble** \u2014 one bolded inline lead-in describing what was reviewed in this run, a bullet list of the substantive changes, and an HTML comment carrying review metadata for downstream agents.
|
|
494
|
+
2. **Cross-cutting issue sections** (zero or more) \u2014 one \`### \` heading per concern, with a human-readable problem write-up and a collapsed \`<details>Technical details</details>\` block underneath.
|
|
495
|
+
3. **\`### \u2139\uFE0F Nitpicks\`** at the very bottom (only if there are nits worth surfacing in the body) \u2014 a flat bullet list, no technical-details block.
|
|
496
|
+
|
|
497
|
+
Inline-vs-body split: concerns that anchor to a specific line go inline (use the \`comments\` parameter). Body \`### \` sections are reserved for concerns that **have no line to anchor to** \u2014 typically because the concern is about *absence* (something the diff should have done but didn't), *sequencing* (rollout / deletion / migration order), *design decisions only the human can make*, or *scope questions the diff implicitly raises but doesn't address*. A concern that anchors to a line but has broad implications still goes inline (use the technical-details block there to capture the implications \u2014 see Inline technical details below). If you found no non-anchorable concerns, the body has zero \`### \` issue sections \u2014 just the preamble + metadata.
|
|
498
|
+
|
|
499
|
+
## 1. Reviewed changes preamble
|
|
500
|
+
|
|
501
|
+
Open with a single bolded inline lead-in followed immediately by the bullet list (no \`### Key changes\` heading, no \`<b>TL;DR</b>\`):
|
|
502
|
+
|
|
503
|
+
\`\`\`
|
|
504
|
+
**Reviewed changes** \u2014 one sentence on what was reviewed in this run. For Review (initial), this is what the PR does and why. For IncrementalReview, this is what changed since the prior pullfrog review. Focus on intent, not mechanics.
|
|
505
|
+
|
|
506
|
+
- **Short human-readable title** \u2014 1 sentence per substantive change. Write a short prose phrase; when you name a file, type, or function, put that name in backticks (e.g. **Add \\\`TodoTracker\\\` for live checklists**). A reviewer should understand the full reviewed scope from this list alone \u2014 this IS the dispassionate "what was reviewed and what changed" overview, so cover the substantive changes, not just the loudest ones.
|
|
507
|
+
|
|
508
|
+
<!--
|
|
509
|
+
Pullfrog review metadata \u2014 for any agent (or human-with-agent) reading this
|
|
510
|
+
review. Incorporate the fields below into your understanding of the context
|
|
511
|
+
this review was made in. The findings below were written against
|
|
512
|
+
{head_sha_short}; if new commits have landed on {head_ref} since this review
|
|
513
|
+
was submitted, treat any specific bug, file, or line callout as POTENTIALLY
|
|
514
|
+
STALE \u2014 re-diff against {head_sha_short} (or trigger a fresh review) and
|
|
515
|
+
factor commits past {head_sha_short} into your understanding of the current
|
|
516
|
+
state before acting on findings.
|
|
517
|
+
|
|
518
|
+
- Mode: Review (initial) or IncrementalReview (delta against prior pullfrog review)
|
|
519
|
+
- Files reviewed: {file_count}
|
|
520
|
+
- Commits reviewed: {commit_count}
|
|
521
|
+
- Base: {base_ref} ({base_sha_short})
|
|
522
|
+
- Head: {head_ref} ({head_sha_short})
|
|
523
|
+
- Reviewed commits:
|
|
524
|
+
- {sha_short} \u2014 {commit_subject}
|
|
525
|
+
- ...
|
|
526
|
+
- Prior pullfrog review: none or {prior_sha_short} ({prior_review_html_url})
|
|
527
|
+
- Submitted at: {iso_timestamp}
|
|
528
|
+
-->
|
|
529
|
+
\`\`\`
|
|
530
|
+
|
|
531
|
+
Pull every metadata field from the \`checkout_pr\` tool's response \u2014 file count, commit count, base/head ref + SHA, the commit list. For \`IncrementalReview\` runs, populate \`Prior pullfrog review\` with the prior review's commit_id (short SHA) and \`html_url\` from \`list_pull_request_reviews\`.
|
|
532
|
+
|
|
533
|
+
## 2. Cross-cutting issue sections (zero or more)
|
|
534
|
+
|
|
535
|
+
For each cross-cutting concern, one \`### \` section. Use this exact shape:
|
|
536
|
+
|
|
537
|
+
\`\`\`
|
|
538
|
+
### {emoji} {short, descriptive title \u2014 what's wrong, not what to do}
|
|
539
|
+
|
|
540
|
+
{Human-readable problem write-up. Describes the PROBLEM only \u2014 what's broken, what the symptom is, what the blast radius is. NO asks, NO suggested fixes, NO "the right thing to do is...". Asks and fixes live in the technical-details block below; the visible part is for the human to *understand* the problem, not to implement it.}
|
|
541
|
+
|
|
542
|
+
<details><summary>Technical details</summary>
|
|
543
|
+
|
|
544
|
+
\\\`\\\`\\\`\\\`markdown
|
|
545
|
+
# {title repeated}
|
|
546
|
+
|
|
547
|
+
## Affected sites
|
|
548
|
+
- {file path:line} \u2014 {what's wrong there}
|
|
549
|
+
- ...
|
|
475
550
|
|
|
476
|
-
|
|
477
|
-
|
|
551
|
+
## Required outcome
|
|
552
|
+
- {what the fix needs to achieve, not how to achieve it}
|
|
553
|
+
- ...
|
|
478
554
|
|
|
479
|
-
|
|
555
|
+
## Suggested approach (optional)
|
|
556
|
+
{When the fix shape is non-obvious, sketch one or more reasonable directions. Skip when the outcome alone makes the fix obvious.}
|
|
480
557
|
|
|
481
|
-
|
|
558
|
+
## Open questions for the human (optional)
|
|
559
|
+
- {Any decision an implementing agent shouldn't make unilaterally \u2014 pricing thresholds, breaking-change policy, naming, scope of follow-up.}
|
|
560
|
+
\\\`\\\`\\\`\\\`
|
|
482
561
|
|
|
483
|
-
|
|
484
|
-
|
|
562
|
+
</details>
|
|
563
|
+
\`\`\`
|
|
485
564
|
|
|
486
|
-
|
|
565
|
+
Concrete example of the visible part of a non-anchored section (technical-details block unchanged from the template above):
|
|
487
566
|
|
|
488
|
-
|
|
567
|
+
\`\`\`
|
|
568
|
+
### \u2139\uFE0F Legacy \`opencode.ts\` has no documented deletion plan
|
|
489
569
|
|
|
490
|
-
|
|
570
|
+
The v2 harness lands alongside the v1 file and imports one helper from it. Worth a follow-up issue or a TODO so the next maintainer doesn't have to re-derive the cleanup plan.
|
|
571
|
+
\`\`\`
|
|
491
572
|
|
|
492
|
-
|
|
493
|
-
IMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate \`>\` lines creates a double line break.
|
|
573
|
+
The example's value is its *shape*: a finding about absence (no deletion plan), not a line-anchored bug. Body sections live or die on whether the concern genuinely doesn't fit on a line.
|
|
494
574
|
|
|
495
|
-
|
|
575
|
+
**Heading severity emoji** \u2014 every \`### \` heading carries one:
|
|
496
576
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
> </details>
|
|
577
|
+
- \u{1F6A8} critical \u2014 blocks merge (data loss, security, broken core flow)
|
|
578
|
+
- \u26A0\uFE0F important \u2014 must address before merging (regression, missing validation, incorrect behavior)
|
|
579
|
+
- \u2139\uFE0F informational \u2014 surfaced for awareness; mergeable as-is
|
|
501
580
|
|
|
502
|
-
|
|
503
|
-
[\`file.ts\`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \xB7 ...
|
|
581
|
+
**Visible problem write-up rules:**
|
|
504
582
|
|
|
505
|
-
|
|
583
|
+
- **No asks, no suggested fixes** in the visible part. The visible portion describes the problem; the technical-details block describes the fix shape and any open questions. The exception: a fix so self-evident that NOT stating it would be weird (e.g. "the typo is missing an 'r'") \u2014 in that case, fold it into the problem statement and skip the suggested-approach block in technical details too.
|
|
584
|
+
- **Never two successive plain paragraphs.** Every transition between block-level elements must alternate prose with structure: paragraph \u2192 bullet list \u2192 paragraph; paragraph \u2192 code fence \u2192 bullet list; paragraph \u2192 table \u2192 paragraph. Two consecutive paragraphs in a row create a wall of text that's impossible to digest. If you catch yourself writing one, find a way to split it: pull a list out of it, drop a 2-3 line code fence between them, or merge them into a single tighter paragraph.
|
|
585
|
+
- **Per-paragraph budget:** ~3 sentences max. Past that, you're explaining where you should be structuring.
|
|
586
|
+
- **Identifier discipline still applies** in the visible part. Lead with behavior in plain English; name an identifier only when it's the subject of the concern or a public surface a reader would recognize. The technical-details block is where dense identifier references belong.
|
|
506
587
|
|
|
507
|
-
|
|
508
|
-
GitHub's markdown parser requires a blank line between ALL block-level elements. This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
|
|
588
|
+
**Technical-details block rules:**
|
|
509
589
|
|
|
510
|
-
|
|
511
|
-
-
|
|
512
|
-
-
|
|
513
|
-
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
-
|
|
518
|
-
|
|
519
|
-
|
|
590
|
+
- Wrapped in a 4-backtick markdown fence (\`\\\`\\\`\\\`\\\`markdown ... \\\`\\\`\\\`\\\`\`) so it's visually distinct, one-click copyable, and can contain its own 3-backtick code fences without escape gymnastics. The contents are agent-readable \u2014 a fix-agent will pull the body down and use this block as the brief.
|
|
591
|
+
- File paths and \`file:line\` refs are encouraged (and necessary) \u2014 the next agent uses these to navigate. Identifier density is fine here.
|
|
592
|
+
- Slightly more verbose than the absolute minimum is OK when it materially helps the next agent: a small code snippet showing the symptom, a short table of mismatched key/column pairs, a one-paragraph "why CI doesn't catch it" note. Skip massive regression-test scaffolding or full route rewrites \u2014 the implementing agent writes those.
|
|
593
|
+
- Use the four standard sections (\`Affected sites\`, \`Required outcome\`, optional \`Suggested approach\`, optional \`Open questions for the human\`). Skip the optional sections when they wouldn't add anything.
|
|
594
|
+
|
|
595
|
+
## Inline technical details
|
|
596
|
+
|
|
597
|
+
Inline comments are short (~2-3 sentences) by default. When an inline finding has broader implications worth recording for a fix-agent \u2014 e.g. a localized bug whose proper fix requires touching several files, or where the right fix depends on a design decision the human needs to make \u2014 append a collapsed \`<details><summary>Technical details</summary>\` block to the inline comment's body. Same shape as the body-section technical-details block (4-backtick fenced markdown, \`## Affected sites\` / \`## Required outcome\` / optional \`## Suggested approach\` / optional \`## Open questions for the human\`).
|
|
598
|
+
|
|
599
|
+
GitHub uses the same markdown parser for inline comments as for the review body, so the collapsed-details affordance works the same way. The visible part of the inline comment stays scannable; the depth is one click away for any agent that needs it.
|
|
600
|
+
|
|
601
|
+
## 3. \`### \u2139\uFE0F Nitpicks\` (optional, last section)
|
|
602
|
+
|
|
603
|
+
Only when there are nits that for some reason can't be inlined. Filepaths in nit text are fine \u2014 these are simple enough that a human or agent reads once and acts. No technical-details block.
|
|
604
|
+
|
|
605
|
+
\`\`\`
|
|
606
|
+
### \u2139\uFE0F Nitpicks
|
|
607
|
+
|
|
608
|
+
- {nit, with file path inline if useful, \u2264 ~200 chars}
|
|
609
|
+
- ...
|
|
610
|
+
\`\`\`
|
|
611
|
+
|
|
612
|
+
## Inline comment shape
|
|
613
|
+
|
|
614
|
+
Inline comments use the same severity framing as body \`### \` sections, scaled down for line-anchored use:
|
|
615
|
+
|
|
616
|
+
- **Lead with a 1-2 sentence problem statement.** The reader is looking at the line in question, so don't restate what the line says \u2014 describe what's wrong with it. Optionally prefix the visible line with a severity emoji (\u{1F6A8} / \u26A0\uFE0F / \u2139\uFE0F) when severity isn't obvious from context.
|
|
617
|
+
- **Optional \`<details><summary>Technical details</summary>...</details>\` collapsible** for findings whose technical context (longer file:line references, related-code snippets, suggested approach, regression-risk notes) would overwhelm the human-readable lead-in. Same agent-readable purpose, same 4-backtick fence shape, and same 4-section structure as the body's technical-details block \u2014 see *Inline technical details* above. Encouraged whenever the depth helps a downstream fix-agent; don't force one when the inline lead-in already says everything.
|
|
618
|
+
- **Visible portion \u2264 2-3 sentences.** If you find yourself writing more, that's the cue to split the depth into the \`Technical details\` collapsible.
|
|
619
|
+
|
|
620
|
+
## Body-wide rules
|
|
621
|
+
|
|
622
|
+
- **Inline-vs-body discipline (repeated for emphasis):** anything that anchors to a specific line goes inline (with a \`<details>Technical details</details>\` block when the implications are broad). The body is for non-anchorable concerns only \u2014 absence, sequencing, design decisions, scope questions, architectural risk.
|
|
623
|
+
- **No \`### Issues found\` heading** above the issue sections \u2014 each \`### \` heading IS the issue.
|
|
624
|
+
- **Severity emoji on every \`### \` heading** (\u{1F6A8} / \u26A0\uFE0F / \u2139\uFE0F). No emoji on the preamble lead-in or anywhere else.
|
|
625
|
+
- **GitHub block-level rendering**: GitHub's markdown parser requires a blank line between ALL block-level elements (HTML tags like \`<br/>\`, \`<sub>\`, \`<details>\`, \`<b>\` and markdown syntax like headings, lists, blockquotes, code fences, paragraphs). Without a blank line, GitHub treats following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.
|
|
626
|
+
- **Backtick-wrap** every variable, identifier, or file name when you mention one (in either visible or technical-details portions).
|
|
627
|
+
- **Don't repeat diff content**, don't include raw \`+123 / -45\` stats, don't include a changelog section, don't use horizontal rules (\`---\`).
|
|
628
|
+
- **Pull file/commit counts from \`checkout_pr\` metadata** \u2014 never count manually.
|
|
629
|
+
- **Legacy headings REMOVED.** Do not use \`### Key changes\`, \`### Issues found\`, \`<b>TL;DR</b>\`, or \`<sub><b>Summary</b>\`. The new structure subsumes them.`;
|
|
520
630
|
function computeModes(agentId) {
|
|
521
631
|
const t = (toolName) => formatMcpToolRef(agentId, toolName);
|
|
522
632
|
return [
|
|
@@ -558,7 +668,7 @@ function computeModes(agentId) {
|
|
|
558
668
|
|
|
559
669
|
Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
|
|
560
670
|
|
|
561
|
-
Provide the subagent with YOUR TASK, the output of \`git diff
|
|
671
|
+
Provide the subagent with YOUR TASK, the output of \`git diff origin/<base-branch>\` (single-rev form, no \`HEAD\` \u2014 this compares the working tree against the remote base and captures committed + staged + unstaged work; \`main...HEAD\` and \`--cached\` both miss the uncommitted edits Build self-review runs on, since self-review happens BEFORE the commit), and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
|
|
562
672
|
|
|
563
673
|
Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
|
|
564
674
|
- Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
|
|
@@ -567,7 +677,7 @@ function computeModes(agentId) {
|
|
|
567
677
|
- Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
|
|
568
678
|
- For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
|
|
569
679
|
|
|
570
|
-
|
|
680
|
+
Be **discerning** about what comes back. The reviewer is an AI subagent and is fallible \u2014 treat every finding as a hypothesis, not a directive, and **verify each one yourself** against the diff and the code before deciding whether to apply. You are searching for a solution that is **complete, minimal, and elegant** \u2014 you may need to think hard to find it. Do not over-engineer, do not be over-defensive, **do not write AI slop**. Reviewers bias toward *recommending additions*, and that bias has a recognizable slop texture: defensive checks for cases that cannot happen, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards, error handlers for cases the type system already rules out. Reject those. For each surviving finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three means look harder for a fix that gets all three before settling. After applying the fixes you accept, re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
|
|
571
681
|
|
|
572
682
|
6. **finalize**:
|
|
573
683
|
- confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
|
|
@@ -591,7 +701,8 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
591
701
|
|
|
592
702
|
4. For each comment:
|
|
593
703
|
- understand the feedback
|
|
594
|
-
-
|
|
704
|
+
- **verify the finding yourself** against the actual code before deciding whether to apply \u2014 every comment (human or agent) is a hypothesis, not a directive. agent reviewers especially are fallible.
|
|
705
|
+
- you are searching for a solution that is **complete, minimal, and elegant** \u2014 you may need to think hard to find it. do not over-engineer, do not be over-defensive, **do not write AI slop**. reviewers bias toward *recommending additions*, and that bias has a recognizable slop texture: defensive checks for impossible cases, extra abstractions used once, comments restating obvious code, tests asserting tautologies, "just-in-case" guards, error handlers for cases the type system already rules out. reject those. evaluate whether applying the finding would leave the code more **sound, correct, AND elegant**; two-out-of-three is a signal to look harder for a fix that gets all three. if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it.
|
|
595
706
|
- if the request stands, make the code change using your native tools; otherwise reply explaining why
|
|
596
707
|
- record what was done (or why nothing was done)
|
|
597
708
|
|
|
@@ -599,11 +710,13 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
599
710
|
- test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
|
|
600
711
|
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
601
712
|
|
|
602
|
-
6. Finalize:
|
|
713
|
+
6. Finalize. Reply + resolve are paired write actions: do BOTH or NEITHER for each thread.
|
|
603
714
|
- confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
|
|
604
|
-
-
|
|
605
|
-
-
|
|
606
|
-
|
|
715
|
+
- **if push fails**, call \`${t("report_progress")}\` with the exact error and STOP \u2014 do NOT reply or resolve any thread until the fix is live on the remote. Resolving a thread without the fix landing misleads the reviewer.
|
|
716
|
+
- **on push success**, for each thread you acted on:
|
|
717
|
+
- reply ONCE via \`${t("reply_to_review_comment")}\`. The \`comment_id\` parameter takes the root comment's numeric \`id=\` (from the first \`comment author=...\` tag in the \`${t("get_review_comments")}\` output) \u2014 NOT the \`thread=\` value; that's a separate GraphQL ID used by resolve. The runtime dedupes identical bodies within a session.
|
|
718
|
+
- **immediately** call \`${t("resolve_review_thread")}\` with that thread's \`thread=\` value as \`thread_id\`. Resolve every thread where you (a) made the requested code change in full \u2014 partial fixes leave the thread open \u2014 OR (b) replied with a substantive answer the user explicitly asked for. Do NOT resolve threads where you pushed back on the request and the disagreement is unresolved; leave those open for the human to mediate.
|
|
719
|
+
- call \`${t("report_progress")}\` with a brief summary`
|
|
607
720
|
},
|
|
608
721
|
// Review and IncrementalReview use a 0-or-2+ lens pattern. The default is
|
|
609
722
|
// 0 lenses (orchestrator handles the review solo). Multi-lens (2+
|
|
@@ -620,9 +733,12 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
620
733
|
// the Review/IncrementalReview lens fan-out where independence between
|
|
621
734
|
// perspectives is what's being purchased.
|
|
622
735
|
//
|
|
623
|
-
//
|
|
624
|
-
//
|
|
625
|
-
//
|
|
736
|
+
// Severity categorization is split across two surfaces: the opening
|
|
737
|
+
// callout (CAUTION/IMPORTANT/ℹ️/✅) sets the review's overall tier, and
|
|
738
|
+
// per-section emoji prefixes (🚨/⚠️/ℹ️ on each `### ` heading in
|
|
739
|
+
// PR_SUMMARY_FORMAT) tag individual concerns — scoping severity to the
|
|
740
|
+
// specific issue section rather than the whole review keeps a review that
|
|
741
|
+
// mixes a 🚨 and an ℹ️ from being mislabeled by either of them.
|
|
626
742
|
{
|
|
627
743
|
name: "Review",
|
|
628
744
|
description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
|
|
@@ -708,7 +824,9 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
708
824
|
|
|
709
825
|
6. **aggregate & draft**: when the fan-out lands, merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
|
|
710
826
|
|
|
711
|
-
for
|
|
827
|
+
**Hunt for non-anchored concerns before drafting.** After collecting your anchored findings, deliberately scan for concerns that have no specific line to point at \u2014 typically: deletion / cleanup plans for code the diff replaces or shadows; rollout sequencing (what happens to in-flight state during deploy / revert?); coverage gaps the diff implies but doesn't add; scope questions that only the human can answer (e.g. is the legacy path going away or is this a long-term dual track?); architectural risks the diff opens up that aren't a single-line bug. On substantial PRs (migrations, refactors, multi-file rewrites, version bumps that change runtime semantics), at least one such concern almost always exists; if you can't think of any, your bar is probably too high.
|
|
828
|
+
|
|
829
|
+
for surviving findings, draft inline comments with NEW line numbers from the diff \u2014 attach a \`<details>Technical details</details>\` block to any inline comment whose fix is non-trivial or has cross-file implications (see Inline technical details in the format below). every comment must be actionable, 2-3 sentences max in the visible part. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
|
|
712
830
|
|
|
713
831
|
7. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
|
|
714
832
|
|
|
@@ -716,12 +834,12 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
716
834
|
|
|
717
835
|
The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
|
|
718
836
|
|
|
719
|
-
|
|
837
|
+
The opening callout is what the author sees first \u2014 pick the one that matches what you want them to do. Five tiers, from loudest to friendliest:
|
|
720
838
|
|
|
721
839
|
- \`[!CAUTION]\` \u2014 large red banner. Reads as "this will break something."
|
|
722
840
|
- \`[!IMPORTANT]\` \u2014 large purple banner. Reads as "you need to look at this before merging."
|
|
723
|
-
-
|
|
724
|
-
-
|
|
841
|
+
- \`> \u2139\uFE0F ...\` \u2014 informational blockquote. Reads as "minor suggestions, nothing blocking."
|
|
842
|
+
- \`> \u2705 ...\` \u2014 green friendly blockquote. Reads as "no concerns, mergeable."
|
|
725
843
|
|
|
726
844
|
Two reinforcing levers: callout intensity (above) and \`approved\` (which gates the footer Fix-button affordance \u2014 Fix renders on every non-approving review, so \`approved: true\` suppresses it). Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing. Pick the tier the author's actual next action justifies.
|
|
727
845
|
|
|
@@ -730,25 +848,25 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
730
848
|
- **must-address non-critical findings** (real consequences if shipped \u2014 incorrect behavior in non-critical paths, missing validation on user input, regressions the author should fix before merge):
|
|
731
849
|
\`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> ...\`, followed by the PR summary. Reserve this tier for findings with concrete fallout \u2014 do NOT use \`[!IMPORTANT]\` for nits, style preferences, or "consider also" suggestions. Include all inline comments via \`comments\`.
|
|
732
850
|
- **minor suggestions only** (single-line nits, doc/comment polish, defer-able observations, "rough edges"):
|
|
733
|
-
\`approved: false\`.
|
|
851
|
+
\`approved: false\`. Body opens with \`> \u2139\uFE0F No critical issues \u2014 minor suggestions inline.\\n\\n\` followed by the PR summary. Include all inline comments via \`comments\`. Vary the wording after the emoji to fit the review (e.g. "Minor suggestions only.", "Two rough edges worth a look."), but always keep the \u2139\uFE0F prefix and keep it short.
|
|
734
852
|
- **informational observations** (mergeable as-is, nothing actionable \u2014 e.g. prior feedback addressed cleanly, surfacing a minor stale doc reference, calling out something noteworthy without recommending a change):
|
|
735
|
-
\`approved: true\`. Body opens with \`>
|
|
853
|
+
\`approved: true\`. Body opens with \`> \u2705 No new issues found.\\n\\n\` followed by the PR summary. Do NOT include inline \`comments\` \u2014 the \u2705 signals "no action needed", which contradicts an actionable anchor; if a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead.
|
|
736
854
|
- **no actionable issues**:
|
|
737
|
-
\`approved: true\`. Body opens with
|
|
855
|
+
\`approved: true\`. Body opens with \`> \u2705 No new issues found.\\n\\n\` followed by the PR summary.
|
|
738
856
|
|
|
739
857
|
${PR_SUMMARY_FORMAT}`
|
|
740
858
|
},
|
|
741
|
-
// IncrementalReview shares Review's 0-or-2+ lens pattern
|
|
742
|
-
//
|
|
743
|
-
//
|
|
744
|
-
//
|
|
745
|
-
// subagents matches the canonical anneal
|
|
746
|
-
// pre-existing failures — don't flag these"
|
|
747
|
-
// regressions the new commits amplified.
|
|
748
|
-
//
|
|
749
|
-
//
|
|
750
|
-
//
|
|
751
|
-
//
|
|
859
|
+
// IncrementalReview shares Review's 0-or-2+ lens pattern AND its body
|
|
860
|
+
// format (PR_SUMMARY_FORMAT), scoped to the incremental delta against the
|
|
861
|
+
// prior pullfrog review. The "issues must be NEW since the last Pullfrog
|
|
862
|
+
// review" filter lives at aggregation time (step 8), NOT in the subagent
|
|
863
|
+
// prompt — pushing the filter into subagents matches the canonical anneal
|
|
864
|
+
// anti-pattern of "list known pre-existing failures — don't flag these"
|
|
865
|
+
// and suppresses signal on regressions the new commits amplified. A
|
|
866
|
+
// separate "Prior review feedback" checklist would duplicate the rolling
|
|
867
|
+
// PR summary snapshot's record of what earlier runs already addressed and
|
|
868
|
+
// add noise to the user-facing body. Same opening-callout + per-bullet
|
|
869
|
+
// emoji severity split as Review.
|
|
752
870
|
{
|
|
753
871
|
name: "IncrementalReview",
|
|
754
872
|
description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
|
|
@@ -760,7 +878,15 @@ ${PR_SUMMARY_FORMAT}`
|
|
|
760
878
|
|
|
761
879
|
3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
|
|
762
880
|
|
|
763
|
-
4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}
|
|
881
|
+
4. **prior feedback \u2014 read AND retire it**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`, then call \`${t("get_review_comments")}\` on each prior Pullfrog review. Each thread renders as a section whose first line is a fenced tag \`comment author=<login> id=<fullDatabaseId> review=<reviewId> thread=<graphqlId>\`; section headers carry \`[RESOLVED]\` / \`[OUTDATED]\` when relevant. For every **open, Pullfrog-originated** thread, decide and act:
|
|
882
|
+
|
|
883
|
+
- **Pullfrog-originated** means the FIRST \`comment author=...\` tag in the section is \`author=pullfrog[bot]\`. The \`*\` marker on individual comments is unrelated \u2014 it flags whether a comment belongs to the queried review, not whether it is the thread root.
|
|
884
|
+
- **addressed?** read the file at the thread's anchor and judge whether the substantive concern is now resolved by the new commits. Lines being modified isn't enough: reformatting, renaming, or moving the same code elsewhere doesn't address a concern. If the comment raised multiple distinct concerns, ALL must be addressed. The \`[OUTDATED]\` tag means GitHub moved the anchor (line shift, force-push, rename) \u2014 it does NOT mean the concern was addressed; re-read the code at its new location before deciding.
|
|
885
|
+
- **if addressed**: call \`${t("reply_to_review_comment")}\` with the root tag's numeric \`id=\` as \`comment_id\` (NOT the \`thread=\` value \u2014 that's a separate GraphQL ID used only by resolve) and a one-line body (e.g. \`Addressed in <short-sha>.\`), then call \`${t("resolve_review_thread")}\` with the root tag's \`thread=\` value as \`thread_id\`. Do this BEFORE drafting the new review so the GitHub thread state aligns with the new review by the time it lands.
|
|
886
|
+
- **if uncertain or partially addressed**: leave open. False-positive resolutions erode trust faster than false negatives.
|
|
887
|
+
- **scope**: only retire Pullfrog-originated threads. Threads from human reviewers belong to those humans to resolve, even if the commit happened to address them.
|
|
888
|
+
|
|
889
|
+
The remaining open threads feed step 8's dedup filter \u2014 anything already flagged and unchanged by the new commits should not be re-raised. The rolling PR summary snapshot is the durable record of retire activity; you don't need to surface it in the review body.
|
|
764
890
|
|
|
765
891
|
5. **triage**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces. pull as much context as you need to render a confident review: read related files, grep for callers of changed symbols, check tests that exercise the touched paths. **you are the synthesizer.**
|
|
766
892
|
|
|
@@ -806,22 +932,28 @@ ${PR_SUMMARY_FORMAT}`
|
|
|
806
932
|
- do NOT pre-shape their output with a finding schema
|
|
807
933
|
- do NOT mention the other lenses (independence is the point)
|
|
808
934
|
|
|
809
|
-
8. **aggregate, draft, self-critique**: merge findings (yours + any subagent output if you went multi-lens); de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review.
|
|
935
|
+
8. **aggregate, draft, self-critique**: merge findings (yours + any subagent output if you went multi-lens); de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review.
|
|
936
|
+
|
|
937
|
+
**Hunt for non-anchored concerns before drafting.** After collecting your anchored findings, deliberately scan for concerns that have no specific line to point at \u2014 typically: deletion / cleanup plans for code the new commits replace or shadow; rollout sequencing (what happens to in-flight state during deploy / revert?); coverage gaps the new commits imply but don't add; scope questions that only the human can answer (e.g. is the legacy path going away or is this a long-term dual track?); architectural risks the new commits open up that aren't a single-line bug. On substantial incremental diffs (migrations, refactors, multi-file rewrites, version bumps that change runtime semantics), at least one such concern almost always exists; if you can't think of any, your bar is probably too high.
|
|
938
|
+
|
|
939
|
+
draft inline comments with NEW line numbers from the full PR diff \u2014 attach a \`<details>Technical details</details>\` block to any inline comment whose fix is non-trivial or has cross-file implications (see Inline technical details in the format below). every comment must be actionable, 2-3 sentences max in the visible part.
|
|
810
940
|
|
|
811
|
-
9. **build the review body
|
|
941
|
+
9. **build the review body**: use the same default format as Review mode (preamble + optional cross-cutting \`### \` sections + optional \`### \u2139\uFE0F Nitpicks\`) \u2014 scoped to the **incremental delta**, not the full PR. The "Reviewed changes" bullets describe what changed since the prior pullfrog review (each bullet starts with a past-tense verb, e.g. \`- Extracted shared CLI runtime into a single module\`). Do NOT include a separate "Prior review feedback" checklist \u2014 that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). In some cases you may receive a complete diff for the whole PR instead of an incremental one; when this happens, determine what changed since Pullfrog's most recent review yourself before drafting bullets.
|
|
812
942
|
|
|
813
943
|
10. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
|
|
814
944
|
|
|
815
|
-
Same callout
|
|
945
|
+
Same callout ladder as Review mode \u2014 \`[!CAUTION]\` (red, "will break") \u2192 \`[!IMPORTANT]\` (purple, "must address before merging") \u2192 \`> \u2139\uFE0F ...\` (informational, "minor suggestions only") \u2192 \`> \u2705 ...\` (green friendly, "no concerns"). Same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
|
|
816
946
|
|
|
817
947
|
Follow these rules:
|
|
818
948
|
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
819
949
|
- IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Instead call \`${t("report_progress")}\` with a 1-2 sentence note explaining no review was warranted (e.g. "No new issues. Changes since last review are formatting-only."). this leaves a visible signal that the run completed.
|
|
820
|
-
- ELSE IF NEW CRITICAL ISSUES (blocks merge \u2014 bugs, security, data loss, broken core flows): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!CAUTION]\\n> This PR introduces ...\`,
|
|
821
|
-
- ELSE IF NEW MUST-ADDRESS NON-CRITICAL FINDINGS (real consequences if shipped \u2014 incorrect behavior, missing validation, regressions the author should fix before merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\`,
|
|
822
|
-
- ELSE IF NEW MINOR SUGGESTIONS ONLY (single-line nits, doc/comment polish, defer-able observations, "rough edges"): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens
|
|
823
|
-
- ELSE IF INFORMATIONAL OBSERVATIONS (mergeable as-is, but worth surfacing \u2014 e.g. prior feedback addressed cleanly with one minor stale doc reference, or a noteworthy positive observation): call \`${t("create_pull_request_review")}\` with \`approved: true\`, NO inline comments, and the review body. body opens with \`>
|
|
824
|
-
- ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered,
|
|
950
|
+
- ELSE IF NEW CRITICAL ISSUES (blocks merge \u2014 bugs, security, data loss, broken core flows): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary using the default format below.
|
|
951
|
+
- ELSE IF NEW MUST-ADDRESS NON-CRITICAL FINDINGS (real consequences if shipped \u2014 incorrect behavior, missing validation, regressions the author should fix before merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\`, followed by the PR summary using the default format below. Do NOT use this tier for nits, style preferences, or "consider also" suggestions.
|
|
952
|
+
- ELSE IF NEW MINOR SUGGESTIONS ONLY (single-line nits, doc/comment polish, defer-able observations, "rough edges"): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> \u2139\uFE0F No critical issues \u2014 minor suggestions inline.\\n\\n\` (vary the wording after \u2139\uFE0F to fit the review), followed by the PR summary using the default format below.
|
|
953
|
+
- ELSE IF INFORMATIONAL OBSERVATIONS (mergeable as-is, but worth surfacing \u2014 e.g. prior feedback addressed cleanly with one minor stale doc reference, or a noteworthy positive observation): call \`${t("create_pull_request_review")}\` with \`approved: true\`, NO inline comments, and the review body. body opens with \`> \u2705 No new issues found.\\n\\n\` (or similar friendly green opener), followed by the PR summary using the default format below. If a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead \u2014 the \u2705 signals "no action needed", which contradicts an actionable anchor.
|
|
954
|
+
- ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, set \`approved: true\`. body opens with \`> \u2705 No new issues found.\\n\\n\`, followed by the PR summary using the default format below.
|
|
955
|
+
|
|
956
|
+
${PR_SUMMARY_FORMAT}`
|
|
825
957
|
},
|
|
826
958
|
{
|
|
827
959
|
name: "Plan",
|
|
@@ -836,7 +968,7 @@ ${PR_SUMMARY_FORMAT}`
|
|
|
836
968
|
|
|
837
969
|
3. Produce a structured, actionable plan with clear milestones.
|
|
838
970
|
|
|
839
|
-
4. Call \`${t("report_progress")}\` with the plan.`
|
|
971
|
+
4. Call \`${t("report_progress")}\` with the plan body. Do NOT set \`target_plan_comment\` \u2014 that flag is exclusively for revising an existing plan, and \`${t("select_mode")}\` will route you to a separate PlanEdit checklist when a prior plan comment exists for this issue.`
|
|
840
972
|
},
|
|
841
973
|
{
|
|
842
974
|
name: "Fix",
|
|
@@ -917,14 +1049,25 @@ var modes = computeModes("opencode");
|
|
|
917
1049
|
// utils/buildPullfrogFooter.ts
|
|
918
1050
|
var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
|
|
919
1051
|
var FROG_LOGO = `<a href="https://pullfrog.com"><picture><source media="(prefers-color-scheme: dark)" srcset="https://pullfrog.com/logos/frog-white-full-18px.png"><img src="https://pullfrog.com/logos/frog-green-full-18px.png" width="9px" height="9px" style="vertical-align: middle; " alt="Pullfrog"></picture></a>`;
|
|
920
|
-
function
|
|
921
|
-
|
|
1052
|
+
/**
 * Turns a model slug into a provider label for display.
 *
 * Looks up the provider key via `getModelProvider(slug)` and returns the
 * `displayName` registered for it in `providers`; when the registry entry has
 * no display name (or no entry exists) the raw provider key is returned.
 * If anything in the lookup throws, the original slug is returned unchanged
 * so label rendering never fails on this path.
 */
function providerDisplayName(slug) {
  try {
    const providerKey = getModelProvider(slug);
    const registeredName = providers[providerKey]?.displayName;
    return registeredName ?? providerKey;
  } catch {
    // best-effort: fall back to whatever the caller handed us
    return slug;
  }
}
|
|
1061
|
+
/**
 * Builds the backtick-quoted model label.
 *
 * @param params.model         model identifier; either a stored alias slug or
 *                             an effective/resolved model string.
 * @param params.fallbackFrom  optional slug; when truthy, a note naming that
 *                             slug's provider (via `providerDisplayName`) is
 *                             appended to say its credentials are not configured.
 * @returns the display name (or the raw model string when no alias matches)
 *          wrapped in backticks, suffixed with " (free)" for free aliases and
 *          with the credentials note when `fallbackFrom` is set.
 */
function formatModelLabel(params) {
  const { model, fallbackFrom } = params;

  let alias = resolveDisplayAlias(model);
  if (alias === null || alias === undefined) {
    // reverse-lookup: the caller may pass an effective model (proxy or
    // resolved target like "openrouter/anthropic/claude-opus-4.7") instead of
    // a stored alias slug; match on either resolve target so a friendly
    // display name is still rendered.
    alias = modelAliases.find(
      (entry) => entry.resolve === model || entry.openRouterResolve === model
    );
  }

  const displayName = alias?.displayName ?? model;
  let label = `\`${displayName}\``;
  if (alias?.isFree) {
    label += " (free)";
  }

  if (!fallbackFrom) {
    return label;
  }
  return `${label} (credentials for ${providerDisplayName(fallbackFrom)} not configured)`;
}
|
|
929
1072
|
function buildPullfrogFooter(params) {
|
|
930
1073
|
const parts = [];
|
|
@@ -942,7 +1085,9 @@ function buildPullfrogFooter(params) {
|
|
|
942
1085
|
parts.push("via [Pullfrog](https://pullfrog.com)");
|
|
943
1086
|
}
|
|
944
1087
|
if (params.model) {
|
|
945
|
-
parts.push(
|
|
1088
|
+
parts.push(
|
|
1089
|
+
`Using ${formatModelLabel({ model: params.model, fallbackFrom: params.fallbackFrom })}`
|
|
1090
|
+
);
|
|
946
1091
|
}
|
|
947
1092
|
const allParts = [...parts, "[\u{1D54F}](https://x.com/pullfrogai)"];
|
|
948
1093
|
return `
|
|
@@ -966,6 +1111,18 @@ function isLeapingIntoActionCommentBody(body) {
|
|
|
966
1111
|
return new RegExp(`(^|\\s)${LEAPING_INTO_ACTION_PREFIX}(\\.\\.\\.)?$`).test(firstLine);
|
|
967
1112
|
}
|
|
968
1113
|
|
|
1114
|
+
// utils/learningsTruncate.ts
|
|
1115
|
+
// Exported length cap (100,000). Not referenced inside this block; presumably
// callers pass it as `cap` when truncating a learnings body — confirm at call sites.
var MAX_LEARNINGS_LENGTH = 1e5;
// How far back from the cap (in UTF-16 code units) we are willing to retreat
// to land on a newline before giving up and cutting mid-line.
var TRUNCATION_LINE_BOUNDARY_TOLERANCE = 4096;
/**
 * Truncates `body` to at most `cap` UTF-16 code units, preferring to end on a
 * line boundary.
 *
 * - Bodies already within the cap are returned untouched.
 * - Otherwise the text is cut at the last newline before the cap (the newline
 *   itself is dropped), provided that newline is not at index 0 and lies
 *   within TRUNCATION_LINE_BOUNDARY_TOLERANCE of the cap.
 * - When no suitable newline exists the text is hard-cut at the cap. The hard
 *   cut never splits a surrogate pair: if the cut would fall between a high
 *   and a low surrogate, it is moved back by one code unit so the result does
 *   not end in a lone surrogate.
 *
 * @param body text to truncate
 * @param cap  maximum length of the result, in UTF-16 code units
 * @returns the (possibly shortened) text; never longer than `cap`
 */
function truncateAtLineBoundary(body, cap) {
  if (body.length <= cap) return body;
  let cut = cap;
  const lastUnit = body.charCodeAt(cut - 1);
  const nextUnit = body.charCodeAt(cut);
  const splitsPair =
    lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  // a high surrogate just before the cut followed by its low surrogate just
  // after means the cut is inside one code point — back off one unit
  if (splitsPair) cut -= 1;
  const head = body.slice(0, cut);
  const lastNewline = head.lastIndexOf("\n");
  // no newline, or only a leading one (cutting there would yield "")
  if (lastNewline <= 0) return head;
  // nearest newline is too far back: keep more content, accept a mid-line cut
  if (cap - lastNewline > TRUNCATION_LINE_BOUNDARY_TOLERANCE) return head;
  return head.slice(0, lastNewline);
}
|
|
1125
|
+
|
|
969
1126
|
// utils/progressComment.ts
|
|
970
1127
|
async function getProgressComment(ctx, comment) {
|
|
971
1128
|
const result = await (comment.type === "review" ? ctx.octokit.rest.pulls.getReviewComment({
|
|
@@ -1079,12 +1236,14 @@ function isValidTimeString(input) {
|
|
|
1079
1236
|
}
|
|
1080
1237
|
export {
|
|
1081
1238
|
LEAPING_INTO_ACTION_PREFIX,
|
|
1239
|
+
MAX_LEARNINGS_LENGTH,
|
|
1082
1240
|
PULLFROG_DIVIDER,
|
|
1083
1241
|
TIMEOUT_DISABLED,
|
|
1084
1242
|
buildPullfrogFooter,
|
|
1085
1243
|
createLeapingProgressComment,
|
|
1086
1244
|
deleteProgressCommentApi,
|
|
1087
1245
|
getModelEnvVars,
|
|
1246
|
+
getModelManagedCredentials,
|
|
1088
1247
|
getModelProvider,
|
|
1089
1248
|
getProgressComment,
|
|
1090
1249
|
getProviderDisplayName,
|
|
@@ -1101,5 +1260,6 @@ export {
|
|
|
1101
1260
|
resolveModelSlug,
|
|
1102
1261
|
resolveOpenRouterModel,
|
|
1103
1262
|
stripExistingFooter,
|
|
1263
|
+
truncateAtLineBoundary,
|
|
1104
1264
|
updateProgressComment
|
|
1105
1265
|
};
|
package/dist/mcp/shell.d.ts
CHANGED
|
@@ -9,6 +9,11 @@ export declare const ShellParams: import("arktype/internal/variants/object.ts").
|
|
|
9
9
|
export type SandboxMethod = "unshare" | "sudo-unshare" | "none";
|
|
10
10
|
/** get the current sandbox method (for testing/diagnostics) */
|
|
11
11
|
export declare function getSandboxMethod(): SandboxMethod;
|
|
12
|
+
/** chars of shell output kept inline in the agent reply. anything past this
|
|
13
|
+
* blows the agent's context budget on commands that dump big logs (test
|
|
14
|
+
* runners, build tools, grep on large trees), so the overflow is spilled
|
|
15
|
+
* to a tempfile the agent can re-read selectively (cat/tail/grep). */
|
|
16
|
+
export declare const MAX_OUTPUT_CHARS = 5000;
|
|
12
17
|
export declare function ShellTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
|
|
13
18
|
command: string;
|
|
14
19
|
description: string;
|