pullfrog 0.1.2 → 0.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/internal.js CHANGED
@@ -471,18 +471,20 @@ function computeModes(agentId) {
471
471
  description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
472
472
  prompt: `### Checklist
473
473
 
474
- 1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
474
+ 1. **task list**: create your task list for this run as your first action.
475
475
 
476
- 2. **setup**: checkout or create the branch:
476
+ 2. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
477
+
478
+ 3. **setup**: checkout or create the branch:
477
479
  - **PR event, modifying the existing PR**: call \`${t("checkout_pr")}\`
478
480
  - **new branch**: use \`${t("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
479
481
 
480
- 3. **build**: implement changes using your native file and shell tools:
482
+ 4. **build**: implement changes using your native file and shell tools:
481
483
  - follow the plan (if you ran a plan phase)
482
484
  - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
483
485
  - run relevant tests/lints before committing
484
486
 
485
- 4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
487
+ 5. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
486
488
 
487
489
  Skip self-review (commit directly) when the diff is **genuinely trivial**:
488
490
  - doc typos, comment-only edits, whitespace/format-only, import reordering
@@ -513,7 +515,7 @@ function computeModes(agentId) {
513
515
 
514
516
  Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
515
517
 
516
- 5. **finalize**:
518
+ 6. **finalize**:
517
519
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
518
520
  - create a PR via \`${t("create_pull_request")}\`
519
521
  - call \`${t("report_progress")}\` with the PR link or the exact error if push/PR failed
@@ -527,23 +529,25 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
527
529
  description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
528
530
  prompt: `### Checklist
529
531
 
530
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
532
+ 1. **task list**: create your task list for this run as your first action.
533
+
534
+ 2. Checkout the PR branch via \`${t("checkout_pr")}\`.
531
535
 
532
- 2. Fetch review comments via \`${t("get_review_comments")}\`.
536
+ 3. Fetch review comments via \`${t("get_review_comments")}\`.
533
537
 
534
- 3. For each comment:
538
+ 4. For each comment:
535
539
  - understand the feedback
536
540
  - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
537
541
  - if the request stands, make the code change using your native tools; otherwise reply explaining why
538
542
  - record what was done (or why nothing was done)
539
543
 
540
- 4. Quality check:
544
+ 5. Quality check:
541
545
  - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
542
546
  - commit locally via shell (\`git add . && git commit -m "..."\`)
543
547
 
544
- 5. Finalize:
548
+ 6. Finalize:
545
549
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
546
- - reply to each comment using \`${t("reply_to_review_comment")}\`
550
+ - reply to each comment **exactly once** using \`${t("reply_to_review_comment")}\` \u2014 do not re-emit the same call (the runtime dedupes identical bodies and the second call is wasted)
547
551
  - resolve addressed threads via \`${t("resolve_review_thread")}\`
548
552
  - call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)`
549
553
  },
@@ -564,11 +568,13 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
564
568
  description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
565
569
  prompt: `### Checklist
566
570
 
567
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
571
+ 1. **task list**: create your task list for this run as your first action.
568
572
 
569
- 2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
573
+ 2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
570
574
 
571
- if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
575
+ 3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
576
+
577
+ if the PR is **genuinely trivial**, skip steps 4\u20135 entirely and submit a \`No new issues found.\` review per step 6. there's no value in dispatching even one lens for a typo.
572
578
 
573
579
  "Genuinely trivial" (skip):
574
580
  - single-word doc typo, whitespace/format-only, comment-only across any number of files
@@ -613,7 +619,7 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
613
619
  - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
614
620
  - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
615
621
 
616
- 3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
622
+ 4. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
617
623
  - the diff path / target \u2014 reading the diff and the codebase is its job
618
624
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
619
625
  - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
@@ -628,20 +634,33 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
628
634
  - do NOT pre-shape their output with a finding schema
629
635
  - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
630
636
 
631
- 4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
637
+ 5. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
632
638
 
633
639
  for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
634
640
 
635
- 5. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
641
+ 6. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
636
642
 
637
643
  note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
638
644
 
639
645
  The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
640
646
 
641
- - **critical issues** (blocks merge \u2014 bugs, security, data loss):
647
+ GitHub alert blockquotes render at four visual intensities \u2014 the callout is what the author sees first, so pick the one that matches what you want them to do:
648
+
649
+ - \`[!CAUTION]\` \u2014 large red banner. Reads as "this will break something."
650
+ - \`[!IMPORTANT]\` \u2014 large purple banner. Reads as "you need to look at this before merging."
651
+ - \`[!NOTE]\` \u2014 small blue inline callout. Reads as "FYI, here's something worth noting."
652
+ - no callout \u2014 plain text. Reads as routine review output.
653
+
654
+ Two reinforcing levers: callout intensity (above) and \`approved\` (which gates the footer Fix-button affordance \u2014 Fix renders on every non-approving review, so \`approved: true\` suppresses it). Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing. Pick the tier the author's actual next action justifies.
655
+
656
+ - **critical issues** (blocks merge \u2014 bugs, security, data loss, broken core flows):
642
657
  \`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
643
- - **recommended changes** (non-critical):
644
- \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
658
+ - **must-address non-critical findings** (real consequences if shipped \u2014 incorrect behavior in non-critical paths, missing validation on user input, regressions the author should fix before merge):
659
+ \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> ...\`, followed by the PR summary. Reserve this tier for findings with concrete fallout \u2014 do NOT use \`[!IMPORTANT]\` for nits, style preferences, or "consider also" suggestions. Include all inline comments via \`comments\`.
660
+ - **minor suggestions only** (single-line nits, doc/comment polish, defer-able observations, "rough edges"):
661
+ \`approved: false\`. NO alert blockquote. Body opens directly with the PR summary. Include all inline comments via \`comments\`.
662
+ - **informational observations** (mergeable as-is, nothing actionable \u2014 e.g. prior feedback addressed cleanly, surfacing a minor stale doc reference, calling out something noteworthy without recommending a change):
663
+ \`approved: true\`. Body opens with \`> [!NOTE]\\n> ...\`, followed by the PR summary. Do NOT include inline \`comments\` \u2014 \`[!NOTE]\` signals "no action needed", which contradicts an actionable anchor; if a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead.
645
664
  - **no actionable issues**:
646
665
  \`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
647
666
 
@@ -650,7 +669,7 @@ ${PR_SUMMARY_FORMAT}`
650
669
  // IncrementalReview shares Review's multi-lens orchestrator pattern but
651
670
  // scopes the target to the incremental diff. The "issues must be NEW
652
671
  // since the last Pullfrog review" filter lives at aggregation time
653
- // (step 5), NOT in the subagent prompt — pushing the filter into
672
+ // (step 6), NOT in the subagent prompt — pushing the filter into
654
673
  // subagents matches the canonical anneal anti-pattern of "list known
655
674
  // pre-existing failures — don't flag these" and suppresses signal on
656
675
  // regressions the new commits amplified. The review body is just
@@ -663,15 +682,17 @@ ${PR_SUMMARY_FORMAT}`
663
682
  description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
664
683
  prompt: `### Checklist
665
684
 
666
- 1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
685
+ 1. **task list**: create your task list for this run as your first action.
686
+
687
+ 2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
667
688
 
668
- 2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
689
+ 3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
669
690
 
670
- 3. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
691
+ 4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 6 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
671
692
 
672
- 4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
693
+ 5. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
673
694
 
674
- if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
695
+ if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 8's non-substantive path (do NOT submit a review).
675
696
 
676
697
  "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
677
698
  "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
@@ -679,8 +700,8 @@ ${PR_SUMMARY_FORMAT}`
679
700
 
680
701
  otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
681
702
 
682
- dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
683
- - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
703
+ dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 5 entirely on a single subagent failure. each subagent gets:
704
+ - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 6), not in the subagent prompt
684
705
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
685
706
  - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
686
707
  - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
@@ -694,15 +715,21 @@ ${PR_SUMMARY_FORMAT}`
694
715
  - do NOT pre-shape their output with a finding schema
695
716
  - do NOT mention the other lenses (independence is the point)
696
717
 
697
- 5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
718
+ 6. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
698
719
 
699
- 6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
720
+ 7. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
700
721
 
701
- 7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
722
+ 8. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
723
+
724
+ Same callout-intensity ladder as Review mode \u2014 \`[!CAUTION]\` (large red, "will break") \u2192 \`[!IMPORTANT]\` (large purple, "must address before merging") \u2192 \`[!NOTE]\` (small blue, "FYI") \u2192 no callout (plain text). And the same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
725
+
726
+ Follow these rules:
702
727
  - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
703
- - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
704
- - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
705
- - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
728
+ - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Instead call \`${t("report_progress")}\` with a 1-2 sentence note explaining no review was warranted (e.g. "No new issues. Changes since last review are formatting-only."). this leaves a visible signal that the run completed.
729
+ - ELSE IF NEW CRITICAL ISSUES (blocks merge \u2014 bugs, security, data loss, broken core flows): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, then the Reviewed-changes summary.
730
+ - ELSE IF NEW MUST-ADDRESS NON-CRITICAL FINDINGS (real consequences if shipped \u2014 incorrect behavior, missing validation, regressions the author should fix before merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\`, then the Reviewed-changes summary. Do NOT use this tier for nits, style preferences, or "consider also" suggestions.
731
+ - ELSE IF NEW MINOR SUGGESTIONS ONLY (single-line nits, doc/comment polish, defer-able observations, "rough edges"): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens directly with \`Reviewed the following changes:\\n\` (NO alert blockquote), then the Reviewed-changes summary.
732
+ - ELSE IF INFORMATIONAL OBSERVATIONS (mergeable as-is, but worth surfacing \u2014 e.g. prior feedback addressed cleanly with one minor stale doc reference, or a noteworthy positive observation): call \`${t("create_pull_request_review")}\` with \`approved: true\`, NO inline comments, and the review body. body opens with \`> [!NOTE]\\n> ...\` alert, then the Reviewed-changes summary. If a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead \u2014 \`[!NOTE]\` and inline comments don't mix.
706
733
  - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
707
734
  },
708
735
  {
@@ -710,33 +737,37 @@ ${PR_SUMMARY_FORMAT}`
710
737
  description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
711
738
  prompt: `### Checklist
712
739
 
713
- 1. Analyze the task and gather context:
740
+ 1. **task list**: create your task list for this run as your first action.
741
+
742
+ 2. Analyze the task and gather context:
714
743
  - read AGENTS.md and relevant codebase files
715
744
  - understand the architecture and constraints
716
745
 
717
- 2. Produce a structured, actionable plan with clear milestones.
746
+ 3. Produce a structured, actionable plan with clear milestones.
718
747
 
719
- 3. Call \`${t("report_progress")}\` with the plan.`
748
+ 4. Call \`${t("report_progress")}\` with the plan.`
720
749
  },
721
750
  {
722
751
  name: "Fix",
723
752
  description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
724
753
  prompt: `### Checklist
725
754
 
726
- 1. Checkout the PR branch via \`${t("checkout_pr")}\`.
755
+ 1. **task list**: create your task list for this run as your first action.
727
756
 
728
- 2. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
757
+ 2. Checkout the PR branch via \`${t("checkout_pr")}\`.
729
758
 
730
- 3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
759
+ 3. Fetch check suite logs via \`${t("get_check_suite_logs")}\`.
731
760
 
732
- 4. Diagnose and fix:
761
+ 4. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
762
+
763
+ 5. Diagnose and fix:
733
764
  - read the workflow file, reproduce locally with the EXACT same commands CI runs
734
765
  - fix the issue using your native file and shell tools
735
766
  - verify the fix by re-running the exact CI command
736
767
  - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
737
768
  - commit locally via shell (\`git add . && git commit -m "..."\`)
738
769
 
739
- 5. Finalize:
770
+ 6. Finalize:
740
771
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
741
772
  - call \`${t("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)`
742
773
  },
@@ -745,22 +776,24 @@ ${PR_SUMMARY_FORMAT}`
745
776
  description: "Resolve merge conflicts in a PR branch against the base branch",
746
777
  prompt: `### Checklist
747
778
 
748
- 1. **Setup**:
779
+ 1. **task list**: create your task list for this run as your first action.
780
+
781
+ 2. **Setup**:
749
782
  - Call \`${t("checkout_pr")}\` to get the PR branch.
750
783
  - Call \`${t("get_pull_request")}\` to identify the base branch (e.g., 'main').
751
784
  - Call \`${t("git_fetch")}\` to fetch the base branch.
752
785
 
753
- 2. **Merge Attempt**:
786
+ 3. **Merge Attempt**:
754
787
  - Run \`git merge origin/<base_branch>\` via shell.
755
- - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
756
- - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
788
+ - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 4\u20135.**
789
+ - If it fails (conflicts), resolve them manually (continue to steps 4\u20135).
757
790
 
758
- 3. **Resolve Conflicts**:
791
+ 4. **Resolve Conflicts**:
759
792
  - Run \`git status\` or parse the merge output to find the list of conflicting files.
760
793
  - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
761
794
  - Verify the file syntax is correct after resolution.
762
795
 
763
- 4. **Finalize**:
796
+ 5. **Finalize**:
764
797
  - Run a final verification (build/test) to ensure the resolution works.
765
798
  - \`git add . && git commit -m "resolve merge conflicts"\`
766
799
  - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
@@ -771,15 +804,17 @@ ${PR_SUMMARY_FORMAT}`
771
804
  description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
772
805
  prompt: `### Checklist
773
806
 
774
- 1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
807
+ 1. **task list**: create your task list for this run as your first action.
808
+
809
+ 2. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
775
810
 
776
- 2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
811
+ 3. For substantial work \u2014 code changes across multiple files, multi-step investigations:
777
812
  - plan your approach before starting
778
813
  - use native file and shell tools for local operations
779
814
  - use ${pullfrogMcpName} MCP tools for GitHub/git operations
780
815
  - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
781
816
 
782
- 3. Finalize:
817
+ 4. Finalize:
783
818
  - if code changes were made, push to a pull request (new or existing) using \`${t("push_branch")}\` and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
784
819
  - call \`${t("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
785
820
  - if the task involved labeling, commenting, or other GitHub operations, perform those directly`
@@ -71,6 +71,41 @@ export declare const ReplyToReviewComment: import("arktype/internal/variants/obj
71
71
  comment_id: number;
72
72
  body: string;
73
73
  }, {}>;
74
+ /**
75
+ * decision returned by `duplicateReplyDecision` when a session has already
76
+ * posted an identical reply to the same parent review comment.
77
+ */
78
+ export interface DuplicateReplyDecision {
79
+ kind: "already-replied";
80
+ commentId: number;
81
+ url: string | undefined;
82
+ reason: string;
83
+ }
84
+ /**
85
+ * decide whether a second reply_to_review_comment call in the same session
86
+ * is a duplicate of an earlier reply to the same parent comment.
87
+ *
88
+ * the agent is instructed to call reply_to_review_comment exactly once per
89
+ * parent comment per AddressReviews session, but in practice it sometimes
90
+ * emits the same call twice. PR #610 reproduced this with Kimi K2:
91
+ * identical body posted 3 seconds apart, only one tool_use event in the
92
+ * agent log. the second post is always redundant and clutters the PR thread.
93
+ *
94
+ * we key on (comment_id, bodyWithFooter) so a legitimate follow-up reply
95
+ * with different content still goes through. within a single run the
96
+ * footer is constant (workflow run + model + jobId), so byte-equal bodies
97
+ * catch the stutter without blocking real follow-ups.
98
+ *
99
+ * mirrors the shape of `duplicateReviewDecision` in mcp/review.ts.
100
+ */
101
+ export declare function duplicateReplyDecision(params: {
102
+ existing: {
103
+ commentId: number;
104
+ url: string | undefined;
105
+ bodyWithFooter: string;
106
+ } | undefined;
107
+ bodyWithFooter: string;
108
+ }): DuplicateReplyDecision | null;
74
109
  export declare function ReplyToReviewCommentTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
75
110
  pull_number: number;
76
111
  comment_id: number;
@@ -1,5 +1,7 @@
1
1
  import type { RestEndpointMethodTypes } from "@octokit/rest";
2
+ import type { CommentableLines } from "../toolState.ts";
2
3
  import type { ToolContext } from "./server.ts";
4
+ export type { CommentableLines };
3
5
  /**
4
6
  * detect GitHub's generic server-side 422 ("An internal error occurred,
5
7
  * please try again.") that sometimes fires on `POST /pulls/{n}/reviews`.
@@ -13,10 +15,6 @@ import type { ToolContext } from "./server.ts";
13
15
  */
14
16
  export declare function isTransientReviewError(err: unknown): boolean;
15
17
  export declare const TRANSIENT_REVIEW_RETRY_DELAYS_MS: number[];
16
- export type CommentableLines = {
17
- RIGHT: Set<number>;
18
- LEFT: Set<number>;
19
- };
20
18
  /**
21
19
  * parse a PR file's patch to determine which line numbers on each side are
22
20
  * valid anchors for inline comments. GitHub only accepts comments on lines
@@ -1,81 +1,11 @@
1
1
  import "./arkConfig.ts";
2
- import type { AgentUsage } from "../agents/index.ts";
3
2
  import { type AgentId } from "../external.ts";
4
3
  import type { Mode } from "../modes.ts";
5
- import type { PrepResult } from "../prep/index.ts";
6
- import type { DiffCoverageState } from "../utils/diffCoverage.ts";
4
+ import type { ToolState } from "../toolState.ts";
7
5
  import type { OctokitWithPlugins } from "../utils/github.ts";
8
6
  import type { ResolvedPayload } from "../utils/payload.ts";
9
- import { type ProgressComment, type ProgressCommentType } from "../utils/progressComment.ts";
10
7
  import type { AccountPlan } from "../utils/runContext.ts";
11
8
  import type { RunContextData } from "../utils/runContextData.ts";
12
- import type { TodoTracker } from "../utils/todoTracking.ts";
13
- import type { CommentableLines } from "./review.ts";
14
- export type BackgroundProcess = {
15
- pid: number;
16
- outputPath: string;
17
- pidPath: string;
18
- };
19
- export type BrowserDaemon = {
20
- binDir: string;
21
- error?: never;
22
- } | {
23
- binDir?: never;
24
- error: string;
25
- };
26
- export type StoredPushDest = {
27
- remoteName: string;
28
- remoteBranch: string;
29
- localBranch: string;
30
- };
31
- export interface ToolState {
32
- pushUrl?: string;
33
- pushDest?: StoredPushDest;
34
- issueNumber?: number;
35
- checkoutSha?: string;
36
- commentableLinesByFile?: Map<string, CommentableLines>;
37
- commentableLinesPullNumber?: number;
38
- commentableLinesCheckoutSha?: string | undefined;
39
- beforeSha?: string;
40
- selectedMode?: string;
41
- backgroundProcesses: Map<string, BackgroundProcess>;
42
- browserDaemon?: BrowserDaemon | undefined;
43
- review?: {
44
- id: number;
45
- nodeId: string;
46
- reviewedSha: string | undefined;
47
- };
48
- dependencyInstallation?: {
49
- status: "not_started" | "in_progress" | "completed" | "failed";
50
- promise: Promise<PrepResult[]> | undefined;
51
- results: PrepResult[] | undefined;
52
- };
53
- progressComment: ProgressComment | null | undefined;
54
- hadProgressComment: boolean;
55
- lastProgressBody?: string;
56
- wasUpdated?: boolean;
57
- finalSummaryWritten?: boolean;
58
- existingPlanCommentId?: number;
59
- previousPlanBody?: string;
60
- summaryFilePath?: string;
61
- summarySeed?: string;
62
- summaryPersistAttempted?: boolean;
63
- learningsFilePath?: string;
64
- learningsSeed?: string;
65
- learningsPersistAttempted?: boolean;
66
- output?: string;
67
- usageEntries: AgentUsage[];
68
- model?: string | undefined;
69
- todoTracker?: TodoTracker | undefined;
70
- diffCoverage?: DiffCoverageState | undefined;
71
- }
72
- interface InitToolStateParams {
73
- progressComment: {
74
- id: string;
75
- type: ProgressCommentType;
76
- } | undefined;
77
- }
78
- export declare function initToolState(params: InitToolStateParams): ToolState;
79
9
  export interface ToolContext {
80
10
  agentId: AgentId;
81
11
  repo: RunContextData["repo"];
package/dist/modes.d.ts CHANGED
@@ -7,3 +7,13 @@ export interface Mode {
7
7
  export declare const PR_SUMMARY_FORMAT = "### Default format\n\nFollow this structure exactly:\n\n<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.\nNOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.\n\n### Key changes\n\n- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add `TodoTracker` for live checklists**). A reviewer should understand the full PR from this list alone.\n\n<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: `{base}` \u2190 `{head}`</sub>\nNOTE: the metadata line goes AFTER the bullet list, not before it.\n\nThen for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).\n\n<br/>\n\n## Example readable section title\n\n> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]\nIMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate `>` lines creates a double line break.\n\n1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.\n\nIf a change warrants deeper explanation, use a blockquoted details/summary framed as a question:\n> <details><summary>How does X work?</summary>\n> Extended explanation here.\n> </details>\n\nEnd each section with a file links trail (3-4 key files max):\n[`file.ts`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \u00B7 ...\n\nSingle-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.\n\nCRITICAL \u2014 GitHub markdown rendering rule:\nGitHub's markdown parser requires a blank line between ALL block-level elements. 
This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.\n\nRules:\n- `##` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title\n- ALL variable names, identifiers, and file names in body text must be in backticks\n- ALL file references MUST link to the PR Files Changed view. Use the `diff-<hex>` anchor precomputed next to each filename in the `checkout_pr` TOC \u2014 do NOT run `sha256sum` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the `#diff-` anchor rather than guessing.\n- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)\n- Do NOT include raw diff stats like '+123 / -45' or line counts\n- Do NOT include code blocks or repeat diff contents\n- Do NOT include a changelog section \u2014 the key changes list serves this purpose\n- Focus on *intent*, not *what* \u2014 the diff already shows what changed\n- Get the file count and commit count from the checkout_pr metadata, not by counting manually";
8
8
  export declare function computeModes(agentId: AgentId): Mode[];
9
9
  export declare const modes: Mode[];
10
+ /**
11
+ * modes that legitimately never modify the working tree. used by the post-run
12
+ * dirty-tree gate to suppress the "commit and push" nudge — those modes
13
+ * complete by submitting a review (`Review` / `IncrementalReview`) or by
14
+ * posting a Plan comment (`Plan`), not by touching files. any leftover in the
15
+ * tree at end-of-run is incidental tool noise (e.g. a `node_modules/` from a
16
+ * stray install attempt) on an ephemeral worktree; nudging the agent to
17
+ * commit it would produce a spurious PR.
18
+ */
19
+ export declare const NON_COMMITTING_MODES: ReadonlySet<string>;
@@ -0,0 +1,109 @@
1
+ import type { AgentUsage } from "./agents/shared.ts";
2
+ import type { PrepResult } from "./prep/types.ts";
3
+ import type { DiffCoverageState } from "./utils/diffCoverage.ts";
4
+ import { type ProgressComment, type ProgressCommentType } from "./utils/progressComment.ts";
5
+ import type { TodoTracker } from "./utils/todoTracking.ts";
6
+ export type BackgroundProcess = {
7
+ pid: number;
8
+ outputPath: string;
9
+ pidPath: string;
10
+ };
11
+ export type BrowserDaemon = {
12
+ binDir: string;
13
+ error?: never;
14
+ } | {
15
+ binDir?: never;
16
+ error: string;
17
+ };
18
+ export type StoredPushDest = {
19
+ remoteName: string;
20
+ remoteBranch: string;
21
+ localBranch: string;
22
+ };
23
+ /**
24
+ * Valid inline-comment anchor lines per side at a particular checkout SHA.
25
+ * Lives here (not in `mcp/review.ts`) so `ToolState` — which caches
26
+ * `Map<path, CommentableLines>` per checkout — does not pull the MCP server
27
+ * graph into every consumer of run state (the action's main loop, agent
28
+ * harnesses, cf-worker indexing).
29
+ */
30
+ export type CommentableLines = {
31
+ RIGHT: Set<number>;
32
+ LEFT: Set<number>;
33
+ };
34
+ /**
35
+ * mutable per-run record of facts that occurred during execution. shared
36
+ * between the action process and the MCP server (one process — toolState is
37
+ * just a JS object passed by reference into both surfaces).
38
+ *
39
+ * design rule: ToolState is LITERAL. each field records a thing that
40
+ * happened — `review` is set when `create_pull_request_review` succeeded,
41
+ * `finalSummaryWritten` flips when `report_progress` wrote a non-plan body,
42
+ * `selectedMode` is set when `select_mode` was called. fields should never
43
+ * encode the absence of an event ("unsubmittedReview", "missingArtifact"),
44
+ * speculative state, or values derived from other fields.
45
+ *
46
+ * any predicate the rest of the code needs ("the agent picked review mode but
47
+ * never produced a review or progress write") is computed inline at the call
48
+ * site, not stored. derived state in this struct invariably drifts from the
49
+ * literal fields under refactors and is the wrong layer for the check.
50
+ *
51
+ * write narrowly: prefer adding state inside the tool that mutates it (e.g.
52
+ * `create_pull_request_review` populates `toolState.review`) and reading
53
+ * narrowly elsewhere. don't introduce flags from main.ts that mirror what an
54
+ * MCP tool already records.
55
+ */
56
+ export interface ToolState {
57
+ pushUrl?: string;
58
+ pushDest?: StoredPushDest;
59
+ issueNumber?: number;
60
+ checkoutSha?: string;
61
+ commentableLinesByFile?: Map<string, CommentableLines>;
62
+ commentableLinesPullNumber?: number;
63
+ commentableLinesCheckoutSha?: string | undefined;
64
+ beforeSha?: string;
65
+ selectedMode?: string;
66
+ backgroundProcesses: Map<string, BackgroundProcess>;
67
+ browserDaemon?: BrowserDaemon | undefined;
68
+ review?: {
69
+ id: number;
70
+ nodeId: string;
71
+ reviewedSha: string | undefined;
72
+ };
73
+ reviewReplies?: Map<number, {
74
+ commentId: number;
75
+ url: string | undefined;
76
+ bodyWithFooter: string;
77
+ }>;
78
+ dependencyInstallation?: {
79
+ status: "not_started" | "in_progress" | "completed" | "failed";
80
+ promise: Promise<PrepResult[]> | undefined;
81
+ results: PrepResult[] | undefined;
82
+ };
83
+ progressComment: ProgressComment | null | undefined;
84
+ hadProgressComment: boolean;
85
+ lastProgressBody?: string;
86
+ wasUpdated?: boolean;
87
+ finalSummaryWritten?: boolean;
88
+ existingPlanCommentId?: number;
89
+ previousPlanBody?: string;
90
+ summaryFilePath?: string;
91
+ summarySeed?: string;
92
+ summaryPersistAttempted?: boolean;
93
+ learningsFilePath?: string;
94
+ learningsSeed?: string;
95
+ learningsPersistAttempted?: boolean;
96
+ output?: string;
97
+ usageEntries: AgentUsage[];
98
+ model?: string | undefined;
99
+ todoTracker?: TodoTracker | undefined;
100
+ diffCoverage?: DiffCoverageState | undefined;
101
+ }
102
+ interface InitToolStateParams {
103
+ progressComment: {
104
+ id: string;
105
+ type: ProgressCommentType;
106
+ } | undefined;
107
+ }
108
+ export declare function initToolState(params: InitToolStateParams): ToolState;
109
+ export {};
@@ -1,4 +1,4 @@
1
- import type { ToolState } from "../mcp/server.ts";
1
+ import type { ToolState } from "../toolState.ts";
2
2
  /**
3
3
  * ensure the agent-browser daemon is running by issuing a real command.
4
4
  *
@@ -1,4 +1,4 @@
1
- import type { ToolState } from "../mcp/server.ts";
1
+ import type { ToolState } from "../toolState.ts";
2
2
  interface ReportErrorParams {
3
3
  toolState: ToolState;
4
4
  error: string;