pullfrog 0.1.29 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/internal.js CHANGED
@@ -9,11 +9,20 @@ var providers = {
9
9
  displayName: "Anthropic",
10
10
  envVars: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"],
11
11
  models: {
12
+ // OpenRouter serves claude-fable-5, but models.dev's OpenRouter mirror
13
+ // hasn't indexed it yet (shipped 2026-06-09), so the catalog drift gate
14
+ // can't validate an openRouterResolve. omit it until the mirror catches
15
+ // up; direct BYOK / Claude Code resolves anthropic/claude-fable-5 fine.
16
+ "claude-fable": {
17
+ displayName: "Claude Fable",
18
+ resolve: "anthropic/claude-fable-5",
19
+ preferred: true,
20
+ subagentModel: "claude-sonnet"
21
+ },
12
22
  "claude-opus": {
13
23
  displayName: "Claude Opus",
14
24
  resolve: "anthropic/claude-opus-4-8",
15
25
  openRouterResolve: "openrouter/anthropic/claude-opus-4.8",
16
- preferred: true,
17
26
  subagentModel: "claude-sonnet"
18
27
  },
19
28
  "claude-sonnet": {
@@ -79,7 +88,8 @@ var providers = {
79
88
  },
80
89
  o3: {
81
90
  displayName: "O3",
82
- resolve: "openai/o3"
91
+ resolve: "openai/o3",
92
+ openRouterResolve: "openrouter/openai/o3"
83
93
  }
84
94
  }
85
95
  }),
@@ -679,8 +689,10 @@ Inline comments use the same severity framing as body \`### \` sections, scaled
679
689
  - **Don't repeat diff content**, don't include raw \`+123 / -45\` stats, don't include a changelog section, don't use horizontal rules (\`---\`).
680
690
  - **Pull file/commit counts from \`checkout_pr\` metadata** \u2014 never count manually.
681
691
  - **Legacy headings REMOVED.** Do not use \`### Key changes\`, \`### Issues found\`, \`<b>TL;DR</b>\`, or \`<sub><b>Summary</b>\`. The new structure subsumes them.`;
682
- function computeModes(agentId) {
692
+ function computeModes(agentId, signedCommits = false) {
683
693
  const t = (toolName) => formatMcpToolRef(agentId, toolName);
694
+ const commitStep = signedCommits ? `commit via \`${t("commit_changes")}\` \u2014 it lands a GitHub-signed commit directly on the remote branch (no push step)` : `commit locally via shell (\`git add . && git commit -m "..."\`)`;
695
+ const finalizeStep = signedCommits ? `confirm a clean working tree (\`git status\`) \u2014 your \`${t("commit_changes")}\` calls already landed the work on the remote` : `confirm a clean working tree, then push via \`${t("push_branch")}\``;
684
696
  return [
685
697
  {
686
698
  name: "Build",
@@ -751,10 +763,10 @@ function computeModes(agentId) {
751
763
  - Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
752
764
  - For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
753
765
 
754
- Be **discerning** about what comes back. The reviewer is an AI subagent and is fallible \u2014 treat every finding as a hypothesis, not a directive, and **verify each one yourself** against the diff and the code before deciding whether to apply. You are searching for a solution that is **complete, minimal, and elegant** \u2014 you may need to think hard to find it. Do not over-engineer, do not be over-defensive, **do not write AI slop**. Reviewers bias toward *recommending additions*, and that bias has a recognizable slop texture: defensive checks for cases that cannot happen, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards, error handlers for cases the type system already rules out. Reject those. For each surviving finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three means look harder for a fix that gets all three before settling. After applying the fixes you accept, re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
766
+ Be **discerning** about what comes back. The reviewer is an AI subagent and is fallible \u2014 treat every finding as a hypothesis, not a directive, and **verify each one yourself** against the diff and the code before deciding whether to apply. You are searching for a solution that is **complete, minimal, and elegant** \u2014 you may need to think hard to find it. Do not over-engineer, do not be over-defensive, **do not write AI slop**. Reviewers bias toward *recommending additions*, and that bias has a recognizable slop texture: defensive checks for cases that cannot happen, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards, error handlers for cases the type system already rules out. Reject those. For each surviving finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three means look harder for a fix that gets all three before settling. After applying the fixes you accept, re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Then ${commitStep}.
755
767
 
756
768
  6. **finalize**:
757
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
769
+ - ${finalizeStep} (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
758
770
  - create a PR via \`${t("create_pull_request")}\`
759
771
  - call \`${t("report_progress")}\` with the PR link or the exact error if push/PR failed
760
772
 
@@ -782,12 +794,12 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
782
794
 
783
795
  5. Quality check:
784
796
  - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
785
- - commit locally via shell (\`git add . && git commit -m "..."\`)
797
+ - ${commitStep}
786
798
 
787
799
  6. Finalize. Reply + resolve are paired write actions: do BOTH or NEITHER for each thread.
788
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
789
- - **if push fails**, call \`${t("report_progress")}\` with the exact error and STOP \u2014 do NOT reply or resolve any thread until the fix is live on the remote. Resolving a thread without the fix landing misleads the reviewer.
790
- - **on push success**, for each thread you acted on:
800
+ - ${finalizeStep} (same push/prepush guidance as Build mode in *SYSTEM*)
801
+ - **if the push/commit fails**, call \`${t("report_progress")}\` with the exact error and STOP \u2014 do NOT reply or resolve any thread until the fix is live on the remote. Resolving a thread without the fix landing misleads the reviewer.
802
+ - **once the fix is live on the remote**, for each thread you acted on:
791
803
  - reply ONCE via \`${t("reply_to_review_comment")}\`. The \`comment_id\` parameter takes the root comment's numeric \`id=\` (from the first \`comment author=...\` tag in the \`${t("get_review_comments")}\` output) \u2014 NOT the \`thread=\` value; that's a separate GraphQL ID used by resolve. The runtime dedupes identical bodies within a session.
792
804
  - **immediately** call \`${t("resolve_review_thread")}\` with that thread's \`thread=\` value as \`thread_id\`. Resolve every thread where you (a) made the requested code change in full \u2014 partial fixes leave the thread open \u2014 OR (b) replied with a substantive answer the user explicitly asked for. Do NOT resolve threads where you pushed back on the request and the disagreement is unresolved; leave those open for the human to mediate.
793
805
  - call \`${t("report_progress")}\` with a brief summary`
@@ -1062,10 +1074,10 @@ ${PR_SUMMARY_FORMAT}`
1062
1074
  - fix the issue using your native file and shell tools
1063
1075
  - verify the fix by re-running the exact CI command
1064
1076
  - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
1065
- - commit locally via shell (\`git add . && git commit -m "..."\`)
1077
+ - ${commitStep}
1066
1078
 
1067
1079
  6. Finalize:
1068
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
1080
+ - ${finalizeStep} (same push/prepush guidance as Build mode in *SYSTEM*)
1069
1081
  - call \`${t("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)`
1070
1082
  },
1071
1083
  {
@@ -1081,8 +1093,8 @@ ${PR_SUMMARY_FORMAT}`
1081
1093
  - Call \`${t("git_fetch")}\` to fetch the base branch.
1082
1094
 
1083
1095
  3. **Merge Attempt**:
1084
- - Run \`git merge origin/<base_branch>\` via shell.
1085
- - If it succeeds automatically, confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 4\u20135.**
1096
+ - Run \`git merge ${signedCommits ? "--no-commit " : ""}origin/<base_branch>\` via shell.
1097
+ - If it succeeds automatically, ${signedCommits ? `conclude it via \`${t("commit_changes")}\` (it turns the pending merge into a signed merge commit on the remote)` : `confirm a clean working tree, push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)`}, and call \`${t("report_progress")}\` with a brief success note or the exact error if it failed \u2014 **then stop; do not run steps 4\u20135.**
1086
1098
  - If it fails (conflicts), resolve them manually (continue to steps 4\u20135).
1087
1099
 
1088
1100
  4. **Resolve Conflicts**:
@@ -1092,8 +1104,8 @@ ${PR_SUMMARY_FORMAT}`
1092
1104
 
1093
1105
  5. **Finalize**:
1094
1106
  - Run a final verification (build/test) to ensure the resolution works.
1095
- - \`git add . && git commit -m "resolve merge conflicts"\`
1096
- - confirm a clean working tree, then push via \`${t("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
1107
+ - ${signedCommits ? `\`git add .\`, then conclude via \`${t("commit_changes")}\` with message "resolve merge conflicts"` : `\`git add . && git commit -m "resolve merge conflicts"\``}
1108
+ - ${finalizeStep} (same push/prepush guidance as Build mode in *SYSTEM*)
1097
1109
  - Call \`${t("report_progress")}\` with a summary of what was resolved (or the exact push error if push failed)`
1098
1110
  },
1099
1111
  {
@@ -1112,7 +1124,7 @@ ${PR_SUMMARY_FORMAT}`
1112
1124
  - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
1113
1125
 
1114
1126
  4. Finalize:
1115
- - if code changes were made, push to a pull request (new or existing) using \`${t("push_branch")}\` and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
1127
+ - if code changes were made, get them onto a pull request (new or existing) using ${signedCommits ? `\`${t("commit_changes")}\`` : `\`${t("push_branch")}\``} and \`${t("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if this fails).
1116
1128
  - call \`${t("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
1117
1129
  - if the task involved labeling, commenting, or other GitHub operations, perform those directly`
1118
1130
  }
@@ -1123,15 +1135,6 @@ var modes = computeModes("opencode");
1123
1135
  // utils/buildPullfrogFooter.ts
1124
1136
  var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
1125
1137
  var FROG_LOGO = `<a href="https://pullfrog.com"><picture><source media="(prefers-color-scheme: dark)" srcset="https://pullfrog.com/logos/frog-white-full-18px.png"><img src="https://pullfrog.com/logos/frog-green-full-18px.png" width="9px" height="9px" style="vertical-align: middle; " alt="Pullfrog"></picture></a>`;
1126
- function providerDisplayName(slug) {
1127
- try {
1128
- const key = getModelProvider(slug);
1129
- const meta = providers[key];
1130
- return meta?.displayName ?? key;
1131
- } catch {
1132
- return slug;
1133
- }
1134
- }
1135
1138
  function formatModelLabel(params) {
1136
1139
  const alias = resolveDisplayAlias(params.model) ?? // reverse-lookup: when the caller passes an effective model (proxy or
1137
1140
  // resolved target like "openrouter/anthropic/claude-opus-4.7") instead of
@@ -1142,9 +1145,7 @@ function formatModelLabel(params) {
1142
1145
  if (params.oss) {
1143
1146
  return `\`${displayName}\` (free via [Pullfrog for OSS](https://pullfrog.com/for-oss))`;
1144
1147
  }
1145
- const base = alias?.isFree ? `\`${displayName}\` (free)` : `\`${displayName}\``;
1146
- if (!params.fallbackFrom) return base;
1147
- return `${base} (credentials for ${providerDisplayName(params.fallbackFrom)} not configured)`;
1148
+ return alias?.isFree ? `\`${displayName}\` (free)` : `\`${displayName}\``;
1148
1149
  }
1149
1150
  function buildPullfrogFooter(params) {
1150
1151
  const parts = [];
@@ -1162,9 +1163,7 @@ function buildPullfrogFooter(params) {
1162
1163
  parts.push("via [Pullfrog](https://pullfrog.com)");
1163
1164
  }
1164
1165
  if (params.model) {
1165
- parts.push(
1166
- `Using ${formatModelLabel({ model: params.model, fallbackFrom: params.fallbackFrom, oss: params.oss })}`
1167
- );
1166
+ parts.push(`Using ${formatModelLabel({ model: params.model, oss: params.oss })}`);
1168
1167
  }
1169
1168
  const allParts = [...parts, "[\u{1D54F}](https://x.com/pullfrogai)"];
1170
1169
  return `
package/dist/mcp/git.d.ts CHANGED
@@ -15,6 +15,13 @@ export declare function PushBranchTool(ctx: ToolContext): import("fastmcp").Tool
15
15
  branchName?: string;
16
16
  force?: boolean;
17
17
  }>>;
18
+ export declare function CommitChangesTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
19
+ message: string;
20
+ files?: string[];
21
+ }, {
22
+ message: string;
23
+ files?: string[];
24
+ }>>;
18
25
  export declare const AUTH_REQUIRED_REDIRECT: Record<string, string>;
19
26
  export declare const NOSHELL_BLOCKED_SUBCOMMANDS: Record<string, string>;
20
27
  export declare const NOSHELL_BLOCKED_ARGS: string[];
@@ -18,6 +18,7 @@ export interface ToolContext {
18
18
  postCheckoutScript: string | null;
19
19
  prepushScript: string | null;
20
20
  prApproveEnabled: boolean;
21
+ signedCommits: boolean;
21
22
  modeInstructions: Record<string, string>;
22
23
  toolState: ToolState;
23
24
  runId: number | undefined;
package/dist/modes.d.ts CHANGED
@@ -5,7 +5,7 @@ export interface Mode {
5
5
  prompt?: string | undefined;
6
6
  }
7
7
  export declare const PR_SUMMARY_FORMAT = "### Default format\n\nThe body has at most three parts in this exact order:\n\n1. **Reviewed changes preamble** \u2014 one bolded inline lead-in describing what was reviewed in this run, a bullet list of the substantive changes, and an HTML comment carrying review metadata for downstream agents.\n2. **Cross-cutting issue sections** (zero or more) \u2014 one `### ` heading per concern, with a human-readable problem write-up and a collapsed `<details>Technical details</details>` block underneath.\n3. **`### \u2139\uFE0F Nitpicks`** at the very bottom (only if there are nits worth surfacing in the body) \u2014 a flat bullet list, no technical-details block.\n\nInline-vs-body split: concerns that anchor to a specific line go inline (use the `comments` parameter). Body `### ` sections are reserved for concerns that **have no line to anchor to** \u2014 typically because the concern is about *absence* (something the diff should have done but didn't), *sequencing* (rollout / deletion / migration order), *design decisions only the human can make*, or *scope questions the diff implicitly raises but doesn't address*. A concern that anchors to a line but has broad implications still goes inline (use the technical-details block there to capture the implications \u2014 see Inline technical details below). If you found no non-anchorable concerns, the body has zero `### ` issue sections \u2014 just the preamble + metadata.\n\n## 1. Reviewed changes preamble\n\nOpen with a single bolded inline lead-in followed immediately by the bullet list (no `### Key changes` heading, no `<b>TL;DR</b>`):\n\n```\n**Reviewed changes** \u2014 one sentence on what was reviewed in this run. For Review (initial), this is what the PR does and why. For IncrementalReview, this is what changed since the prior pullfrog review. Focus on intent, not mechanics.\n\n- **Short human-readable title** \u2014 1 sentence per substantive change. 
Write a short prose phrase; when you name a file, type, or function, put that name in backticks (e.g. **Add \\`TodoTracker\\` for live checklists**). A reviewer should understand the full reviewed scope from this list alone \u2014 this IS the dispassionate \"what was reviewed and what changed\" overview, so cover the substantive changes, not just the loudest ones.\n\n<!--\nPullfrog review metadata \u2014 for any agent (or human-with-agent) reading this\nreview. Incorporate the fields below into your understanding of the context\nthis review was made in. The findings below were written against\n{head_sha_short}; if new commits have landed on {head_ref} since this review\nwas submitted, treat any specific bug, file, or line callout as POTENTIALLY\nSTALE \u2014 re-diff against {head_sha_short} (or trigger a fresh review) and\nfactor commits past {head_sha_short} into your understanding of the current\nstate before acting on findings.\n\n- Mode: Review (initial) or IncrementalReview (delta against prior pullfrog review)\n- Files reviewed: {file_count}\n- Commits reviewed: {commit_count}\n- Base: {base_ref} ({base_sha_short})\n- Head: {head_ref} ({head_sha_short})\n- Reviewed commits:\n - {sha_short} \u2014 {commit_subject}\n - ...\n- Prior pullfrog review: none or {prior_sha_short} ({prior_review_html_url})\n- Submitted at: {iso_timestamp}\n-->\n```\n\nPull every metadata field from the `checkout_pr` tool's response \u2014 file count, commit count, base/head ref + SHA, the commit list. For `IncrementalReview` runs, populate `Prior pullfrog review` with the prior review's commit_id (short SHA) and `html_url` from `list_pull_request_reviews`.\n\n## 2. Cross-cutting issue sections (zero or more)\n\nFor each cross-cutting concern, one `### ` section. Use this exact shape:\n\n```\n### {emoji} {short, descriptive title \u2014 what's wrong, not what to do}\n\n{Human-readable problem write-up. 
Describes the PROBLEM only \u2014 what's broken, what the symptom is, what the blast radius is. NO asks, NO suggested fixes, NO \"the right thing to do is...\". Asks and fixes live in the technical-details block below; the visible part is for the human to *understand* the problem, not to implement it.}\n\n<details><summary>Technical details</summary>\n\n\\`\\`\\`\\`markdown\n# {title repeated}\n\n## Affected sites\n- {file path:line} \u2014 {what's wrong there}\n- ...\n\n## Required outcome\n- {what the fix needs to achieve, not how to achieve it}\n- ...\n\n## Suggested approach (optional)\n{When the fix shape is non-obvious, sketch one or more reasonable directions. Skip when the outcome alone makes the fix obvious.}\n\n## Open questions for the human (optional)\n- {Any decision an implementing agent shouldn't make unilaterally \u2014 pricing thresholds, breaking-change policy, naming, scope of follow-up.}\n\\`\\`\\`\\`\n\n</details>\n```\n\nConcrete example of the visible part of a non-anchored section (technical-details block unchanged from the template above):\n\n```\n### \u2139\uFE0F Legacy `opencode.ts` has no documented deletion plan\n\nThe v2 harness lands alongside the v1 file and imports one helper from it. Worth a follow-up issue or a TODO so the next maintainer doesn't have to re-derive the cleanup plan.\n```\n\nThe example's value is its *shape*: a finding about absence (no deletion plan), not a line-anchored bug. Body sections live or die on whether the concern genuinely doesn't fit on a line.\n\n**Heading severity emoji** \u2014 every `### ` heading carries one:\n\n- \uD83D\uDEA8 critical \u2014 blocks merge (data loss, security, broken core flow)\n- \u26A0\uFE0F important \u2014 must address before merging (regression, missing validation, incorrect behavior)\n- \u2139\uFE0F informational \u2014 surfaced for awareness; mergeable as-is\n\n**Visible problem write-up rules:**\n\n- **No asks, no suggested fixes** in the visible part. 
The visible portion describes the problem; the technical-details block describes the fix shape and any open questions. The exception: a fix so self-evident that NOT stating it would be weird (e.g. \"the typo is missing an 'r'\") \u2014 in that case, fold it into the problem statement and skip the suggested-approach block in technical details too.\n- **Never two successive plain paragraphs.** Every transition between block-level elements must alternate prose with structure: paragraph \u2192 bullet list \u2192 paragraph; paragraph \u2192 code fence \u2192 bullet list; paragraph \u2192 table \u2192 paragraph. Two consecutive paragraphs in a row create a wall of text that's impossible to digest. If you catch yourself writing one, find a way to split it: pull a list out of it, drop a 2-3 line code fence between them, or merge them into a single tighter paragraph.\n- **Per-paragraph budget:** ~3 sentences max. Past that, you're explaining where you should be structuring.\n- **Identifier discipline still applies** in the visible part. Lead with behavior in plain English; name an identifier only when it's the subject of the concern or a public surface a reader would recognize. The technical-details block is where dense identifier references belong.\n\n**Technical-details block rules:**\n\n- Wrapped in a 4-backtick markdown fence (`\\`\\`\\`\\`markdown ... \\`\\`\\`\\``) so it's visually distinct, one-click copyable, and can contain its own 3-backtick code fences without escape gymnastics. The contents are agent-readable \u2014 a fix-agent will pull the body down and use this block as the brief.\n- File paths and `file:line` refs are encouraged (and necessary) \u2014 the next agent uses these to navigate. Identifier density is fine here.\n- Slightly more verbose than the absolute minimum is OK when it materially helps the next agent: a small code snippet showing the symptom, a short table of mismatched key/column pairs, a one-paragraph \"why CI doesn't catch it\" note. 
Skip massive regression-test scaffolding or full route rewrites \u2014 the implementing agent writes those.\n- Use the four standard sections (`Affected sites`, `Required outcome`, optional `Suggested approach`, optional `Open questions for the human`). Skip the optional sections when they wouldn't add anything.\n\n## Inline technical details\n\nInline comments are short (~2-3 sentences) by default. When an inline finding has broader implications worth recording for a fix-agent \u2014 e.g. a localized bug whose proper fix requires touching several files, or where the right fix depends on a design decision the human needs to make \u2014 append a collapsed `<details><summary>Technical details</summary>` block to the inline comment's body. Same shape as the body-section technical-details block (4-backtick fenced markdown, `## Affected sites` / `## Required outcome` / optional `## Suggested approach` / optional `## Open questions for the human`).\n\nGitHub renders the same markdown parser in inline comments as in the review body, so the collapsed-details affordance works the same way. The visible part of the inline comment stays scannable; the depth is one click away for any agent that needs it.\n\n## 3. `### \u2139\uFE0F Nitpicks` (optional, last section)\n\nOnly when there are nits that for some reason can't be inlined. Filepaths in nit text are fine \u2014 these are simple enough that a human or agent reads once and acts. No technical-details block.\n\n```\n### \u2139\uFE0F Nitpicks\n\n- {nit, with file path inline if useful, \u2264 ~200 chars}\n- ...\n```\n\n## Inline comment shape\n\nInline comments use the same severity framing as body `### ` sections, scaled down for line-anchored use:\n\n- **Lead with a 1-2 sentence problem statement.** The reader is looking at the line in question, so don't restate what the line says \u2014 describe what's wrong with it. 
Optionally prefix the visible line with a severity emoji (\uD83D\uDEA8 / \u26A0\uFE0F / \u2139\uFE0F) when severity isn't obvious from context.\n- **Optional `<details><summary>Technical details</summary>...</details>` collapsible** for findings whose technical context (longer file:line references, related-code snippets, suggested approach, regression-risk notes) would overwhelm the human-readable lead-in. Same agent-readable purpose, same 4-backtick fence shape, and same 4-section structure as the body's technical-details block \u2014 see *Inline technical details* above. Encouraged whenever the depth helps a downstream fix-agent; don't force one when the inline lead-in already says everything.\n- **Visible portion \u2264 2-3 sentences.** If you find yourself writing more, that's the cue to split the depth into the `Technical details` collapsible.\n\n## Body-wide rules\n\n- **Inline-vs-body discipline (repeated for emphasis):** anything that anchors to a specific line goes inline (with a `<details>Technical details</details>` block when the implications are broad). The body is for non-anchorable concerns only \u2014 absence, sequencing, design decisions, scope questions, architectural risk.\n- **No `### Issues found` heading** above the issue sections \u2014 each `### ` heading IS the issue.\n- **Severity emoji on every `### ` heading** (\uD83D\uDEA8 / \u26A0\uFE0F / \u2139\uFE0F). No emoji on the preamble lead-in or anywhere else.\n- **GitHub block-level rendering**: GitHub's markdown parser requires a blank line between ALL block-level elements (HTML tags like `<br/>`, `<sub>`, `<details>`, `<b>` and markdown syntax like headings, lists, blockquotes, code fences, paragraphs). Without a blank line, GitHub treats following content as a continuation of the HTML block and renders markdown syntax as literal text. 
ALWAYS separate block-level elements with a blank line.\n- **Backtick-wrap** every variable, identifier, or file name when you mention one (in either visible or technical-details portions).\n- **Don't repeat diff content**, don't include raw `+123 / -45` stats, don't include a changelog section, don't use horizontal rules (`---`).\n- **Pull file/commit counts from `checkout_pr` metadata** \u2014 never count manually.\n- **Legacy headings REMOVED.** Do not use `### Key changes`, `### Issues found`, `<b>TL;DR</b>`, or `<sub><b>Summary</b>`. The new structure subsumes them.";
8
- export declare function computeModes(agentId: AgentId): Mode[];
8
+ export declare function computeModes(agentId: AgentId, signedCommits?: boolean): Mode[];
9
9
  export declare const modes: Mode[];
10
10
  /**
11
11
  * modes that legitimately never modify the working tree. used by the post-run
@@ -105,9 +105,6 @@ export interface ToolState {
105
105
  output?: string | undefined;
106
106
  usageEntries: AgentUsage[];
107
107
  model?: string | undefined;
108
- modelFallback?: {
109
- from: string;
110
- } | undefined;
111
108
  oss?: boolean | undefined;
112
109
  todoTracker?: TodoTracker | undefined;
113
110
  diffCoverage?: DiffCoverageState | undefined;
@@ -0,0 +1,40 @@
1
+ /** one working-tree change to include in an API commit. */
2
+ export type ChangedFile = {
3
+ path: string;
4
+ deleted: boolean;
5
+ };
6
+ /**
7
+ * all working-tree changes vs HEAD: tracked changes (committed-to-index,
8
+ * staged, and unstaged all collapse into `git diff HEAD`) plus untracked
9
+ * files from `git status`. respects .gitignore. throws on unresolved
10
+ * conflicts — the caller's guidance is to resolve and `git add` first.
11
+ */
12
+ export declare function detectWorkingTreeChanges(): ChangedFile[];
13
+ /**
14
+ * refuse content the API path cannot faithfully commit: git-lfs files (the
15
+ * pointer would be committed without the lfs object upload that the git
16
+ * pre-push hook performs) and directories (nested repositories / submodule
17
+ * pointers). other clean filters are fine — blob content goes through
18
+ * `git hash-object`, which applies them exactly like a local commit.
19
+ */
20
+ export declare function assertApiCommittable(files: ChangedFile[]): Promise<void>;
21
+ /**
22
+ * create one GitHub-signed commit on `remoteBranch` containing `files` read
23
+ * from the working tree, parented on `parents` (first parent supplies the
24
+ * base tree; a second parent — MERGE_HEAD — makes it a true merge commit so
25
+ * base-branch integration doesn't pollute the PR diff). empty `files` with
26
+ * two parents concludes a merge that resolved to the first parent's tree.
27
+ * creates the remote branch at the new commit when it doesn't exist yet.
28
+ */
29
+ export declare function createSignedCommit(params: {
30
+ token: string;
31
+ owner: string;
32
+ repo: string;
33
+ remoteBranch: string;
34
+ message: string;
35
+ parents: string[];
36
+ files: ChangedFile[];
37
+ }): Promise<{
38
+ sha: string;
39
+ createdBranch: boolean;
40
+ }>;
@@ -27,8 +27,23 @@ export declare function validateAgentApiKey(params: {
27
27
  * {"type":"error","error":{"type":"authentication_error", ...
28
28
  * "Invalid bearer token"}}`) emitted by the Claude CLI for revoked /
29
29
  * mistyped / rotated `ANTHROPIC_API_KEY`. see #782.
30
+ * - expired credentials (#931): Bedrock 403 `Failed to authenticate. API
31
+ * Error: 403 {"Message":"*** has expired"}` (short-lived bearer tokens),
32
+ * OpenAI OAuth "Your authentication token has expired", and Codex
33
+ * "Token refresh failed: 401". the Bedrock pattern is anchored to the
34
+ * Claude CLI emission ("Failed to authenticate. API Error:") so generic
35
+ * auth chatter in agent stderr can't misclassify a hang as a key error.
30
36
  */
31
37
  export declare function isApiKeyAuthError(text: string): boolean;
38
+ /**
39
+ * Expired OAuth-connection credential shapes (#931) — the fix is to
40
+ * re-authenticate the provider connection (`pullfrog auth <provider>`), not
41
+ * to rotate a repo-secret API key, so `formatApiKeyErrorSummary` renders
42
+ * distinct copy for these. Patterns are deliberately narrow:
43
+ * "authentication token has expired" (not bare "token has expired") so a
44
+ * GitHub installation-token expiry can't be misread as an LLM key problem.
45
+ */
46
+ export declare function isOAuthCredentialExpiredError(text: string): boolean;
32
47
  /**
33
48
  * Friendly Markdown summary for both the missing-key and invalid-key cases.
34
49
  * Used in the catch / result-failure paths in `main.ts` to overwrite the raw
@@ -17,13 +17,6 @@ export interface BuildPullfrogFooterParams {
17
17
  customParts?: string[] | undefined;
18
18
  /** model slug from payload (e.g., "anthropic/claude-opus"). shown in footer as "Using `Model Name`" */
19
19
  model?: string | undefined;
20
- /**
21
- * When the action engaged the BYOK fallback, this is the slug the user
22
- * had configured (e.g. "anthropic/claude-opus") — the footer renders
23
- * `Using <free model> (credentials for <configured> not configured)`
24
- * so the substitution is visible in PR comments + reviews.
25
- */
26
- fallbackFrom?: string | undefined;
27
20
  /**
28
21
  * true when the run's model costs are covered by the Pullfrog for OSS
29
22
  * program — the footer renders `Using <model> (free via Pullfrog for OSS)`
@@ -0,0 +1,30 @@
1
+ export type SubscriptionPreflight = {
2
+ usable: true;
3
+ } | {
4
+ usable: false;
5
+ reason: string;
6
+ };
7
+ /**
8
+ * preflight a Claude subscription OAuth token (`CLAUDE_CODE_OAUTH_TOKEN`)
9
+ * with a 1-token Messages call, so the agent can fall back to
10
+ * `ANTHROPIC_API_KEY` when the subscription is exhausted or revoked instead
11
+ * of failing the whole run at its first model call. rides the same de-facto
12
+ * OAuth surface Claude Code itself uses: Bearer auth + the
13
+ * `claude-code-20250219,oauth-2025-04-20` betas + the identity system prompt.
14
+ *
15
+ * probes the run's own model when known — subscription limits can be
16
+ * per-model ("You've hit your Opus limit"), so a cheaper stand-in could pass
17
+ * preflight and still leave the run dead on arrival.
18
+ *
19
+ * fail-open by design: only 401 (revoked/expired token) and 429
20
+ * (session/weekly/per-model limit) mark the token unusable. network errors,
21
+ * 5xx, and request-shape drift (400) all keep today's subscription-first
22
+ * behavior, so the preflight can never fail a run that would have worked —
23
+ * the worst wrong answer is a run that bills the API key instead of the
24
+ * subscription.
25
+ */
26
+ export declare function preflightClaudeSubscription(params: {
27
+ token: string;
28
+ /** bare Anthropic model id the run will use (e.g. "claude-fable-5") */
29
+ model: string | undefined;
30
+ }): Promise<SubscriptionPreflight>;
@@ -1,5 +1,10 @@
1
1
  import { throttling } from "@octokit/plugin-throttling";
2
2
  import { Octokit } from "@octokit/rest";
3
+ /** GitHub Actions OIDC request credentials, stashed before env wipes */
4
+ export interface OidcCredentials {
5
+ requestUrl: string;
6
+ requestToken: string;
7
+ }
3
8
  export interface InstallationToken {
4
9
  token: string;
5
10
  expires_at: string;
@@ -37,7 +42,21 @@ type GitHubAppPermissions = {
37
42
  type AcquireTokenOptions = {
38
43
  repos?: string[];
39
44
  permissions?: GitHubAppPermissions;
45
+ /**
46
+ * stashed OIDC credentials for minting after restricted mode deletes
47
+ * ACTIONS_ID_TOKEN_REQUEST_* from process.env (mid-run token refresh)
48
+ */
49
+ oidc?: OidcCredentials | undefined;
40
50
  };
51
+ /**
52
+ * mint a GitHub Actions OIDC ID token from stashed credentials without
53
+ * touching process.env — `core.getIDToken` reads the env vars directly,
54
+ * which restricted mode has already deleted by the time a refresh runs.
55
+ * throws TokenExchangeError on HTTP errors and a "timed out" Error on
56
+ * timeout so `acquireNewToken`'s retry predicate treats 5xx/429/timeouts
57
+ * as transient.
58
+ */
59
+ export declare function fetchIdTokenFromStash(creds: OidcCredentials): Promise<string>;
41
60
  /**
42
61
  * ensure a GitHub token is available in the environment.
43
62
  *
@@ -51,6 +70,13 @@ type AcquireTokenOptions = {
51
70
  * main() directly and never calls this.
52
71
  */
53
72
  export declare function ensureGitHubToken(): Promise<void>;
73
+ /**
74
+ * retry predicate shared by token mints: 4xx (other than 429) is terminal user state (app not
75
+ * installed, permissions wrong) — retrying just triples our log noise and the
76
+ * user's CI bill (see #693). 5xx/429 and network failures are transient
77
+ * (vercel cold start, github outage, rate limit) and should ride the backoff.
78
+ */
79
+ export declare function isTransientTokenError(error: unknown): boolean;
54
80
  export declare function acquireNewToken(opts?: AcquireTokenOptions): Promise<string>;
55
81
  export interface RepoContext {
56
82
  owner: string;
@@ -74,5 +100,5 @@ export interface UsageSummary {
74
100
  };
75
101
  }
76
102
  export declare function writeGitHubUsageSummaryToFile(path: string): Promise<void>;
77
- export declare function createOctokit(token: string): OctokitWithPlugins;
103
+ export declare function createOctokit(token: string, refreshAuth?: (stale: string) => Promise<string>): OctokitWithPlugins;
78
104
  export {};
@@ -9,6 +9,9 @@ interface InstructionsContext {
9
9
  modes: Mode[];
10
10
  agentId: AgentId;
11
11
  outputSchema?: Record<string, unknown> | undefined;
12
+ /** commits are created via the GitHub API (commit_changes tool) so GitHub
13
+ * signs them — flips the Git instructions to the signed-commits flow. */
14
+ signedCommits: boolean;
12
15
  /** absolute path to the seeded learnings tmpfile, or null when the file
13
16
  * couldn't be seeded for some reason. main.ts always seeds, so in
14
17
  * practice this is always set; the null case keeps the type honest. */
@@ -6,6 +6,6 @@ export declare function captureBaselineModels(cliPath: string): void;
6
6
  * `» BYOK auth enabled N model(s): …`. */
7
7
  export declare function captureAuthorizedModels(cliPath: string): void;
8
8
  /** Authorized set captured after Pullfrog-stored auth is applied. Throws if
9
- * called before `captureAuthorizedModels` — the call sites (fallback gate,
10
- * api-key validation, auto-select) all run strictly after capture. */
9
+ * called before `captureAuthorizedModels` — the call sites (api-key
10
+ * validation, auto-select) all run strictly after capture. */
11
11
  export declare function getAuthorizedModels(): Set<string>;
@@ -4,7 +4,8 @@
4
4
  * billing accounts) or OSS-grant paths.
5
5
  *
6
6
  * Authenticates one of two ways:
7
- * - production: GitHub Actions OIDC token via `core.getIDToken`
7
+ * - production: GitHub Actions OIDC token minted from the stashed
8
+ * credentials via `fetchIdTokenFromStash` (env-free)
8
9
  * - local dev (`API_URL` is localhost): `x-dev-repo` header bypass
9
10
  *
10
11
  * `runProxyResolution` is the entrypoint `main.ts` calls. It wraps
@@ -17,11 +18,8 @@
17
18
  * - 503 → `TransientError` (transient sync issue — retry next dispatch)
18
19
  */
19
20
  import type { ToolState } from "../toolState.ts";
21
+ import { type OidcCredentials } from "./github.ts";
20
22
  import type { ResolvedPayload } from "./payload.ts";
21
- export interface OidcCredentials {
22
- requestUrl: string;
23
- requestToken: string;
24
- }
25
23
  /**
26
24
  * Run `resolveProxyModel`; if it throws a Billing or Transient error, render
27
25
  * the user-facing summary, mirror it to the PR progress comment, and rethrow.
@@ -31,6 +31,7 @@ export interface RepoSettings {
31
31
  push: PushPermission;
32
32
  shell: ShellPermission;
33
33
  prApproveEnabled: boolean;
34
+ signedCommits: boolean;
34
35
  modeInstructions: Record<string, string>;
35
36
  learnings: string | null;
36
37
  learningsHeadings: LearningsHeading[];
@@ -24,8 +24,8 @@
24
24
  * the underlying provider error often lands); `formatApiKeyErrorSummary`
25
25
  * renders provider + console-link copy.
26
26
  *
27
- * 4. ProviderModelNotFoundError — stale free-fallback model id no longer
28
- * in the OpenCode catalog; renders a nudge to add a BYOK key.
27
+ * 4. ProviderModelNotFoundError — configured model id no longer in the
28
+ * OpenCode catalog; renders a nudge to pick a different model.
29
29
  *
30
30
  * 5. Activity-timeout hang — `errorMessage` starts with
31
31
  * `"activity timeout"` or `"agent still pending"` AND none of the
@@ -1,7 +1,12 @@
1
1
  import type { PushPermission } from "../external.ts";
2
- import { acquireNewToken } from "./github.ts";
2
+ import { acquireNewToken, type OidcCredentials } from "./github.ts";
3
3
  export { acquireNewToken as acquireInstallationToken };
4
4
  export { revokeGitHubInstallationToken as revokeInstallationToken };
5
+ /**
6
+ * get the refresh function for the MCP token, if re-acquisition is possible.
7
+ * pass to `createOctokit` so a mid-run 401 triggers a refresh + retry (#891).
8
+ */
9
+ export declare function getMcpTokenRefresh(): ((stale: string) => Promise<string>) | undefined;
5
10
  /**
6
11
  * get the job-scoped token from action input.
7
12
  * this token has permissions defined by the workflow's permissions block.
@@ -19,6 +24,12 @@ export type TokenRef = {
19
24
  };
20
25
  type ResolveTokensParams = {
21
26
  push: PushPermission;
27
+ /**
28
+ * OIDC credentials stashed by main.ts before the restricted-mode env wipe —
29
+ * the mid-run MCP token refresh mints from this snapshot (#891). null when
30
+ * OIDC isn't available (local dev, external token).
31
+ */
32
+ oidc: OidcCredentials | null;
22
33
  };
23
34
  /**
24
35
  * resolve tokens for the action run.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pullfrog",
3
- "version": "0.1.29",
3
+ "version": "0.1.30",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "pullfrog": "dist/cli.mjs",
@@ -1,44 +0,0 @@
1
- import type { AgentId } from "../external.ts";
2
- /**
3
- * Slug we fall back to when a BYOK-required model is configured but the
4
- * runner has no provider key in env. Picked because it's free, stable, and
5
- * currently served by OpenCode Zen without a key.
6
- *
7
- * The slug is intentionally hard-coded and not a config knob — the
8
- * fallback is a safety net, not a user-facing preference, and adding a
9
- * config surface here would just push the same "what to fall back to"
10
- * decision into another setting that goes stale the same way.
11
- */
12
- export declare const FREE_FALLBACK_SLUG = "opencode/big-pickle";
13
- export type FallbackDecision = {
14
- fallback: false;
15
- } | {
16
- fallback: true;
17
- from: string;
18
- to: string;
19
- };
20
- /**
21
- * If the resolved model is NOT in OpenCode's `authorized` set (the
22
- * authoritative "what can OpenCode route right now" snapshot captured
23
- * after dbSecrets + Codex auth.json are in place), swap to a free
24
- * OpenCode slug so the run can still produce value. Caller is responsible
25
- * for surfacing the swap (log line + run summary).
26
- *
27
- * Skip cases (return `fallback: false` without consulting `authorized`):
28
- * - Router / proxy runs (`proxyModel` set): Pullfrog mints the key.
29
- * - No resolved model: auto-select handles it downstream.
30
- * - Resolved model is the free fallback already.
31
- * - Resolved model is a raw Bedrock / Vertex ID (no `/`): the routing
32
- * validators (`validateBedrockSetup` / `validateVertexSetup`) cover
33
- * auth + region/location/model-id; `opencode models` does not.
34
- * - The selected agent is `claude`: the Claude Code harness brings its own
35
- * auth and `resolveAgent` only returns it when that auth is present.
36
- * `opencode models` can't see `CLAUDE_CODE_OAUTH_TOKEN`, so without this
37
- * an OAuth-subscription run on an Anthropic model would wrongly fall back.
38
- */
39
- export declare function selectFallbackModelIfNeeded(input: {
40
- resolvedModel: string | undefined;
41
- proxyModel: string | undefined;
42
- authorized: Set<string>;
43
- agentName: AgentId;
44
- }): FallbackDecision;