npm - pullfrog - Versions diffs - 0.1.8 → 0.1.10 - Mend

pullfrog 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/dist/agents/opencodeShared.d.ts +40 -0
package/dist/agents/postRun.d.ts +11 -3
package/dist/agents/shared.d.ts +7 -0
package/dist/cli.mjs +4275 -3256
package/dist/external.d.ts +1 -1
package/dist/index.js +1706 -1219
package/dist/internal/index.d.ts +2 -1
package/dist/internal.js +245 -85
package/dist/models.d.ts +10 -0
package/dist/modes.d.ts +1 -1
package/dist/toolState.d.ts +4 -0
package/dist/utils/activity.d.ts +31 -1
package/dist/utils/apiKeys.d.ts +5 -1
package/dist/utils/billingErrors.d.ts +85 -0
package/dist/utils/buildPullfrogFooter.d.ts +7 -0
package/dist/utils/byokFallback.d.ts +50 -0
package/dist/utils/codexHome.d.ts +23 -0
package/dist/utils/errorReport.d.ts +9 -0
package/dist/utils/learnings.d.ts +20 -0
package/dist/utils/learningsTruncate.d.ts +25 -0
package/dist/utils/lifecycle.d.ts +23 -3
package/dist/utils/overrides.d.ts +40 -0
package/dist/utils/payload.d.ts +7 -0
package/dist/utils/prSummary.d.ts +21 -0
package/dist/utils/proxy.d.ts +47 -0
package/dist/utils/runContext.d.ts +0 -9
package/dist/utils/runErrorRenderer.d.ts +41 -0
package/dist/utils/runLifecycle.d.ts +75 -0
package/dist/utils/runStartupLog.d.ts +15 -0
package/dist/utils/subprocess.d.ts +1 -0
package/package.json +3 -2
package/dist/agents/{opencode.d.ts → opencode_v2.d.ts} +0 -0

package/dist/models.d.ts CHANGED Viewed

@@ -68,6 +68,11 @@ interface ModelDef {
 export interface ProviderConfig {
     displayName: string;
     envVars: readonly string[];
+    /** credentials authored only via `pullfrog auth <provider>` — never
+     * user-facing in `init`, never documented as a manual GHA secret. counted
+     * for hasAnyKey / log-redaction purposes but excluded from any prompt /
+     * paste flow. CLI-managed magic. see wiki/codex-auth.md. */
+    managedCredentials?: readonly string[];
     models: Record<string, ModelDef>;
 }
 export declare const providers: {
@@ -89,6 +94,11 @@ export declare function parseModel(slug: string): {
 export declare function getModelProvider(slug: string): string;
 export declare function getProviderDisplayName(slug: string): string | undefined;
 export declare function getModelEnvVars(slug: string): string[];
+/** managed credentials are authored only via `pullfrog auth <provider>` — they
+ * count as "configured" for hasAnyKey-style UI checks but are never offered as
+ * a manual-paste option in `init` or the AgentSettings env-var button row.
+ * see `provider.managedCredentials` and wiki/codex-auth.md. */
+export declare function getModelManagedCredentials(slug: string): string[];
 export declare const modelAliases: ModelAlias[];
 /** resolve a model slug to its concrete models.dev specifier (e.g. "anthropic/claude-opus-4-6") */
 export declare function resolveModelSlug(slug: string): string | undefined;

package/dist/modes.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@ export interface Mode {
     description: string;
     prompt?: string | undefined;
 }
-export declare const PR_SUMMARY_FORMAT = "### Default format\n\nFollow this structure exactly:\n\n<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.\nNOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.\n\n### Key changes\n\n- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add `TodoTracker` for live checklists**). A reviewer should understand the full PR from this list alone.\n\n<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: `{base}` \u2190 `{head}`</sub>\nNOTE: the metadata line goes AFTER the bullet list, not before it.\n\nThen for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).\n\n<br/>\n\n## Example readable section title\n\n> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]\nIMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate `>` lines creates a double line break.\n\n1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.\n\nIf a change warrants deeper explanation, use a blockquoted details/summary framed as a question:\n> <details><summary>How does X work?</summary>\n> Extended explanation here.\n> </details>\n\nEnd each section with a file links trail (3-4 key files max):\n[`file.ts`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \u00B7 ...\n\nSingle-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.\n\nCRITICAL \u2014 GitHub markdown rendering rule:\nGitHub's markdown parser requires a blank line between ALL block-level elements. 
This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.\n\nRules:\n- `##` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title\n- ALL variable names, identifiers, and file names in body text must be in backticks\n- ALL file references MUST link to the PR Files Changed view. Use the `diff-<hex>` anchor precomputed next to each filename in the `checkout_pr` TOC \u2014 do NOT run `sha256sum` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the `#diff-` anchor rather than guessing.\n- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)\n- Do NOT include raw diff stats like '+123 / -45' or line counts\n- Do NOT include code blocks or repeat diff contents\n- Do NOT include a changelog section \u2014 the key changes list serves this purpose\n- Focus on *intent*, not *what* \u2014 the diff already shows what changed\n- Get the file count and commit count from the checkout_pr metadata, not by counting manually";
+export declare const PR_SUMMARY_FORMAT = "### Default format\n\nThe body has at most three parts in this exact order:\n\n1. **Reviewed changes preamble** \u2014 one bolded inline lead-in describing what was reviewed in this run, a bullet list of the substantive changes, and an HTML comment carrying review metadata for downstream agents.\n2. **Cross-cutting issue sections** (zero or more) \u2014 one `### ` heading per concern, with a human-readable problem write-up and a collapsed `<details>Technical details</details>` block underneath.\n3. **`### \u2139\uFE0F Nitpicks`** at the very bottom (only if there are nits worth surfacing in the body) \u2014 a flat bullet list, no technical-details block.\n\nInline-vs-body split: concerns that anchor to a specific line go inline (use the `comments` parameter). Body `### ` sections are reserved for concerns that **have no line to anchor to** \u2014 typically because the concern is about *absence* (something the diff should have done but didn't), *sequencing* (rollout / deletion / migration order), *design decisions only the human can make*, or *scope questions the diff implicitly raises but doesn't address*. A concern that anchors to a line but has broad implications still goes inline (use the technical-details block there to capture the implications \u2014 see Inline technical details below). If you found no non-anchorable concerns, the body has zero `### ` issue sections \u2014 just the preamble + metadata.\n\n## 1. Reviewed changes preamble\n\nOpen with a single bolded inline lead-in followed immediately by the bullet list (no `### Key changes` heading, no `<b>TL;DR</b>`):\n\n```\n**Reviewed changes** \u2014 one sentence on what was reviewed in this run. For Review (initial), this is what the PR does and why. For IncrementalReview, this is what changed since the prior pullfrog review. Focus on intent, not mechanics.\n\n- **Short human-readable title** \u2014 1 sentence per substantive change. 
Write a short prose phrase; when you name a file, type, or function, put that name in backticks (e.g. **Add \\`TodoTracker\\` for live checklists**). A reviewer should understand the full reviewed scope from this list alone \u2014 this IS the dispassionate \"what was reviewed and what changed\" overview, so cover the substantive changes, not just the loudest ones.\n\n<!--\nPullfrog review metadata \u2014 for any agent (or human-with-agent) reading this\nreview. Incorporate the fields below into your understanding of the context\nthis review was made in. The findings below were written against\n{head_sha_short}; if new commits have landed on {head_ref} since this review\nwas submitted, treat any specific bug, file, or line callout as POTENTIALLY\nSTALE \u2014 re-diff against {head_sha_short} (or trigger a fresh review) and\nfactor commits past {head_sha_short} into your understanding of the current\nstate before acting on findings.\n\n- Mode: Review (initial)   or   IncrementalReview (delta against prior pullfrog review)\n- Files reviewed: {file_count}\n- Commits reviewed: {commit_count}\n- Base: {base_ref} ({base_sha_short})\n- Head: {head_ref} ({head_sha_short})\n- Reviewed commits:\n  - {sha_short} \u2014 {commit_subject}\n  - ...\n- Prior pullfrog review: none   or   {prior_sha_short} ({prior_review_html_url})\n- Submitted at: {iso_timestamp}\n-->\n```\n\nPull every metadata field from the `checkout_pr` tool's response \u2014 file count, commit count, base/head ref + SHA, the commit list. For `IncrementalReview` runs, populate `Prior pullfrog review` with the prior review's commit_id (short SHA) and `html_url` from `list_pull_request_reviews`.\n\n## 2. Cross-cutting issue sections (zero or more)\n\nFor each cross-cutting concern, one `### ` section. Use this exact shape:\n\n```\n### {emoji} {short, descriptive title \u2014 what's wrong, not what to do}\n\n{Human-readable problem write-up. 
Describes the PROBLEM only \u2014 what's broken, what the symptom is, what the blast radius is. NO asks, NO suggested fixes, NO \"the right thing to do is...\". Asks and fixes live in the technical-details block below; the visible part is for the human to *understand* the problem, not to implement it.}\n\n<details><summary>Technical details</summary>\n\n\\`\\`\\`\\`markdown\n# {title repeated}\n\n## Affected sites\n- {file path:line} \u2014 {what's wrong there}\n- ...\n\n## Required outcome\n- {what the fix needs to achieve, not how to achieve it}\n- ...\n\n## Suggested approach (optional)\n{When the fix shape is non-obvious, sketch one or more reasonable directions. Skip when the outcome alone makes the fix obvious.}\n\n## Open questions for the human (optional)\n- {Any decision an implementing agent shouldn't make unilaterally \u2014 pricing thresholds, breaking-change policy, naming, scope of follow-up.}\n\\`\\`\\`\\`\n\n</details>\n```\n\nConcrete example of the visible part of a non-anchored section (technical-details block unchanged from the template above):\n\n```\n### \u2139\uFE0F Legacy `opencode.ts` has no documented deletion plan\n\nThe v2 harness lands alongside the v1 file and imports one helper from it. Worth a follow-up issue or a TODO so the next maintainer doesn't have to re-derive the cleanup plan.\n```\n\nThe example's value is its *shape*: a finding about absence (no deletion plan), not a line-anchored bug. Body sections live or die on whether the concern genuinely doesn't fit on a line.\n\n**Heading severity emoji** \u2014 every `### ` heading carries one:\n\n- \uD83D\uDEA8 critical \u2014 blocks merge (data loss, security, broken core flow)\n- \u26A0\uFE0F important \u2014 must address before merging (regression, missing validation, incorrect behavior)\n- \u2139\uFE0F informational \u2014 surfaced for awareness; mergeable as-is\n\n**Visible problem write-up rules:**\n\n- **No asks, no suggested fixes** in the visible part. 
The visible portion describes the problem; the technical-details block describes the fix shape and any open questions. The exception: a fix so self-evident that NOT stating it would be weird (e.g. \"the typo is missing an 'r'\") \u2014 in that case, fold it into the problem statement and skip the suggested-approach block in technical details too.\n- **Never two successive plain paragraphs.** Every transition between block-level elements must alternate prose with structure: paragraph \u2192 bullet list \u2192 paragraph; paragraph \u2192 code fence \u2192 bullet list; paragraph \u2192 table \u2192 paragraph. Two consecutive paragraphs in a row create a wall of text that's impossible to digest. If you catch yourself writing one, find a way to split it: pull a list out of it, drop a 2-3 line code fence between them, or merge them into a single tighter paragraph.\n- **Per-paragraph budget:** ~3 sentences max. Past that, you're explaining where you should be structuring.\n- **Identifier discipline still applies** in the visible part. Lead with behavior in plain English; name an identifier only when it's the subject of the concern or a public surface a reader would recognize. The technical-details block is where dense identifier references belong.\n\n**Technical-details block rules:**\n\n- Wrapped in a 4-backtick markdown fence (`\\`\\`\\`\\`markdown ... \\`\\`\\`\\``) so it's visually distinct, one-click copyable, and can contain its own 3-backtick code fences without escape gymnastics. The contents are agent-readable \u2014 a fix-agent will pull the body down and use this block as the brief.\n- File paths and `file:line` refs are encouraged (and necessary) \u2014 the next agent uses these to navigate. Identifier density is fine here.\n- Slightly more verbose than the absolute minimum is OK when it materially helps the next agent: a small code snippet showing the symptom, a short table of mismatched key/column pairs, a one-paragraph \"why CI doesn't catch it\" note. 
Skip massive regression-test scaffolding or full route rewrites \u2014 the implementing agent writes those.\n- Use the four standard sections (`Affected sites`, `Required outcome`, optional `Suggested approach`, optional `Open questions for the human`). Skip the optional sections when they wouldn't add anything.\n\n## Inline technical details\n\nInline comments are short (~2-3 sentences) by default. When an inline finding has broader implications worth recording for a fix-agent \u2014 e.g. a localized bug whose proper fix requires touching several files, or where the right fix depends on a design decision the human needs to make \u2014 append a collapsed `<details><summary>Technical details</summary>` block to the inline comment's body. Same shape as the body-section technical-details block (4-backtick fenced markdown, `## Affected sites` / `## Required outcome` / optional `## Suggested approach` / optional `## Open questions for the human`).\n\nGitHub renders the same markdown parser in inline comments as in the review body, so the collapsed-details affordance works the same way. The visible part of the inline comment stays scannable; the depth is one click away for any agent that needs it.\n\n## 3. `### \u2139\uFE0F Nitpicks` (optional, last section)\n\nOnly when there are nits that for some reason can't be inlined. Filepaths in nit text are fine \u2014 these are simple enough that a human or agent reads once and acts. No technical-details block.\n\n```\n### \u2139\uFE0F Nitpicks\n\n- {nit, with file path inline if useful, \u2264 ~200 chars}\n- ...\n```\n\n## Inline comment shape\n\nInline comments use the same severity framing as body `### ` sections, scaled down for line-anchored use:\n\n- **Lead with a 1-2 sentence problem statement.** The reader is looking at the line in question, so don't restate what the line says \u2014 describe what's wrong with it. 
Optionally prefix the visible line with a severity emoji (\uD83D\uDEA8 / \u26A0\uFE0F / \u2139\uFE0F) when severity isn't obvious from context.\n- **Optional `<details><summary>Technical details</summary>...</details>` collapsible** for findings whose technical context (longer file:line references, related-code snippets, suggested approach, regression-risk notes) would overwhelm the human-readable lead-in. Same agent-readable purpose, same 4-backtick fence shape, and same 4-section structure as the body's technical-details block \u2014 see *Inline technical details* above. Encouraged whenever the depth helps a downstream fix-agent; don't force one when the inline lead-in already says everything.\n- **Visible portion \u2264 2-3 sentences.** If you find yourself writing more, that's the cue to split the depth into the `Technical details` collapsible.\n\n## Body-wide rules\n\n- **Inline-vs-body discipline (repeated for emphasis):** anything that anchors to a specific line goes inline (with a `<details>Technical details</details>` block when the implications are broad). The body is for non-anchorable concerns only \u2014 absence, sequencing, design decisions, scope questions, architectural risk.\n- **No `### Issues found` heading** above the issue sections \u2014 each `### ` heading IS the issue.\n- **Severity emoji on every `### ` heading** (\uD83D\uDEA8 / \u26A0\uFE0F / \u2139\uFE0F). No emoji on the preamble lead-in or anywhere else.\n- **GitHub block-level rendering**: GitHub's markdown parser requires a blank line between ALL block-level elements (HTML tags like `<br/>`, `<sub>`, `<details>`, `<b>` and markdown syntax like headings, lists, blockquotes, code fences, paragraphs). Without a blank line, GitHub treats following content as a continuation of the HTML block and renders markdown syntax as literal text. 
ALWAYS separate block-level elements with a blank line.\n- **Backtick-wrap** every variable, identifier, or file name when you mention one (in either visible or technical-details portions).\n- **Don't repeat diff content**, don't include raw `+123 / -45` stats, don't include a changelog section, don't use horizontal rules (`---`).\n- **Pull file/commit counts from `checkout_pr` metadata** \u2014 never count manually.\n- **Legacy headings REMOVED.** Do not use `### Key changes`, `### Issues found`, `<b>TL;DR</b>`, or `<sub><b>Summary</b>`. The new structure subsumes them.";
 export declare function computeModes(agentId: AgentId): Mode[];
 export declare const modes: Mode[];
 /**

package/dist/toolState.d.ts CHANGED Viewed

@@ -64,6 +64,7 @@ export interface ToolState {
     commentableLinesCheckoutSha?: string | undefined;
     beforeSha?: string;
     selectedMode?: string;
+    prepushFailureCount: number;
     backgroundProcesses: Map<string, BackgroundProcess>;
     browserDaemon?: BrowserDaemon | undefined;
     review?: {
@@ -97,6 +98,9 @@ export interface ToolState {
     output?: string;
     usageEntries: AgentUsage[];
     model?: string | undefined;
+    modelFallback?: {
+        from: string;
+    } | undefined;
     todoTracker?: TodoTracker | undefined;
     diffCoverage?: DiffCoverageState | undefined;
     agentDiagnostic?: AgentDiagnostic | undefined;

package/dist/utils/activity.d.ts CHANGED Viewed

@@ -12,14 +12,44 @@ export type ActivityTimeout = {
     /** force the timeout to reject immediately with a custom reason */
     forceReject: (reason: string) => void;
 };
+/**
+ * upper bound on how long a single tool call can suspend the activity
+ * watchdog. matched against the typical worst-case `checkout_pr`
+ * fetch+deepen on a large monorepo (issue #760: 4-5min) plus generous
+ * headroom for slower MCP tools, while still bounding the worst case if
+ * a tool genuinely hangs and `tool_result` never arrives — auto-resume
+ * fires here and the normal idle clock takes over from a fresh baseline.
+ */
+export declare const MAX_TOOL_CALL_SUSPENSION_MS: number;
 /**
  * mark activity to reset the no-output timeout.
  * call this whenever the agent emits any event, even if it isn't logged to stdout.
  */
 export declare function markActivity(): void;
 /**
- * get the time since last activity in milliseconds
+ * get the time since last activity in milliseconds.
+ * returns 0 while the watchdog is suspended (issue #760).
  */
 export declare function getIdleMs(): number;
+/**
+ * suspend the activity watchdog while a long-running, in-flight unit of
+ * work is happening (e.g. an MCP `tools/call` that synchronously awaits
+ * a multi-minute git fetch). bracket calls with `resumeActivity()` from
+ * the agent harness's `tool_use` / `tool_result` event handlers.
+ *
+ * - idempotent: nested suspends are no-ops; the first resume wins.
+ * - bounded: auto-resumes after `maxMs` so a buggy tool that never
+ *   produces a `tool_result` can't pin the watchdog open forever.
+ * - safe: only the *agent harness* (claude.ts / opencode.ts) on explicit,
+ *   paired CLI events should call this. NEVER blanket-suspend on internal
+ *   noise — that would resurrect issue #12 zombie runs.
+ */
+export declare function suspendActivity(maxMs?: number): void;
+/**
+ * resume the activity watchdog. resets the idle baseline so a stale
+ * idle window before the suspend can't immediately re-fire.
+ */
+export declare function resumeActivity(): void;
+export declare function isActivitySuspended(): boolean;
 export declare function createProcessOutputActivityTimeout(ctx: ActivityTimeoutContext): ActivityTimeout;
 export {};

package/dist/utils/apiKeys.d.ts CHANGED Viewed

@@ -10,10 +10,14 @@ export declare function validateAgentApiKey(params: {
 }): void;
 /**
  * Detect agent-runtime auth failures that should be reformatted as an actionable
- * key-fix CTA before being shown to the user. Covers the two shapes we see:
+ * key-fix CTA before being shown to the user. Covers the shapes we see:
  *   - missing key (validateAgentApiKey throw): contains MISSING_KEY_MARKER
  *   - revoked / invalid key (Claude CLI 401 surfaced via api_error_status):
  *     "Invalid API key · Fix external API key" + similar provider variants
+ *   - direct-Anthropic 401 (`Failed to authenticate. API Error: 401 ...
+ *     {"type":"error","error":{"type":"authentication_error", ...
+ *     "Invalid bearer token"}}`) emitted by the Claude CLI for revoked /
+ *     mistyped / rotated `ANTHROPIC_API_KEY`. see #782.
  */
 export declare function isApiKeyAuthError(text: string): boolean;
 /**

package/dist/utils/billingErrors.d.ts ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * Billing-error classification + user-facing copy for `/api/proxy-token`
+ * failures and OpenRouter mid-run exhaustion. Two error classes (Billing vs.
+ * Transient) keep the framing honest: a card decline is *not* the same UX as
+ * a 503 from the proxy service. Both originate in `utils/proxy.ts` (mint
+ * failures) and `utils/runErrorRenderer.ts` (mid-run keylimit reclassify).
+ *
+ * Renderers return markdown bodies that are written into both the GitHub
+ * Actions job summary and the PR progress comment.
+ *
+ * Lives outside `main.ts` so adding a new error `code` branch is a one-file
+ * edit that does not retrigger the full LLM CI matrix (`action/main.ts` is
+ * in `action/test/coverage.ts::ALWAYS_RUN_ALL`).
+ */
+/**
+ * Billing-layer error surfaced from `/api/proxy-token` as a 402. User-actionable
+ * — distinct from TransientError (503 / transient sync issue) so the job
+ * summary + PR comment can use affirmative "you need to do X" copy rather than
+ * the ambiguous "billing error" label that makes transient outages look like
+ * the user's fault.
+ *
+ * `code` is a server-side discriminator: `router_requires_card` (no card + no
+ * wallet balance on Router), or null for unclassified. `declineCode` is
+ * Stripe's more specific sub-reason on `card_declined` (e.g.
+ * `insufficient_funds`, `lost_card`). `needsReauthentication` is the 3DS case
+ * broken out for convenience.
+ */
+export declare class BillingError extends Error {
+    code: string | null;
+    declineCode: string | null;
+    needsReauthentication: boolean;
+    constructor(message: string, opts?: {
+        code?: string | null;
+        declineCode?: string | null;
+        needsReauthentication?: boolean;
+    });
+}
+/**
+ * Transient service failures from `/api/proxy-token` (503: partial OpenRouter
+ * usage sync, DB flake, in-flight payment intent). Not the user's fault — the
+ * summary uses "temporarily unavailable" framing, and the non-zero exit lets
+ * GH Actions apply whatever retry policy the workflow has configured.
+ */
+export declare class TransientError extends Error {
+    constructor(message: string);
+}
+/**
+ * Render a BillingError as user-facing markdown (shared between GH job summary
+ * and the PR progress comment). Goals:
+ *
+ *   - quiet, not alarmist — bold first line instead of an `### ❌` H3, since
+ *     the comment already has Pullfrog branding in the footer
+ *   - actionable — every branch ends in a single CTA deep-linked to the
+ *     correct section of the owner's console
+ *   - honest — say what actually went wrong (card declined vs. balance
+ *     empty vs. 3DS required), don't lump them under "billing error"
+ *
+ * Branches:
+ *   - `router_requires_card`: user is on Router mode with no card AND no
+ *     wallet balance (signup credit exhausted or not granted). Frame as
+ *     "add a card to continue", link to `#model-access` where the Add
+ *     Card flow lives.
+ *   - `router_balance_exhausted`: user has a card on file but auto-reload is
+ *     disabled and they've spent past their $5 overdraft buffer. Frame as
+ *     "balance ran out" and surface both remediation paths (top up, or flip
+ *     on auto-reload).
+ *   - `router_keylimit_exhausted`: OpenRouter rejected mid-run because the
+ *     per-run key budget was exhausted while the agent was working. The
+ *     wallet is now negative; same remediation as `router_balance_exhausted`
+ *     but framed for the after-the-fact case ("this run was cut short").
+ *   - `needsReauthentication`: issuer requires 3DS on every off-session
+ *     charge. Re-adding the card won't help — the only escape is a manual
+ *     top-up where 3DS runs interactively in Stripe Checkout.
+ *   - `declineCode` set: Stripe declined a real charge. Show the sub-code
+ *     so support can act on it; tell the user we'll retry on next dispatch.
+ *   - default: balance hit zero with no in-flight charge (auto-reload off
+ *     or amount below threshold). Direct them to top up or enable auto-reload.
+ */
+export declare function formatBillingErrorSummary(error: BillingError, owner: string): string;
+/**
+ * Render a TransientError as user-facing markdown. Distinct framing from
+ * BillingError so the user doesn't read an alarm and assume their card
+ * failed — this branch is "our fault, retry shortly", not theirs.
+ */
+export declare function formatTransientErrorSummary(error: TransientError, owner: string): string;

package/dist/utils/buildPullfrogFooter.d.ts CHANGED Viewed

@@ -17,6 +17,13 @@ export interface BuildPullfrogFooterParams {
     customParts?: string[] | undefined;
     /** model slug from payload (e.g., "anthropic/claude-opus"). shown in footer as "Using `Model Name`" */
     model?: string | undefined;
+    /**
+     * When the action engaged the BYOK fallback, this is the slug the user
+     * had configured (e.g. "anthropic/claude-opus") — the footer renders
+     * `Using <free model> (credentials for <configured> not configured)`
+     * so the substitution is visible in PR comments + reviews.
+     */
+    fallbackFrom?: string | undefined;
 }
 /**
  * build a pullfrog footer with configurable parts

package/dist/utils/byokFallback.d.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * Slug we fall back to when a BYOK-required model is configured but the
+ * runner has no provider key in env. Picked because it's free
+ * (`isFree: true`, `envVars: []` — see `action/models.ts`), stable, and
+ * currently the strongest free OpenCode model in the catalog. If a
+ * smarter free model is added later, update this single constant.
+ *
+ * The slug is intentionally hard-coded and not a config knob — the
+ * fallback is a safety net, not a user-facing preference, and adding a
+ * config surface here would just push the same "what to fall back to"
+ * decision into another setting that goes stale the same way.
+ */
+export declare const FREE_FALLBACK_SLUG = "opencode/minimax-m2.5-free";
+export type FallbackDecision = {
+    fallback: false;
+} | {
+    fallback: true;
+    from: string;
+    to: string;
+};
+/**
+ * If the resolved model requires a BYOK key but no provider key is
+ * available in env, return `fallback: true` with a free OpenCode slug
+ * so the run can still succeed. Caller is responsible for swapping the
+ * model state and surfacing the fallback (log line + run summary).
+ *
+ * Gates on `resolvedModel` directly (not the configured slug) so the
+ * decision matches both code paths that reach this point: payload-based
+ * config (`repo.model` from DB) and `PULLFROG_MODEL` env var. Both end
+ * up in `resolvedModel` after `resolveModel()` runs upstream.
+ *
+ * Skip cases:
+ *   - Router / proxy runs (`proxyModel` set): Pullfrog mints the key,
+ *     no BYOK in play — never fall back.
+ *   - No resolved model: keeps the existing auto-select-with-throw
+ *     behavior in `validateAgentApiKey` for the "neither model nor
+ *     key" case (genuine misconfig the user should see).
+ *   - Resolved model is itself the free fallback: avoid suggesting we
+ *     fell back to the model we're already running.
+ *   - Resolved model is a Bedrock raw ID (no `/`): Bedrock has its own
+ *     auth shape (`AWS_BEARER_TOKEN_BEDROCK` + region + model ID), and
+ *     `validateBedrockSetup` already surfaces a tailored error. Skipping
+ *     here also avoids `parseModel`'s slash requirement crashing inside
+ *     `hasProviderKey`.
+ *   - Resolved model has its provider key present: no fallback needed.
+ */
+export declare function selectFallbackModelIfNeeded(input: {
+    resolvedModel: string | undefined;
+    proxyModel: string | undefined;
+}): FallbackDecision;

package/dist/utils/codexHome.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+export interface InstalledCodexAuth {
+    /** absolute path of the auth.json we wrote — caller passes this to the
+     * post-hook via core.saveState for refresh-detection later. */
+    authPath: string;
+    /** value to set as XDG_DATA_HOME for the OpenCode subprocess. */
+    xdgDataHome: string;
+    /** refresh_token from the env at materialization time. post-hook compares
+     * against the on-disk file after the run to detect whether OpenCode
+     * refreshed during the session. */
+    originalRefresh: string;
+}
+/** materialize CODEX_AUTH_JSON from env into a disk path OpenCode reads from.
+ * returns null when the env var is absent, malformed, or wrong auth mode —
+ * caller treats null as "no codex auth, fall through to API key flow". */
+export declare function installCodexAuth(): InstalledCodexAuth | null;
+/** convert an on-disk OpenCode auth.json back to the Codex CLI shape so the
+ * post-hook can write it to the Pullfrog secret store. returns null when the
+ * file's `openai` entry is missing, has the wrong type, or hasn't actually
+ * refreshed (refresh token unchanged from `originalRefresh`). */
+export declare function detectCodexRefresh(params: {
+    authFileContent: string;
+    originalRefresh: string;
+}): string | null;

package/dist/utils/errorReport.d.ts CHANGED Viewed

@@ -3,6 +3,15 @@ interface ReportErrorParams {
     toolState: ToolState;
     error: string;
     title?: string;
+    /**
+     * When the run has no pre-existing progress comment to update (silent
+     * IncrementalReview / pull_request_synchronize, mode-less polls), create
+     * a fresh issue comment on `toolState.issueNumber` instead of returning
+     * silently. Used for terminal errors (BillingError, TransientError) where
+     * the GH job summary is the only other surface and most users never open
+     * it. see #775.
+     */
+    createIfMissing?: boolean;
 }
 export declare function reportErrorToComment(ctx: ReportErrorParams): Promise<void>;
 export {};

package/dist/utils/learnings.d.ts CHANGED Viewed

@@ -1,3 +1,6 @@
+import type { ToolContext } from "../mcp/server.ts";
+import { MAX_LEARNINGS_LENGTH, truncateAtLineBoundary } from "./learningsTruncate.ts";
+export { MAX_LEARNINGS_LENGTH, truncateAtLineBoundary };
 /**
  * Repo-level learnings — operational facts about a repo (setup steps, test
  * commands, conventions, gotchas) that accumulate across agent runs and feed
@@ -40,3 +43,20 @@ export declare function seedLearningsFile(params: {
  * missing or unreadable (treated as "no change"). caps content at the
  * server's max length to avoid a 400 round-trip. */
 export declare function readLearningsFile(path: string): Promise<string | null>;
+/**
+ * Read the agent-edited repo-level learnings tmpfile and PATCH it to
+ * `Repo.learnings`.
+ *
+ * Best-effort: any failure is logged and does not affect the run's success
+ * status. Skips the PATCH when the file is byte-trim-identical to its seed —
+ * the agent didn't touch it, so writing the same content back would just
+ * burn a `LearningsRevision` row and an API round-trip.
+ *
+ * `ctx.toolState.model` is forwarded so `LearningsRevision.model` keeps
+ * populating; it powers the per-revision attribution badge in the UI
+ * history view.
+ *
+ * `learningsPersistAttempted` guards against double-execution between the
+ * normal end-of-run path and the SIGINT/SIGTERM handler.
+ */
+export declare function persistLearnings(ctx: ToolContext): Promise<void>;

package/dist/utils/learningsTruncate.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * pure string helpers for capping and line-boundary-truncating the
+ * `Repo.learnings` body. lives in its own module (vs alongside
+ * `learnings.ts`) so the proprietary root app can re-export it through
+ * `action/internal/index.ts` without dragging the entire MCP type graph
+ * along — `learnings.ts` imports `ToolContext` for its runtime helpers,
+ * and pulling that into the SDK-facing `internal` barrel expands the
+ * type graph reachable from root `tsc` and `cf-worker-indexing` to every
+ * tool module under `action/mcp/`. keeping these helpers MCP-free is the
+ * cheap structural fix.
+ *
+ * see `action/utils/learnings.ts` for the full learnings-file lifecycle.
+ */
+/** maximum size of `Repo.learnings` body in chars. action truncates the
+ * read-back BEFORE the PATCH to avoid sending an oversized payload; the
+ * server applies the same truncation as a defense-in-depth backstop (any
+ * caller that misses the client-side step would otherwise persist a
+ * mid-line tail, breaking the next-run TOC parse).
+ *
+ * raised from 10k → 100k once the TOC affordance landed: with line-range
+ * reads via the server-parsed TOC the agent doesn't ingest the whole
+ * file, so the cap is governed by curation discipline rather than a
+ * tight character ceiling. 100k holds ~400-500 short bullets. */
+export declare const MAX_LEARNINGS_LENGTH = 100000;
+export declare function truncateAtLineBoundary(body: string, cap: number): string;

package/dist/utils/lifecycle.d.ts CHANGED Viewed

@@ -2,20 +2,40 @@ export interface ExecuteLifecycleHookParams {
     event: string;
     script: string | null;
 }
+/** structured failure info — `output` on the `exit` variant is trimmed
+ * stderr, falling back to stdout when stderr is empty. */
+export type LifecycleHookFailure = {
+    kind: "exit";
+    exitCode: number;
+    output: string;
+} | {
+    kind: "timeout";
+} | {
+    kind: "spawn";
+    spawnError: string;
+};
 export interface LifecycleHookResult {
     /**
      * human-readable warning when the hook failed. includes retry guidance:
      * transient spawn/exit errors are worth retrying, timeouts and
      * persistent failures are not. absent when the hook succeeded or was
-     * skipped.
+     * skipped. setup/post-checkout callers surface this verbatim; prepush
+     * builds its own message from `failure` instead.
      */
     warning?: string;
+    /**
+     * structured failure info — undefined when the hook succeeded or was
+     * skipped. lets callers compose their own messaging without parsing the
+     * `warning` string.
+     */
+    failure?: LifecycleHookFailure;
 }
 /**
  * execute a lifecycle hook script if one is configured.
  *
  * soft-fails: instead of throwing on hook errors, returns a warning string
- * so callers can choose whether to surface it (mcp tools) or upgrade it to
- * a fatal error (setup/prepush). timeouts are flagged as non-retryable.
+ * (and structured failure info) so callers can choose whether to surface
+ * it (mcp tools) or upgrade it to a fatal error (setup). timeouts are
+ * flagged as non-retryable in the warning text.
  */
 export declare function executeLifecycleHook(params: ExecuteLifecycleHookParams): Promise<LifecycleHookResult>;

package/dist/utils/overrides.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Parse + apply the action's `unsafe_overrides` input — a JSON object of env
+ * var overrides that mutate `process.env` at the start of a run. Designed for
+ * e2e testing / debugging from `workflow_dispatch`; only callers with
+ * `actions:write` on the repo can supply it.
+ *
+ * The `unsafe` prefix is load-bearing: GH Actions echoes the value verbatim
+ * in the runner's step-header log, so the raw JSON (including any values
+ * passed in) is visible to anyone with `actions:read` on the calling repo.
+ * Treat the run log as compromised for any value placed in `unsafe_overrides`.
+ */
+/**
+ * Names refused even when present in the input. Overriding these would let a
+ * caller escape pullfrog's scope (GITHUB_TOKEN), break runner internals
+ * (ACTIONS_RUNTIME_*), forge OIDC tokens (ACTIONS_ID_TOKEN_REQUEST_*), or
+ * substitute our server-side auth (PULLFROG_API_SECRET). Customer-facing
+ * provider keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, CLAUDE_CODE_OAUTH_TOKEN,
+ * etc.) are intentionally NOT denied — overriding those is the use case.
+ */
+export declare const DENIED_OVERRIDE_NAMES: ReadonlySet<string>;
+export interface ApplyOverridesResult {
+    applied: string[];
+    denied: string[];
+}
+/** Parse the JSON input. Returns `{}` for empty/whitespace. Throws on shape errors. */
+export declare function parseOverrides(raw: string): Record<string, string>;
+/**
+ * Mutate `params.env` in place with the supplied JSON overrides, skipping any
+ * names in `DENIED_OVERRIDE_NAMES`. Each applied value is registered with
+ * `core.setSecret` so the runner masks it in subsequent log output, and the
+ * raw `UNSAFE_OVERRIDES` env var is deleted so spawned subprocesses don't
+ * inherit the original JSON (which would defeat both the deny-list and the
+ * masking by exposing the values verbatim).
+ *
+ * Returns the applied/denied breakdown so the caller can render an audit log.
+ */
+export declare function applyOverrides(params: {
+    raw: string;
+    env: NodeJS.ProcessEnv;
+}): ApplyOverridesResult;

package/dist/utils/payload.d.ts CHANGED Viewed

@@ -49,3 +49,10 @@ export declare function resolvePayload(resolvedPromptInput: ResolvedPromptInput,
     proxyModel: string | undefined;
 };
 export type ResolvedPayload = ReturnType<typeof resolvePayload>;
+/**
+ * Parse and validate the optional `output_schema` action input. Returns the
+ * parsed object when present, or `undefined` when absent. Throws on invalid
+ * JSON or non-object payloads — these are workflow-author errors that should
+ * surface immediately, not silently degrade to "no schema".
+ */
+export declare function resolveOutputSchema(): Record<string, unknown> | undefined;

package/dist/utils/prSummary.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import type { ToolContext } from "../mcp/server.ts";
 /**
  * The PR-level summary snapshot is a markdown file the agent edits in place
  * during a Review / IncrementalReview run. The server seeds the file with
@@ -38,3 +39,23 @@ export declare function seedSummaryFile(params: {
 /** read + validate the summary file written by the agent.
  * returns null when the file is missing or fails sanity checks. */
 export declare function readSummaryFile(path: string): Promise<string | null>;
+/**
+ * Fetch the most recent persisted PR summary snapshot for this PR.
+ * Returns null on first-time PRs, when summary is disabled, or on any error.
+ * Best-effort: a transient API failure should not block the run.
+ */
+export declare function fetchPreviousSnapshot(ctx: ToolContext, prNumber: number): Promise<string | null>;
+/**
+ * Read the agent-edited PR summary tmpfile and persist to
+ * `WorkflowRun.summarySnapshot`.
+ *
+ * Best-effort: any failure is logged and does not affect the run's success
+ * status. Skips the PATCH when the file is byte-identical to its seed —
+ * persisting the seed verbatim would either re-write what the DB already has
+ * (on incremental runs) or serialize the placeholder scaffold (on first
+ * runs), neither of which is useful.
+ *
+ * Invoked from both the success path and the SIGINT/SIGTERM handler;
+ * `summaryPersistAttempted` guards against double-execution.
+ */
+export declare function persistSummary(ctx: ToolContext): Promise<void>;

package/dist/utils/proxy.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Mint an OpenRouter proxy key via `/api/proxy-token` and inject it as
+ * `OPENROUTER_API_KEY` for runs that route through Pullfrog Router (managed
+ * billing accounts) or OSS-grant paths.
+ *
+ * Authenticates one of two ways:
+ *   - production: GitHub Actions OIDC token via `core.getIDToken`
+ *   - local dev (`API_URL` is localhost): `x-dev-repo` header bypass
+ *
+ * `runProxyResolution` is the entrypoint `main.ts` calls. It wraps
+ * `resolveProxyModel` and renders the user-facing copy itself (job summary
+ * + PR progress comment) before rethrowing the structured error — handled
+ * here, not in the outer `main()` catch, because `toolContext` doesn't
+ * exist yet at this point in the pipeline.
+ *
+ *   - 402 → `BillingError` (card declined, balance empty, 3DS, etc.)
+ *   - 503 → `TransientError` (transient sync issue — retry next dispatch)
+ */
+import type { ToolState } from "../toolState.ts";
+import type { ResolvedPayload } from "./payload.ts";
+export interface OidcCredentials {
+    requestUrl: string;
+    requestToken: string;
+}
+/**
+ * Run `resolveProxyModel`; if it throws a Billing or Transient error, render
+ * the user-facing summary, mirror it to the PR progress comment, and rethrow.
+ *
+ * The rethrow is intentional: these errors are terminal for the run, and
+ * letting them surface lets `runMain` exit non-zero so GH Actions applies
+ * the workflow's retry policy. We catch them *here* (before the main try)
+ * because the outer catch needs `toolContext` (which isn't built yet) for
+ * its general-purpose rendering path — a BillingError landing in the outer
+ * catch would get rendered with `core.setFailed` only, losing the
+ * actionable copy + the PR-comment mirror.
+ */
+export declare function runProxyResolution(ctx: {
+    payload: ResolvedPayload;
+    oss: boolean;
+    proxyModel?: string | undefined;
+    oidcCredentials: OidcCredentials | null;
+    repo: {
+        owner: string;
+        name: string;
+    };
+    toolState: ToolState;
+}): Promise<void>;

package/dist/utils/runContext.d.ts CHANGED Viewed

@@ -42,15 +42,6 @@ export interface RepoSettings {
  * `"payg"` = card on file / pay-as-you-go.
  */
 export type AccountPlan = "none" | "payg";
-/**
- * "Is Pullfrog absorbing marginal infra cost for this repo?" — composite
- * predicate over the two orthogonal dimensions (repo-level OSS, account-level
- * plan). Mirrors `isInfraCovered` in the server's `utils/billing.ts`.
- */
-export declare function isInfraCovered(params: {
-    isOss: boolean;
-    plan: AccountPlan;
-}): boolean;
 export interface RunContext {
     settings: RepoSettings;
     apiToken: string;

package/dist/utils/runErrorRenderer.d.ts ADDED Viewed

@@ -0,0 +1,41 @@
+/**
+ * Classify + render the error thrown out of the main run try-block into a
+ * pair of user-facing markdown bodies — one for the GitHub Actions job
+ * summary tab, one for the PR progress comment.
+ *
+ * Four classifications, in priority order:
+ *
+ *   1. `BillingError` — either the proxy-token mint already threw one (402
+ *      handled inline) or the agent runtime surfaced an OpenRouter
+ *      "key budget exhausted" string mid-run. Both render via
+ *      `formatBillingErrorSummary` so the user sees actionable copy.
+ *
+ *   2. Activity-timeout hang — `errorMessage` starts with
+ *      `"activity timeout"` or `"agent still pending"`. The harness keeps
+ *      structured diagnostic state on `toolState.agentDiagnostic`;
+ *      `formatAgentHangBody` renders that as a markdown block.
+ *
+ *   3. API-key auth error — `isApiKeyAuthError` sniffs the raw error string;
+ *      `formatApiKeyErrorSummary` renders provider + console-link copy.
+ *
+ *   4. Default — a generic `❌ Pullfrog failed` block with the raw error
+ *      message in a fenced code block. Same body for both surfaces.
+ *
+ * The hang body and the API-key body diverge between the two surfaces only
+ * in that the job summary wraps them in the `### ❌ Pullfrog failed` H3
+ * banner; the PR comment uses the bare body since it already has Pullfrog
+ * branding in its footer.
+ */
+import type { AgentDiagnostic } from "./agentHangReport.ts";
+export type RenderedRunError = {
+    summary: string;
+    comment: string;
+};
+export declare function renderRunError(input: {
+    errorMessage: string;
+    repo: {
+        owner: string;
+        name: string;
+    };
+    agentDiagnostic: AgentDiagnostic | undefined;
+}): RenderedRunError;