pullfrog 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,8 @@ export type { Mode } from "../modes.ts";
8
8
  export { modes } from "../modes.ts";
9
9
  export type { BuildPullfrogFooterParams, WorkflowRunFooterInfo, } from "../utils/buildPullfrogFooter.ts";
10
10
  export { buildPullfrogFooter, PULLFROG_DIVIDER, stripExistingFooter, } from "../utils/buildPullfrogFooter.ts";
11
+ export type { CodexAuthBody } from "../utils/codexOAuth.ts";
12
+ export { decodeJwtExpMs, OAuthInvalidGrantError, parseCodexAuthBody, refreshCodexAuthBody, stringifyCodexAuthBody, } from "../utils/codexOAuth.ts";
11
13
  export type { ResourceUsage, UsageSummary } from "../utils/github.ts";
12
14
  export { isLeapingIntoActionCommentBody, LEAPING_INTO_ACTION_PREFIX, } from "../utils/leapingComment.ts";
13
15
  export { MAX_LEARNINGS_LENGTH, truncateAtLineBoundary } from "../utils/learningsTruncate.ts";
package/dist/internal.js CHANGED
@@ -101,7 +101,7 @@ var providers = {
101
101
  "gemini-flash": {
102
102
  displayName: "Gemini Flash",
103
103
  resolve: "google/gemini-3.5-flash",
104
- openRouterResolve: "openrouter/google/gemini-3-flash-preview"
104
+ openRouterResolve: "openrouter/google/gemini-3.5-flash"
105
105
  }
106
106
  }
107
107
  }),
@@ -249,8 +249,8 @@ var providers = {
249
249
  },
250
250
  "gemini-flash": {
251
251
  displayName: "Gemini Flash",
252
- resolve: "opencode/gemini-3-flash",
253
- openRouterResolve: "openrouter/google/gemini-3-flash-preview"
252
+ resolve: "opencode/gemini-3.5-flash",
253
+ openRouterResolve: "openrouter/google/gemini-3.5-flash"
254
254
  },
255
255
  "kimi-k2": {
256
256
  displayName: "Kimi K2",
@@ -388,8 +388,8 @@ var providers = {
388
388
  },
389
389
  "gemini-flash": {
390
390
  displayName: "Gemini Flash",
391
- resolve: "openrouter/google/gemini-3-flash-preview",
392
- openRouterResolve: "openrouter/google/gemini-3-flash-preview"
391
+ resolve: "openrouter/google/gemini-3.5-flash",
392
+ openRouterResolve: "openrouter/google/gemini-3.5-flash"
393
393
  },
394
394
  grok: {
395
395
  displayName: "Grok",
@@ -704,6 +704,8 @@ function computeModes(agentId) {
704
704
 
705
705
  Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
706
706
 
707
+ Before dispatching, ensure \`origin/<base>\` is locally available \u2014 the runner is often a shallow single-branch \`actions/checkout\` (depth=1, head-only refspec), and the reviewer's \`git diff --merge-base origin/<base>\` will fail with \`ambiguous argument\` or \`no merge base\` otherwise. Run \`git fetch --no-tags --deepen=1000 origin <base>\` once; it's a no-op if the ref already has enough history. (The reviewer is read-only by contract, so it cannot do this itself \u2014 fetching is the orchestrator's job.)
708
+
707
709
  Compose your \`${REVIEWER_AGENT_NAME}\` dispatch prompt using this template verbatim, substituting the \`<...>\` placeholders. The preamble aligns the orchestrator side of the dispatch contract with the reviewer's baked-in system prompt \u2014 both ends say the same thing about where the work lives and what to do on an empty diff.
708
710
 
709
711
  \`\`\`
@@ -711,9 +713,11 @@ function computeModes(agentId) {
711
713
  This is a PRE-COMMIT Build-mode self-review. The work to review lives in the working tree (uncommitted), NOT in committed history.
712
714
 
713
715
  Branch: <branch> (off <base>)
714
- Canonical diff command: git diff origin/<base>
716
+ Canonical diff command: git diff --merge-base origin/<base>
717
+
718
+ Use \`--merge-base\` (single MCP \`git\` call, no shell substitution required). NOT bare \`git diff origin/<base>\` or two-dot \`git diff origin/<base>..HEAD\` \u2014 the symmetric forms include the inverse of every commit landed on \`<base>\` since this branch forked, which is noise (and the git tool will reject those forms when the divergence is detected). \`origin/<base>...HEAD\` (three-dot) and \`--cached\` both miss the uncommitted edits self-review runs on, so they're also wrong here.
715
719
 
716
- If that command returns empty, treat it as "no changes \u2014 nothing to review" and stop per your system prompt. Do not search for the work elsewhere.
720
+ If the merge-base diff returns empty, treat it as "no changes \u2014 nothing to review" and stop per your system prompt. Do not search for the work elsewhere.
717
721
 
718
722
  ## Your task
719
723
  <YOUR TASK content>
@@ -722,7 +726,7 @@ function computeModes(agentId) {
722
726
  <tight summary \u2014 what broke, root cause, the fix \u2014 or "no build-phase failures">
723
727
  \`\`\`
724
728
 
725
- Follow the template with the diff content (\`git diff origin/<base-branch>\`, single-rev form \u2014 \`main...HEAD\` and \`--cached\` both miss the uncommitted edits self-review runs on) and your task brief. Instruct the subagent to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
729
+ Follow the template with the diff content (\`git diff --merge-base origin/<base-branch>\` \u2014 single MCP \`git\` call, captures committed + staged + unstaged, excludes base-branch progress) and your task brief. Instruct the subagent to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
726
730
 
727
731
  Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
728
732
  - Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
@@ -864,7 +868,7 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
864
868
  You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches \u2014 concurrent context-pulling on the orchestrator side runs in parallel with the lens fan-out and costs zero extra wall time.
865
869
 
866
870
  if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip the fan-out entirely on a single subagent failure. each subagent gets:
867
- - the diff path / target \u2014 reading the diff and the codebase is its job
871
+ - **the absolute \`diffPath\` (and \`incrementalDiffPath\` if available) from step 2's \`${t("checkout_pr")}\` return, named verbatim in the dispatch prompt** (e.g. \`diffPath: /tmp/pullfrog-XXXX/pr-NNN-SHA.diff\`). the reviewer's baked-in system prompt selects its FIRST action on this token \u2014 paraphrasing ("review the diff", "look at this PR") sends it down the \`git diff origin/<base>\` fallback, which fails on shallow GHA checkouts. the subagent \`read\`s those files for scope; it must NOT re-derive the diff via \`git diff\` (bare \`git diff origin/<base>\` is symmetric and pulls in the inverse of any commits that landed on \`<base>\` since the branch forked \u2014 pure noise, and the git tool rejects it). reading and codebase exploration are still its job.
868
872
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
869
873
  - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
870
874
  - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
@@ -974,7 +978,7 @@ ${PR_SUMMARY_FORMAT}`
974
978
  You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches.
975
979
 
976
980
  if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body. each subagent gets:
977
- - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 8), not in the subagent prompt
981
+ - **the absolute diff path(s) from step 2's \`${t("checkout_pr")}\` return, named verbatim in the dispatch prompt.** when \`incrementalDiffPath\` is present, name BOTH (\`incrementalDiffPath: /tmp/.../pr-NNN-SHA-incremental.diff\` then \`diffPath: /tmp/.../pr-NNN-SHA.diff\`) \u2014 the reviewer's baked-in prompt reads incremental first and uses full for context; when only \`diffPath\` exists, name it alone. the subagent \`read\`s those files; it must NOT re-derive via \`git diff\` (bare \`git diff origin/<base>\` is symmetric and pulls in the inverse of base-branch progress \u2014 pure noise, and the git tool rejects it), and paraphrasing ("review the new commits") sends it down that fallback, which also fails on shallow GHA checkouts. do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 8), not in the subagent prompt.
978
982
  - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
979
983
  - **a Task \`description\` set to the lens name** \u2014 the harness reads this field to label log lines so parallel runs can be told apart.
980
984
  - if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs.
@@ -1157,6 +1161,92 @@ function stripExistingFooter(body) {
1157
1161
  return body.substring(0, dividerIndex).trimEnd();
1158
1162
  }
1159
1163
 
1164
+ // utils/codexOAuth.ts
1165
+ var CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann";
1166
+ var CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token";
1167
+ var OAuthInvalidGrantError = class extends Error {
1168
+ status;
1169
+ constructor(status, body) {
1170
+ super(`Codex token refresh failed: ${status} ${body}`);
1171
+ this.name = "OAuthInvalidGrantError";
1172
+ this.status = status;
1173
+ }
1174
+ };
1175
+ async function refreshCodexAuthBody(body) {
1176
+ const response = await fetch(CODEX_OAUTH_TOKEN_URL, {
1177
+ method: "POST",
1178
+ headers: { "Content-Type": "application/x-www-form-urlencoded" },
1179
+ body: new URLSearchParams({
1180
+ grant_type: "refresh_token",
1181
+ refresh_token: body.tokens.refresh_token,
1182
+ client_id: CODEX_OAUTH_CLIENT_ID
1183
+ }).toString(),
1184
+ signal: AbortSignal.timeout(1e4)
1185
+ });
1186
+ if (!response.ok) {
1187
+ const text = await response.text().catch(() => "");
1188
+ if (response.status >= 400 && response.status < 500) {
1189
+ throw new OAuthInvalidGrantError(response.status, text);
1190
+ }
1191
+ throw new Error(`Codex token refresh failed: ${response.status} ${text}`);
1192
+ }
1193
+ const tokens = await response.json();
1194
+ const idToken = tokens.id_token ?? body.tokens.id_token;
1195
+ const accountId = body.tokens.account_id;
1196
+ return {
1197
+ auth_mode: "chatgpt",
1198
+ tokens: {
1199
+ access_token: tokens.access_token,
1200
+ refresh_token: tokens.refresh_token,
1201
+ ...idToken ? { id_token: idToken } : {},
1202
+ ...accountId ? { account_id: accountId } : {}
1203
+ },
1204
+ last_refresh: (/* @__PURE__ */ new Date()).toISOString()
1205
+ };
1206
+ }
1207
+ function decodeJwtExpMs(token) {
1208
+ const parts = token.split(".");
1209
+ if (parts.length !== 3) return null;
1210
+ let payload;
1211
+ try {
1212
+ payload = JSON.parse(Buffer.from(parts[1], "base64url").toString("utf8"));
1213
+ } catch {
1214
+ return null;
1215
+ }
1216
+ if (typeof payload.exp !== "number" || !Number.isFinite(payload.exp)) return null;
1217
+ return payload.exp * 1e3;
1218
+ }
1219
+ function parseCodexAuthBody(raw) {
1220
+ let parsed;
1221
+ try {
1222
+ parsed = JSON.parse(raw);
1223
+ } catch {
1224
+ return null;
1225
+ }
1226
+ if (!parsed || typeof parsed !== "object") return null;
1227
+ const v = parsed;
1228
+ if (v.auth_mode !== "chatgpt") return null;
1229
+ const tokens = v.tokens;
1230
+ if (!tokens || typeof tokens !== "object") return null;
1231
+ const t = tokens;
1232
+ if (typeof t.access_token !== "string" || t.access_token.length === 0) return null;
1233
+ if (typeof t.refresh_token !== "string" || t.refresh_token.length === 0) return null;
1234
+ return {
1235
+ auth_mode: "chatgpt",
1236
+ tokens: {
1237
+ access_token: t.access_token,
1238
+ refresh_token: t.refresh_token,
1239
+ ...typeof t.id_token === "string" ? { id_token: t.id_token } : {},
1240
+ ...typeof t.account_id === "string" ? { account_id: t.account_id } : {}
1241
+ },
1242
+ ...typeof v.last_refresh === "string" ? { last_refresh: v.last_refresh } : {}
1243
+ };
1244
+ }
1245
+ function stringifyCodexAuthBody(body) {
1246
+ return `${JSON.stringify(body, null, 2)}
1247
+ `;
1248
+ }
1249
+
1160
1250
  // utils/leapingComment.ts
1161
1251
  var LEAPING_INTO_ACTION_PREFIX = "Leaping into action";
1162
1252
  function isLeapingIntoActionCommentBody(body) {
@@ -1292,10 +1382,12 @@ export {
1292
1382
  DEFAULT_PROXY_MODEL,
1293
1383
  LEAPING_INTO_ACTION_PREFIX,
1294
1384
  MAX_LEARNINGS_LENGTH,
1385
+ OAuthInvalidGrantError,
1295
1386
  PULLFROG_DIVIDER,
1296
1387
  TIMEOUT_DISABLED,
1297
1388
  buildPullfrogFooter,
1298
1389
  createLeapingProgressComment,
1390
+ decodeJwtExpMs,
1299
1391
  deleteProgressCommentApi,
1300
1392
  getModelEnvVars,
1301
1393
  getModelManagedCredentials,
@@ -1306,14 +1398,17 @@ export {
1306
1398
  isValidTimeString,
1307
1399
  modelAliases,
1308
1400
  modes,
1401
+ parseCodexAuthBody,
1309
1402
  parseModel,
1310
1403
  parseTimeString,
1311
1404
  providers,
1312
1405
  pullfrogMcpName,
1406
+ refreshCodexAuthBody,
1313
1407
  resolveCliModel,
1314
1408
  resolveDisplayAlias,
1315
1409
  resolveModelSlug,
1316
1410
  resolveOpenRouterModel,
1411
+ stringifyCodexAuthBody,
1317
1412
  stripExistingFooter,
1318
1413
  truncateAtLineBoundary,
1319
1414
  updateProgressComment
@@ -102,7 +102,7 @@ export interface ToolState {
102
102
  learningsFilePath?: string;
103
103
  learningsSeed?: string;
104
104
  learningsPersistAttempted?: boolean;
105
- output?: string;
105
+ output?: string | undefined;
106
106
  usageEntries: AgentUsage[];
107
107
  model?: string | undefined;
108
108
  modelFallback?: {
@@ -1,10 +1,19 @@
1
- /** check if the user has a BYOK key for the given model's provider (does not throw) */
2
- export declare function hasProviderKey(model: string): boolean;
1
+ /**
2
+ * Validate that the resolved model can actually be served by the chosen
3
+ * agent. For routing slugs (Bedrock / Vertex) the auth shape is multi-var
4
+ * (auth + region/location + model-id) and `opencode models` doesn't catch
5
+ * gaps in the latter two — keep dedicated setup validators. For the
6
+ * opencode path, the authoritative answer comes from OpenCode's own model
7
+ * introspection (`authorized` set captured in `openCodeModels.ts`). For
8
+ * the claude path, fall back to the static check (`ANTHROPIC_API_KEY` /
9
+ * `CLAUDE_CODE_OAUTH_TOKEN`).
10
+ */
3
11
  export declare function validateAgentApiKey(params: {
4
12
  agent: {
5
13
  name: string;
6
14
  };
7
15
  model: string | undefined;
16
+ authorized: Set<string>;
8
17
  owner: string;
9
18
  name: string;
10
19
  }): void;
@@ -1,7 +1,6 @@
1
1
  /**
2
2
  * Slug we fall back to when a BYOK-required model is configured but the
3
- * runner has no provider key in env. Picked because it's free
4
- * (`isFree: true`, `envVars: []` — see `action/models.ts`), stable, and
3
+ * runner has no provider key in env. Picked because it's free, stable, and
5
4
  * currently served by OpenCode Zen without a key.
6
5
  *
7
6
  * The slug is intentionally hard-coded and not a config knob — the
@@ -18,32 +17,22 @@ export type FallbackDecision = {
18
17
  to: string;
19
18
  };
20
19
  /**
21
- * If the resolved model requires a BYOK key but no provider key is
22
- * available in env, return `fallback: true` with a free OpenCode slug
23
- * so the run can still succeed. Caller is responsible for swapping the
24
- * model state and surfacing the fallback (log line + run summary).
20
+ * If the resolved model is NOT in OpenCode's `authorized` set (the
21
+ * authoritative "what can OpenCode route right now" snapshot captured
22
+ * after dbSecrets + Codex auth.json are in place), swap to a free
23
+ * OpenCode slug so the run can still produce value. Caller is responsible
24
+ * for surfacing the swap (log line + run summary).
25
25
  *
26
- * Gates on `resolvedModel` directly (not the configured slug) so the
27
- * decision matches both code paths that reach this point: payload-based
28
- * config (`repo.model` from DB) and `PULLFROG_MODEL` env var. Both end
29
- * up in `resolvedModel` after `resolveModel()` runs upstream.
30
- *
31
- * Skip cases:
32
- * - Router / proxy runs (`proxyModel` set): Pullfrog mints the key,
33
- * no BYOK in play — never fall back.
34
- * - No resolved model: keeps the existing auto-select-with-throw
35
- * behavior in `validateAgentApiKey` for the "neither model nor
36
- * key" case (genuine misconfig the user should see).
37
- * - Resolved model is itself the free fallback: avoid suggesting we
38
- * fell back to the model we're already running.
39
- * - Resolved model is a Bedrock raw ID (no `/`): Bedrock has its own
40
- * auth shape (`AWS_BEARER_TOKEN_BEDROCK` + region + model ID), and
41
- * `validateBedrockSetup` already surfaces a tailored error. Skipping
42
- * here also avoids `parseModel`'s slash requirement crashing inside
43
- * `hasProviderKey`.
44
- * - Resolved model has its provider key present: no fallback needed.
26
+ * Skip cases (return `fallback: false` without consulting `authorized`):
27
+ * - Router / proxy runs (`proxyModel` set): Pullfrog mints the key.
28
+ * - No resolved model: auto-select handles it downstream.
29
+ * - Resolved model is the free fallback already.
30
+ * - Resolved model is a raw Bedrock / Vertex ID (no `/`): the routing
31
+ * validators (`validateBedrockSetup` / `validateVertexSetup`) cover
32
+ * auth + region/location/model-id; `opencode models` does not.
45
33
  */
46
34
  export declare function selectFallbackModelIfNeeded(input: {
47
35
  resolvedModel: string | undefined;
48
36
  proxyModel: string | undefined;
37
+ authorized: Set<string>;
49
38
  }): FallbackDecision;
@@ -1,15 +1,28 @@
1
+ /** sandbox-hidden home for pullfrog-managed on-disk secrets in CI. bash via
2
+ * MCP shell tmpfs-overlays this path; opencode's internal auth module
3
+ * bypasses external_directory and reaches the real file. mirrors the
4
+ * pattern in action/agents/claude.ts installManagedSettings.
5
+ *
6
+ * not used for codex auth in local dev — the sandbox is no-op there, so
7
+ * the path doesn't matter. local dev keeps the existing $HOME path. */
8
+ export declare const PULLFROG_DATA_DIR = "/var/lib/pullfrog";
1
9
  export interface InstalledCodexAuth {
2
10
  /** absolute path of the auth.json we wrote — caller passes this to the
3
11
  * post-hook via core.saveState for refresh-detection later. */
4
12
  authPath: string;
5
13
  /** value to set as XDG_DATA_HOME for the OpenCode subprocess. */
6
14
  xdgDataHome: string;
7
- /** refresh_token from the env at materialization time. post-hook compares
8
- * against the on-disk file after the run to detect whether OpenCode
9
- * refreshed during the session. */
15
+ /** refresh_token from the env at materialization time. post-hook
16
+ * compares against the on-disk file after the run to detect whether
17
+ * OpenCode refreshed during the session (only happens on long runs
18
+ * that span >50min — see wiki/codex-auth.md "Concurrency"). */
10
19
  originalRefresh: string;
11
20
  }
12
21
  /** materialize CODEX_AUTH_JSON from env into a disk path OpenCode reads from.
13
22
  * returns null when the env var is absent, malformed, or wrong auth mode —
14
- * caller treats null as "no codex auth, fall through to API key flow". */
23
+ * caller treats null as "no codex auth, fall through to API key flow".
24
+ *
25
+ * The env value is server-side guaranteed fresh by `maybeRotateCodexSecret`
26
+ * in the run-context endpoint. We only parse + write it here; no refresh,
27
+ * no DB interaction. */
15
28
  export declare function installCodexAuth(): InstalledCodexAuth | null;
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Pure-stdlib (fetch + Buffer) Codex OAuth refresh + JWT exp decoding.
3
+ *
4
+ * Lives here (not in codexAuth.ts) so the Next.js server side can import it
5
+ * via pullfrog/internal without dragging in node:child_process / spawn /
6
+ * mkdtemp from the rest of codexAuth.ts. Used by:
7
+ * - action/utils/codexAuth.ts (re-exports refreshCodexAuthBody)
8
+ * - utils/codexSecretRotation.ts (server-side maybeRotate at run-context)
9
+ *
10
+ * See wiki/codex-auth.md for the end-to-end refresh lifecycle.
11
+ */
12
+ export interface CodexAuthBody {
13
+ auth_mode: "chatgpt";
14
+ tokens: {
15
+ access_token: string;
16
+ refresh_token: string;
17
+ id_token?: string;
18
+ account_id?: string;
19
+ };
20
+ last_refresh?: string;
21
+ }
22
+ /** OAuth client id Codex CLI and OpenCode both use against `auth.openai.com`.
23
+ * Same chain — a refresh token minted via `codex login --device-auth` can be
24
+ * refreshed against this client_id. */
25
+ export declare const CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann";
26
+ export declare const CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token";
27
+ /** thrown when the OAuth provider rejects the refresh token (4xx). callers
28
+ * can distinguish "race-lost / token revoked" from network errors via
29
+ * `instanceof OAuthInvalidGrantError`. */
30
+ export declare class OAuthInvalidGrantError extends Error {
31
+ readonly status: number;
32
+ constructor(status: number, body: string);
33
+ }
34
+ /** force one refresh round-trip against the OAuth provider. returns the
35
+ * rotated Codex-shaped blob (the auth.json body verbatim). does NOT persist
36
+ * — caller is responsible for writing back to wherever the token lives.
37
+ *
38
+ * server-side callers (maybeRotateCodexSecret) hold a DB row lock around
39
+ * this call so concurrent runs serialize: first one rotates, subsequent
40
+ * ones see the fresh value and skip. The 10s timeout is critical for that
41
+ * use: it caps how long a stalled auth.openai.com holds the row lock,
42
+ * keeping us well under the enclosing 30s transaction budget so the lock
43
+ * always releases and queued callers get a turn instead of timing out on
44
+ * the tx wrapper. Real OAuth latency is sub-second; 10s is generous. */
45
+ export declare function refreshCodexAuthBody(body: CodexAuthBody): Promise<CodexAuthBody>;
46
+ /** decode the access_token's JWT payload and return its `exp` claim in ms
47
+ * since epoch. returns null if the token isn't a parseable JWT or has no
48
+ * `exp` claim — caller falls back to "treat as expired".
49
+ *
50
+ * We don't verify the JWT signature (we'd need OpenAI's JWKS); we're only
51
+ * using the claim as a freshness hint. The actual auth check happens
52
+ * server-side at OpenAI when the token is used — trusting a fake JWT here
53
+ * would just delay the inevitable 401 from OpenAI. No security boundary
54
+ * at this decode step. */
55
+ export declare function decodeJwtExpMs(token: string): number | null;
56
+ /** parse + validate a Codex auth.json body from its JSON-string form.
57
+ * returns null on any shape mismatch — caller treats as "no codex auth". */
58
+ export declare function parseCodexAuthBody(raw: string): CodexAuthBody | null;
59
+ /** serialize a CodexAuthBody to its canonical on-disk form. */
60
+ export declare function stringifyCodexAuthBody(body: CodexAuthBody): string;
@@ -1,9 +1,14 @@
1
1
  /**
2
2
  * git authentication via GIT_ASKPASS.
3
3
  *
4
- * a localhost HTTP server serves tokens via single-use UUID codes.
5
- * each $git() call writes a unique askpass script with the server
6
- * port+code baked into the file body — no secrets in subprocess env.
4
+ * a localhost HTTP server serves tokens via UUID codes whose lifetime is
5
+ * bounded by the parent $git() invocation: register() makes the code active,
6
+ * the script (and any sibling subprocess — e.g. git-lfs pre-push) can fetch
7
+ * the token any number of times, and $git()'s finally calls revoke() to
8
+ * close the window. each $git() call writes a unique askpass script with
9
+ * the server port+code baked into the file body — no secrets in subprocess
10
+ * env. a replay of a revoked code trips a 409 and revokes the underlying
11
+ * github installation token.
7
12
  *
8
13
  * see wiki/askpass.md for full security documentation.
9
14
  */
@@ -35,9 +40,13 @@ export declare function setGitAuthServer(server: GitAuthServer): void;
35
40
  * a remote and need credentials. working-tree operations (checkout, merge)
36
41
  * use $() from shell.ts which has no token.
37
42
  *
38
- * per call: registers a one-time code with the auth server, writes a
39
- * unique askpass script with port+code baked in, spawns git with
40
- * GIT_ASKPASS pointing to the script, and deletes the script in finally.
43
+ * per call: registers a code with the auth server (valid for the lifetime
44
+ * of this invocation), writes a unique askpass script with port+code baked
45
+ * in, spawns git with GIT_ASKPASS pointing to the script. on completion,
46
+ * revokes the code and deletes the script in finally. multiple sibling
47
+ * askpass calls within one invocation (e.g. git itself + git-lfs pre-push)
48
+ * all see a valid code; replay attempts after finally trip a 409 and the
49
+ * server revokes the underlying github token as a tamper signal.
41
50
  *
42
51
  * @example
43
52
  * await $git("fetch", ["origin", "main"], { token });
@@ -1,16 +1,22 @@
1
1
  /**
2
2
  * ASKPASS-based git authentication server.
3
3
  *
4
- * serves tokens via a localhost HTTP server with single-use UUID codes.
4
+ * serves tokens via a localhost HTTP server with per-$git()-call UUID codes.
5
5
  * each $git() call gets a unique askpass script with the port+code baked in.
6
6
  * the token never appears in subprocess env — only the script file path.
7
7
  *
8
- * tamper-evident: if a code is used twice, the second request triggers
9
- * immediate token revocation via the GitHub API as a precaution.
8
+ * lifetime: the code is valid for as long as the $git() invocation is
9
+ * running. multiple askpass calls within one invocation (e.g. git's own
10
+ * fetch/push + a git-lfs pre-push hook that also authenticates) all
11
+ * succeed. $git() calls revoke(code) in finally; subsequent requests for
12
+ * a revoked code trigger immediate token revocation via the GitHub API
13
+ * as a tamper-evidence precaution (an agent replaying the code after the
14
+ * legitimate window has closed is the realistic attack we still catch).
10
15
  */
11
16
  export type GitAuthServer = {
12
17
  port: number;
13
18
  register: (token: string) => string;
19
+ revoke: (code: string) => void;
14
20
  writeAskpassScript: (code: string) => string;
15
21
  close: () => Promise<void>;
16
22
  [Symbol.asyncDispose]: () => Promise<void>;
@@ -1,6 +1,18 @@
1
1
  export interface ExecuteLifecycleHookParams {
2
2
  event: string;
3
3
  script: string | null;
4
+ /**
5
+ * when true, after the hook runs (success or failure), discard tracked-file
6
+ * mods so the agent doesn't see hook-generated drift (e.g. `pnpm install`
7
+ * rewriting a lockfile). untracked files are preserved — hooks that
8
+ * intentionally materialize files (e.g. a `.env` from a template) stay
9
+ * visible to the agent. skipped (with a warning) if the tree had
10
+ * pre-existing tracked changes before the hook ran, so we never clobber
11
+ * pre-existing work; pre-existing untracked files are ignored for this
12
+ * gate because `git restore --staged --worktree .` doesn't touch them
13
+ * anyway. no-op when no script was configured.
14
+ */
15
+ normalizeWorkingTreeAfter?: boolean;
4
16
  }
5
17
  /** structured failure info — `output` on the `exit` variant is trimmed
6
18
  * stderr, falling back to stdout when stderr is empty. */
@@ -0,0 +1,11 @@
1
+ /** Snapshot the set of models OpenCode can serve from the current env, BEFORE
2
+ * Pullfrog-stored credentials are merged in. Call once early in `main.ts`. */
3
+ export declare function captureBaselineModels(cliPath: string): void;
4
+ /** Snapshot the set of models OpenCode can serve AFTER dbSecrets +
5
+ * Codex auth.json are in place. Logs the diff against the baseline as
6
+ * `» BYOK auth enabled N model(s): …`. */
7
+ export declare function captureAuthorizedModels(cliPath: string): void;
8
+ /** Authorized set captured after Pullfrog-stored auth is applied. Throws if
9
+ * called before `captureAuthorizedModels` — the call sites (fallback gate,
10
+ * api-key validation, auto-select) all run strictly after capture. */
11
+ export declare function getAuthorizedModels(): Set<string>;
@@ -0,0 +1,32 @@
1
+ export type SupportedPackageManager = "npm" | "pnpm" | "yarn" | "bun";
2
+ export interface PackageManagerSpec {
3
+ name: SupportedPackageManager;
4
+ /**
5
+ * either a concrete semver (e.g. "11.1.1") or a range (e.g. "^11.0.0").
6
+ * `concrete` distinguishes — corepack only accepts concrete versions.
7
+ */
8
+ version: string;
9
+ concrete: boolean;
10
+ /** which package.json field this came from */
11
+ source: "devEngines" | "packageManager";
12
+ }
13
+ /**
14
+ * resolve the project's intended package manager from package.json. precedence
15
+ * matches pnpm 11+: `devEngines.packageManager` wins over `packageManager`.
16
+ * when both are present, a concrete `packageManager` that satisfies a
17
+ * `devEngines` range is preferred (we can pin it via corepack); otherwise
18
+ * we warn on disagreement and stick with `devEngines`.
19
+ */
20
+ export declare function resolvePackageManagerSpec(cwd: string): Promise<PackageManagerSpec | null>;
21
+ /**
22
+ * ensure the requested package manager is on PATH at the declared version,
23
+ * provisioning via corepack when applicable. returns true if PATH now
24
+ * resolves to that version, false if we couldn't pin it (in which case
25
+ * the caller should treat PATH as untrusted and may fall back to its
26
+ * legacy install path).
27
+ *
28
+ * never throws: network failure, missing corepack, range-only versions —
29
+ * all degrade to "log warning, return false". the existing PATH binary
30
+ * still works; we just don't get our version guarantee.
31
+ */
32
+ export declare function ensurePackageManager(spec: PackageManagerSpec): Promise<boolean>;
@@ -1,3 +1,5 @@
1
+ /** Stable label for the BYOK provider-billing-exhausted classification. */
2
+ export declare const PROVIDER_BILLING_EXHAUSTED_LABEL = "provider billing exhausted";
1
3
  /**
2
4
  * Result of a provider-error scan: the classification label plus a
3
5
  * human-readable excerpt centered on the matched line. The excerpt is what
@@ -11,3 +13,19 @@ export type ProviderErrorMatch = {
11
13
  export declare function findProviderErrorMatch(text: string): ProviderErrorMatch | null;
12
14
  export declare function detectProviderError(text: string): string | null;
13
15
  export declare function isRouterKeylimitExhaustedError(text: string): boolean;
16
+ /**
17
+ * BYOK billing-exhausted: provider rejected the request because the user's
18
+ * provider wallet is empty (DeepSeek "Insufficient Balance", Anthropic
19
+ * "credit balance is too low", OpenCode Zen `CreditsError` /
20
+ * `FreeUsageLimitError`, Gemini "spending cap"). Distinct from
21
+ * `isRouterKeylimitExhaustedError` — that's Pullfrog's Router wallet, this
22
+ * is the user's own provider account.
23
+ */
24
+ export declare function isProviderBillingExhausted(text: string): boolean;
25
+ /**
26
+ * Extract `providerID=foo` from agent error logs (OpenCode emits this on
27
+ * `provider error detected (...)` lines). Returns the lowercase provider
28
+ * slug, or null when absent. Used to render a provider-specific dashboard
29
+ * link in the BYOK billing-exhausted summary.
30
+ */
31
+ export declare function extractProviderId(text: string): string | null;
@@ -3,22 +3,37 @@
3
3
  * pair of user-facing markdown bodies — one for the GitHub Actions job
4
4
  * summary tab, one for the PR progress comment.
5
5
  *
6
- * Four classifications, in priority order:
6
+ * Classifications, in dispatch order (first match wins; the api-key
7
+ * branch additionally folds in the activity-timeout hang body as a
8
+ * sub-source so a hang masking an api-key error still surfaces the api-key
9
+ * CTA):
7
10
  *
8
11
  * 1. `BillingError` — either the proxy-token mint already threw one (402
9
12
  * handled inline) or the agent runtime surfaced an OpenRouter
10
13
  * "key budget exhausted" string mid-run. Both render via
11
14
  * `formatBillingErrorSummary` so the user sees actionable copy.
12
15
  *
13
- * 2. Activity-timeout hang — `errorMessage` starts with
14
- * `"activity timeout"` or `"agent still pending"`. The harness keeps
15
- * structured diagnostic state on `toolState.agentDiagnostic`;
16
- * `formatAgentHangBody` renders that as a markdown block.
16
+ * 2. BYOK provider billing-exhausted (#835) — DeepSeek "Insufficient
17
+ * Balance", Anthropic "credit balance is too low", OpenCode Zen
18
+ * `CreditsError`, Gemini "spending cap". Checked before api-key auth
19
+ * because billing-exhausted responses often carry 401 status codes
20
+ * that `isApiKeyAuthError` would otherwise mis-classify.
17
21
  *
18
- * 3. API-key auth error — `isApiKeyAuthError` sniffs the raw error string;
19
- * `formatApiKeyErrorSummary` renders provider + console-link copy.
22
+ * 3. API-key auth error — `isApiKeyAuthError` sniffs the raw error string
23
+ * (or the activity-timeout hang body when present, since that's where
24
+ * the underlying provider error often lands); `formatApiKeyErrorSummary`
25
+ * renders provider + console-link copy.
20
26
  *
21
- * 4. Default — a generic `❌ Pullfrog failed` block with the raw error
27
+ * 4. ProviderModelNotFoundError — stale free-fallback model id no longer
28
+ * in the OpenCode catalog; renders a nudge to add a BYOK key.
29
+ *
30
+ * 5. Activity-timeout hang — `errorMessage` starts with
31
+ * `"activity timeout"` or `"agent still pending"` AND none of the
32
+ * above matched. The harness keeps structured diagnostic state on
33
+ * `toolState.agentDiagnostic`; `formatAgentHangBody` renders that as
34
+ * a markdown block.
35
+ *
36
+ * 6. Default — a generic `❌ Pullfrog failed` block with the raw error
22
37
  * message in a fenced code block. Same body for both surfaces.
23
38
  *
24
39
  * The hang body and the API-key body diverge between the two surfaces only
@@ -71,6 +71,13 @@ export declare function finalizeSuccessRun(input: {
71
71
  *
72
72
  * `lastProgressBody` and the usage table are appended to the summary so the
73
73
  * partial work the agent did before failing isn't lost.
74
+ *
75
+ * `createIfMissing: true` is symmetric with `finalizeSuccessRun` — silent
76
+ * triggers (IncrementalReview / pull_request_synchronize / auto-label) that
77
+ * throw past `finalizeSuccessRun` (e.g. timeout race kills the agent
78
+ * mid-billing-exhausted-retry) reach this catch path with no progress
79
+ * comment to update, and without `createIfMissing` the terminal error
80
+ * lands only in the GH job summary that most users never open. see #835.
74
81
  */
75
82
  export declare function writeRunErrorOutputs(input: {
76
83
  rendered: RenderedRunError;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pullfrog",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "pullfrog": "dist/cli.mjs",
@@ -31,6 +31,7 @@
31
31
  "@ark/util": "0.56.0",
32
32
  "@clack/prompts": "^1.2.0",
33
33
  "@modelcontextprotocol/sdk": "^1.26.0",
34
+ "@opencode-ai/sdk": "1.2.27",
34
35
  "@octokit/plugin-throttling": "^11.0.3",
35
36
  "@octokit/rest": "^22.0.0",
36
37
  "@octokit/webhooks-types": "^7.6.1",