pullfrog 0.0.200 → 0.0.202

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/internal.js CHANGED
@@ -11,7 +11,7 @@ var providers = {
11
11
  models: {
12
12
  "claude-opus": {
13
13
  displayName: "Claude Opus",
14
- resolve: "anthropic/claude-opus-4-6",
14
+ resolve: "anthropic/claude-opus-4-7",
15
15
  openRouterResolve: "openrouter/anthropic/claude-opus-4.6",
16
16
  preferred: true
17
17
  },
@@ -39,7 +39,7 @@ var providers = {
39
39
  },
40
40
  "gpt-codex-mini": {
41
41
  displayName: "GPT Codex Mini",
42
- resolve: "openai/codex-mini-latest",
42
+ resolve: "openai/gpt-5.1-codex-mini",
43
43
  openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini"
44
44
  },
45
45
  o3: {
@@ -129,7 +129,7 @@ var providers = {
129
129
  },
130
130
  "claude-opus": {
131
131
  displayName: "Claude Opus",
132
- resolve: "opencode/claude-opus-4-6",
132
+ resolve: "opencode/claude-opus-4-7",
133
133
  openRouterResolve: "openrouter/anthropic/claude-opus-4.6"
134
134
  },
135
135
  "claude-sonnet": {
@@ -319,7 +319,7 @@ function formatMcpToolRef(agentId, toolName) {
319
319
  switch (agentId) {
320
320
  case "claude":
321
321
  return `mcp__${pullfrogMcpName}__${toolName}`;
322
- case "opentoad":
322
+ case "opencode":
323
323
  return `${pullfrogMcpName}_${toolName}`;
324
324
  default:
325
325
  return agentId;
@@ -368,7 +368,7 @@ GitHub's markdown parser requires a blank line between ALL block-level elements.
368
368
  Rules:
369
369
  - \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
370
370
  - ALL variable names, identifiers, and file names in body text must be in backticks
371
- - ALL file references MUST link to the PR Files Changed view. Compute anchors by running \`echo -n 'path/to/file.ts' | sha256sum\` via shell for each file. NEVER fabricate hex strings \u2014 run the actual command. If shell is unavailable, omit the #diff- anchor rather than guessing.
371
+ - ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
372
372
  - Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
373
373
  - Do NOT include raw diff stats like '+123 / -45' or line counts
374
374
  - Do NOT include code blocks or repeat diff contents
@@ -443,7 +443,7 @@ ${learningsStep(t, 6)}`
443
443
  description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
444
444
  prompt: `### Checklist
445
445
 
446
- 1. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. Read the diff to identify the major areas of change.
446
+ 1. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC first and treat its file line ranges as your coverage checklist.
447
447
 
448
448
  2. For each area of change:
449
449
  - read the diff and trace data flow, check boundaries, and verify assumptions
@@ -460,6 +460,7 @@ ${learningsStep(t, 6)}`
460
460
  4. Submit \u2014 ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`.
461
461
  Do NOT call \`report_progress\` \u2014 the review is the final record and the progress
462
462
  comment will be cleaned up automatically.
463
+ note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
463
464
 
464
465
  - **critical issues** (blocks merge \u2014 bugs, security, data loss):
465
466
  \`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
@@ -477,7 +478,7 @@ ${learningsStep(t, 6)}`
477
478
  description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
478
479
  prompt: `### Checklist
479
480
 
480
- 1. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available).
481
+ 1. Checkout the PR via \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
481
482
 
482
483
  2. If \`incrementalDiffPath\` is present, read it to see what changed since the last review. This is a range-diff that isolates the net changes, filtering out base branch noise. If not present, fall back to reviewing the full PR diff.
483
484
 
@@ -501,6 +502,7 @@ ${learningsStep(t, 6)}`
501
502
  - in some cases you may receive a complete diff for the whole pull request instead of an incremental one. when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
502
503
 
503
504
  7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. the review body always includes the reviewed changes from step 6a. append \`Prior review feedback:\\n\` with the checklist from step 6b only if any prior comments were addressed. Follow these rules:
505
+ - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
504
506
  - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
505
507
  - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the reviewed changes summary and prior feedback (if any).
506
508
  - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the reviewed changes summary and prior feedback (if any).
@@ -605,7 +607,7 @@ ${PR_SUMMARY_FORMAT}`
605
607
  }
606
608
  ];
607
609
  }
608
- var modes = computeModes("opentoad");
610
+ var modes = computeModes("opencode");
609
611
 
610
612
  // utils/buildPullfrogFooter.ts
611
613
  var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
@@ -1,2 +1,2 @@
1
1
  /** timeout for lifecycle hook scripts */
2
- export declare const LIFECYCLE_HOOK_TIMEOUT_MS = 120000;
2
+ export declare const LIFECYCLE_HOOK_TIMEOUT_MS = 600000;
@@ -5,6 +5,9 @@ export type FormatFilesResult = {
5
5
  content: string;
6
6
  toc: string;
7
7
  };
8
+ export type FetchAndFormatPrDiffResult = FormatFilesResult & {
9
+ files: PullFile[];
10
+ };
8
11
  /**
9
12
  * formats PR files with explicit line numbers for each code line.
10
13
  * preserves all original diff info (file headers, hunk headers) and adds:
@@ -19,6 +22,7 @@ export type CheckoutPrResult = {
19
22
  success: true;
20
23
  number: number;
21
24
  title: string;
25
+ body: string | null;
22
26
  base: string;
23
27
  localBranch: string;
24
28
  remoteBranch: string;
@@ -29,13 +33,21 @@ export type CheckoutPrResult = {
29
33
  diffPath: string;
30
34
  incrementalDiffPath?: string | undefined;
31
35
  toc: string;
36
+ commitCount: number;
37
+ commitLog: string;
38
+ /** true when commitLog was capped because the PR has more commits than we render */
39
+ commitLogTruncated: boolean;
40
+ /** true when commit metadata could not be computed (e.g. base ref unreachable after shallow fetch). commitCount/commitLog are zero/empty in that case, not "no commits". */
41
+ commitLogUnavailable: boolean;
42
+ /** non-fatal warning from the post-checkout lifecycle hook, if any */
43
+ hookWarning?: string | undefined;
32
44
  instructions: string;
33
45
  };
34
46
  /**
35
47
  * fetches PR files from GitHub and formats them with line numbers and TOC.
36
48
  * this is the core diff formatting logic, extracted for testability.
37
49
  */
38
- export declare function fetchAndFormatPrDiff(ctx: ToolContext, pullNumber: number): Promise<FormatFilesResult>;
50
+ export declare function fetchAndFormatPrDiff(ctx: ToolContext, pullNumber: number): Promise<FetchAndFormatPrDiffResult>;
39
51
  import type { GitContext } from "../utils/setup.ts";
40
52
  export type PrData = {
41
53
  number: number;
@@ -54,7 +66,9 @@ type CheckoutPrBranchParams = GitContext & {
54
66
  * Assumes origin remote is already configured with authentication.
55
67
  * Updates toolState.issueNumber, toolState.checkoutSha, and toolState.pushUrl (for fork PRs).
56
68
  */
57
- export declare function checkoutPrBranch(pr: PrData, params: CheckoutPrBranchParams): Promise<void>;
69
+ export declare function checkoutPrBranch(pr: PrData, params: CheckoutPrBranchParams): Promise<{
70
+ hookWarning?: string | undefined;
71
+ }>;
58
72
  export declare function CheckoutPrTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
59
73
  pull_number: number;
60
74
  }, {
@@ -5,6 +5,7 @@ import type { ToolContext } from "./server.ts";
5
5
  * and hasn't been updated with progress or error messages.
6
6
  */
7
7
  export declare const LEAPING_INTO_ACTION_PREFIX = "Leaping into action";
8
+ export declare function isLeapingIntoActionCommentBody(body: string): boolean;
8
9
  export declare function addFooter(ctx: ToolContext, body: string): string;
9
10
  export declare const Comment: import("arktype/internal/variants/object.ts").ObjectType<{
10
11
  issueNumber: number;
@@ -0,0 +1,17 @@
1
+ import type { StandardSchemaV1 } from "@standard-schema/spec";
2
+ import type { Tool } from "fastmcp";
3
+ import type { ToolContext } from "./server.ts";
4
+ /**
5
+ * Recursively transform a JSON schema to gemini's stricter subset.
6
+ * See module header for the exact transforms applied.
7
+ */
8
+ export declare function sanitizeForGemini(schema: unknown): unknown;
9
+ export declare function wrapSchemaForGemini(schema: StandardSchemaV1<any>): StandardSchemaV1<any>;
10
+ export declare function sanitizeToolForGemini<T extends Tool<any, any>>(tool: T): T;
11
+ /**
12
+ * true when the effective upstream model is served by google's generative
13
+ * language API — directly (`google/*`), via opencode (`opencode/gemini-*`),
14
+ * or via openrouter (`openrouter/google/gemini-*`). slug-substring match
15
+ * works because every gemini route's model id contains "gemini".
16
+ */
17
+ export declare function isGeminiRouted(ctx: ToolContext): boolean;
package/dist/mcp/git.d.ts CHANGED
@@ -1,4 +1,7 @@
1
1
  import type { ToolContext } from "./server.ts";
2
+ export declare function rejectIfLeadingDash(value: string, kind: string): void;
3
+ export declare function rejectSpecialRef(value: string, kind: string): void;
4
+ export declare function validateTagName(tag: string): void;
2
5
  export declare const PushBranch: import("arktype/internal/variants/object.ts").ObjectType<{
3
6
  force: import("arktype/internal/attributes.ts").Default<boolean, false>;
4
7
  branchName?: string;
@@ -10,11 +13,14 @@ export declare function PushBranchTool(ctx: ToolContext): import("fastmcp").Tool
10
13
  branchName?: string;
11
14
  force?: boolean;
12
15
  }>>;
16
+ export declare const AUTH_REQUIRED_REDIRECT: Record<string, string>;
17
+ export declare const NOSHELL_BLOCKED_SUBCOMMANDS: Record<string, string>;
18
+ export declare const NOSHELL_BLOCKED_ARGS: string[];
13
19
  export declare function GitTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
14
- subcommand: string;
20
+ command: string;
15
21
  args?: string[];
16
22
  }, {
17
- subcommand: string;
23
+ command: string;
18
24
  args?: string[];
19
25
  }>>;
20
26
  export declare function GitFetchTool(ctx: ToolContext): import("fastmcp").Tool<any, import("@standard-schema/spec").StandardSchemaV1<{
@@ -1,4 +1,67 @@
1
+ import type { RestEndpointMethodTypes } from "@octokit/rest";
1
2
  import type { ToolContext } from "./server.ts";
3
+ export type CommentableLines = {
4
+ RIGHT: Set<number>;
5
+ LEFT: Set<number>;
6
+ };
7
+ /**
8
+ * parse a PR file's patch to determine which line numbers on each side are
9
+ * valid anchors for inline comments. GitHub only accepts comments on lines
10
+ * inside a diff hunk: added/context lines on RIGHT, removed/context lines
11
+ * on LEFT.
12
+ */
13
+ export declare function commentableLinesForFile(patch: string | undefined): CommentableLines;
14
+ export declare function buildCommentableMap(ctx: ToolContext, pullNumber: number): Promise<Map<string, CommentableLines>>;
15
+ export type ReviewCommentInput = NonNullable<RestEndpointMethodTypes["pulls"]["createReview"]["parameters"]["comments"]>[number];
16
+ export interface DroppedComment {
17
+ path: string;
18
+ line: number;
19
+ startLine?: number | undefined;
20
+ side: "LEFT" | "RIGHT";
21
+ reason: string;
22
+ }
23
+ export declare function validateInlineComments(comments: ReviewCommentInput[], map: Map<string, CommentableLines>): {
24
+ valid: ReviewCommentInput[];
25
+ dropped: DroppedComment[];
26
+ };
27
+ export declare const MAX_DROPPED_COMMENT_LINES = 50;
28
+ /**
29
+ * reason a create_pull_request_review call should be skipped without hitting
30
+ * GitHub. returned by reviewSkipDecision; null means submit normally.
31
+ */
32
+ export type ReviewSkipDecision = {
33
+ kind: "no-issues";
34
+ reason: string;
35
+ } | {
36
+ kind: "empty-downgraded-approve";
37
+ reason: string;
38
+ };
39
+ /**
40
+ * decide whether to skip a review submission before any network call.
41
+ *
42
+ * GitHub rejects `event: "COMMENT"` reviews with no body and no inline comments
43
+ * with HTTP 422 "Unprocessable Entity". two paths produce that shape:
44
+ *
45
+ * 1. `!approved` + empty body/comments: agent's "no issues found" result.
46
+ * skipping preserves the agent's intent (nothing to post is a fine
47
+ * outcome for a review run) without a spurious 422.
48
+ * 2. `approved` + `!prApproveEnabled` + empty body/comments: the runtime
49
+ * downgrades APPROVE to COMMENT when prApproveEnabled is off, and the
50
+ * resulting empty-COMMENT is exactly the shape GitHub 422s. skipping
51
+ * here surfaces the cause (downgrade + nothing to say) instead of an
52
+ * opaque 422 the agent can't recover from.
53
+ *
54
+ * legitimate bare approvals (`approved` + `prApproveEnabled`, no body/comments)
55
+ * are never skipped — GitHub accepts empty APPROVE reviews and the approval
56
+ * stamp itself is the review's content.
57
+ */
58
+ export declare function reviewSkipDecision(params: {
59
+ approved: boolean;
60
+ body: string | null | undefined;
61
+ hasComments: boolean;
62
+ prApproveEnabled: boolean;
63
+ }): ReviewSkipDecision | null;
64
+ export declare function formatDroppedCommentsNote(dropped: DroppedComment[]): string;
2
65
  export declare const CreatePullRequestReview: import("arktype/internal/variants/object.ts").ObjectType<{
3
66
  pull_number: number;
4
67
  body?: string;
@@ -40,6 +103,47 @@ export declare function CreatePullRequestReviewTool(ctx: ToolContext): import("f
40
103
  start_line?: number;
41
104
  }[];
42
105
  }>>;
106
+ /**
107
+ * clear a pending review draft stranded on the PR by a prior hard-killed run
108
+ * (workflow timeout, OOM) so the next createReview can succeed.
109
+ *
110
+ * GitHub enforces one-pending-review-per-user-per-PR. if the previous process
111
+ * died between createReview(PENDING) and submitReview, the draft remains and
112
+ * the next run's createReview 422s with "already has a pending review".
113
+ * listReviews only exposes PENDING reviews to their author, so filtering on
114
+ * state === "PENDING" is already scoped to the authed token's own draft.
115
+ *
116
+ * if `originalErr` is not a pending-review 422, or no leftover is found, this
117
+ * function rethrows `originalErr` so the caller surfaces the original failure.
118
+ * delete failures with 404 (draft already gone) or 422 (draft submitted by a
119
+ * concurrent caller) are swallowed — the caller's retry will succeed in both
120
+ * cases. any other delete error is rethrown unchanged.
121
+ *
122
+ * known limitation: if two runs on the SAME PR share the authed token and
123
+ * overlap in time, the loser's createReview 422s on the winner's still-active
124
+ * draft. recovery would then delete the winner's active draft and the
125
+ * winner's submitReview would 404. this is not distinguishable from a
126
+ * genuinely-stranded draft via the review object alone (PENDING reviews
127
+ * expose no created_at timestamp, and both reviews are authored by the same
128
+ * bot user). rely on workflow-level concurrency controls (e.g. a concurrency
129
+ * key keyed to the PR number) to prevent overlap.
130
+ */
131
+ export declare function clearStrandedPendingReview(ctx: ToolContext, params: {
132
+ owner: string;
133
+ repo: string;
134
+ pull_number: number;
135
+ originalErr: unknown;
136
+ }): Promise<void>;
137
+ /**
138
+ * single-step createReview (event != PENDING) with stranded-draft recovery.
139
+ * the body path goes through createAndSubmitWithFooter which already recovers
140
+ * from a stranded PENDING draft at its own createReview call. the no-body path
141
+ * used to call createReview directly with no recovery — so a PR whose previous
142
+ * body-path run crashed between createReview(PENDING) and submitReview would
143
+ * permanently 422 any subsequent no-body review (approve-with-no-feedback or
144
+ * comments-only) until a body-path run happened to clear the draft.
145
+ */
146
+ export declare function createReviewWithStrandedRecovery(ctx: ToolContext, params: RestEndpointMethodTypes["pulls"]["createReview"]["parameters"]): Promise<Awaited<ReturnType<typeof ctx.octokit.rest.pulls.createReview>>>;
43
147
  /**
44
148
  * report the review node ID so the WorkflowRun is marked as "review submitted".
45
149
  * exported for use in main.ts post-agent cleanup.
@@ -3,10 +3,12 @@ import type { AgentUsage } from "../agents/index.ts";
3
3
  import { type AgentId } from "../external.ts";
4
4
  import type { Mode } from "../modes.ts";
5
5
  import type { PrepResult } from "../prep/index.ts";
6
+ import type { DiffCoverageState } from "../utils/diffCoverage.ts";
6
7
  import type { OctokitWithPlugins } from "../utils/github.ts";
7
8
  import type { ResolvedPayload } from "../utils/payload.ts";
8
9
  import type { RunContextData } from "../utils/runContextData.ts";
9
10
  import type { TodoTracker } from "../utils/todoTracking.ts";
11
+ import type { CommentableLines } from "./review.ts";
10
12
  export type BackgroundProcess = {
11
13
  pid: number;
12
14
  outputPath: string;
@@ -29,6 +31,9 @@ export interface ToolState {
29
31
  pushDest?: StoredPushDest;
30
32
  issueNumber?: number;
31
33
  checkoutSha?: string;
34
+ commentableLinesByFile?: Map<string, CommentableLines>;
35
+ commentableLinesPullNumber?: number;
36
+ commentableLinesCheckoutSha?: string | undefined;
32
37
  beforeSha?: string;
33
38
  selectedMode?: string;
34
39
  backgroundProcesses: Map<string, BackgroundProcess>;
@@ -55,6 +60,7 @@ export interface ToolState {
55
60
  usageEntries: AgentUsage[];
56
61
  model?: string | undefined;
57
62
  todoTracker?: TodoTracker | undefined;
63
+ diffCoverage?: DiffCoverageState | undefined;
58
64
  }
59
65
  interface InitToolStateParams {
60
66
  progressCommentId: string | undefined;
@@ -78,6 +84,7 @@ export interface ToolContext {
78
84
  jobId: string | undefined;
79
85
  mcpServerUrl: string;
80
86
  tmpdir: string;
87
+ resolvedModel: string | undefined;
81
88
  }
82
89
  type JsonSchema = Record<string, unknown>;
83
90
  type McpHttpServerOptions = {
@@ -85,6 +92,11 @@ type McpHttpServerOptions = {
85
92
  };
86
93
  /**
87
94
  * Start the MCP HTTP server.
95
+ *
96
+ * The returned disposer is idempotent — safe to call multiple times.
97
+ * Callers (e.g. the inner activity-timeout handler in main.ts) may need to
98
+ * stop the server before the `await using` block exits; a subsequent
99
+ * automatic dispose is then a no-op.
88
100
  */
89
101
  export declare function startMcpHttpServer(ctx: ToolContext, options?: McpHttpServerOptions): Promise<{
90
102
  url: string;
@@ -18,4 +18,4 @@ export declare const handleToolError: (error: unknown) => ToolResult;
18
18
  * @param toolName - optional tool name for error logging
19
19
  */
20
20
  export declare const execute: <T, R extends Record<string, any> | string>(fn: (params: T) => Promise<R>, toolName?: string) => (params: T) => Promise<ToolResult>;
21
- export declare const addTools: (_ctx: ToolContext, server: FastMCP<any>, tools: Tool<any, any>[]) => FastMCP<any>;
21
+ export declare const addTools: (ctx: ToolContext, server: FastMCP<any>, tools: Tool<any, any>[]) => FastMCP<any>;
package/dist/modes.d.ts CHANGED
@@ -4,6 +4,6 @@ export interface Mode {
4
4
  description: string;
5
5
  prompt?: string | undefined;
6
6
  }
7
- export declare const PR_SUMMARY_FORMAT = "### Default format\n\nFollow this structure exactly:\n\n<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.\nNOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.\n\n### Key changes\n\n- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add `TodoTracker` for live checklists**). A reviewer should understand the full PR from this list alone.\n\n<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: `{base}` \u2190 `{head}`</sub>\nNOTE: the metadata line goes AFTER the bullet list, not before it.\n\nThen for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).\n\n<br/>\n\n## Example readable section title\n\n> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]\nIMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate `>` lines creates a double line break.\n\n1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.\n\nIf a change warrants deeper explanation, use a blockquoted details/summary framed as a question:\n> <details><summary>How does X work?</summary>\n> Extended explanation here.\n> </details>\n\nEnd each section with a file links trail (3-4 key files max):\n[`file.ts`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \u00B7 ...\n\nSingle-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.\n\nCRITICAL \u2014 GitHub markdown rendering rule:\nGitHub's markdown parser requires a blank line between ALL block-level elements. 
This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.\n\nRules:\n- `##` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title\n- ALL variable names, identifiers, and file names in body text must be in backticks\n- ALL file references MUST link to the PR Files Changed view. Compute anchors by running `echo -n 'path/to/file.ts' | sha256sum` via shell for each file. NEVER fabricate hex strings \u2014 run the actual command. If shell is unavailable, omit the #diff- anchor rather than guessing.\n- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)\n- Do NOT include raw diff stats like '+123 / -45' or line counts\n- Do NOT include code blocks or repeat diff contents\n- Do NOT include a changelog section \u2014 the key changes list serves this purpose\n- Focus on *intent*, not *what* \u2014 the diff already shows what changed\n- Get the file count and commit count from the checkout_pr metadata, not by counting manually";
7
+ export declare const PR_SUMMARY_FORMAT = "### Default format\n\nFollow this structure exactly:\n\n<b>TL;DR</b> \u2014 1-3 sentences on what the PR does and why. Focus on intent, not mechanics.\nNOTE: use HTML bold <b>TL;DR</b>, NOT markdown bold **TL;DR**.\n\n### Key changes\n\n- **Short human-readable title** \u2014 1 sentence per change. Write a short prose phrase (title case or sentence case); when you name a file, type, or function, put that name in backticks (e.g. **Add `TodoTracker` for live checklists**). A reviewer should understand the full PR from this list alone.\n\n<sub><b>Summary</b> \uFF5C {file_count} files \uFF5C {commit_count} commits \uFF5C base: `{base}` \u2190 `{head}`</sub>\nNOTE: the metadata line goes AFTER the bullet list, not before it.\n\nThen for each key change, a ## section with a short descriptive title that reads like a documentation heading (e.g. ## Live todo checklist tracking).\n\n<br/>\n\n## Example readable section title\n\n> **Before:** [old behavior/state]<br/>**After:** [new behavior/state]\nIMPORTANT: Before and After MUST be on a SINGLE blockquote line with an inline <br/> between them. Two separate `>` lines creates a double line break.\n\n1-2 sentences of explanation. Break up text with tables, blockquotes, or lists \u2014 NEVER 3+ plain paragraphs in a row.\n\nIf a change warrants deeper explanation, use a blockquoted details/summary framed as a question:\n> <details><summary>How does X work?</summary>\n> Extended explanation here.\n> </details>\n\nEnd each section with a file links trail (3-4 key files max):\n[`file.ts`](https://github.com/{owner}/{repo}/pull/{number}/files#diff-{sha256hex_of_filepath}) \u00B7 ...\n\nSingle-feature PRs: skip the ## sections. Fold before/after and explanation into the header after key changes.\n\nCRITICAL \u2014 GitHub markdown rendering rule:\nGitHub's markdown parser requires a blank line between ALL block-level elements. 
This includes transitions between: HTML tags (<br/>, <sub>, <details>, <b>, etc.) and markdown syntax (headings, lists, blockquotes, paragraphs). Without a blank line, GitHub treats the following content as a continuation of the HTML block and renders markdown syntax as literal text. ALWAYS separate block-level elements with a blank line.\n\nRules:\n- `##` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title\n- ALL variable names, identifiers, and file names in body text must be in backticks\n- ALL file references MUST link to the PR Files Changed view. Use the `diff-<hex>` anchor precomputed next to each filename in the `checkout_pr` TOC \u2014 do NOT run `sha256sum` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the `#diff-` anchor rather than guessing.\n- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)\n- Do NOT include raw diff stats like '+123 / -45' or line counts\n- Do NOT include code blocks or repeat diff contents\n- Do NOT include a changelog section \u2014 the key changes list serves this purpose\n- Focus on *intent*, not *what* \u2014 the diff already shows what changed\n- Get the file count and commit count from the checkout_pr metadata, not by counting manually";
8
8
  export declare function computeModes(agentId: AgentId): Mode[];
9
9
  export declare const modes: Mode[];
@@ -1,5 +1,7 @@
1
1
  export declare const DEFAULT_ACTIVITY_TIMEOUT_MS = 300000;
2
2
  export declare const DEFAULT_ACTIVITY_CHECK_INTERVAL_MS = 5000;
3
+ export declare const ACTIVITY_NOISE_PATTERNS: readonly RegExp[];
4
+ export declare function isActivityNoise(chunk: string | Uint8Array): boolean;
3
5
  type ActivityTimeoutContext = {
4
6
  timeoutMs: number;
5
7
  checkIntervalMs: number;
@@ -7,6 +9,8 @@ type ActivityTimeoutContext = {
7
9
  export type ActivityTimeout = {
8
10
  promise: Promise<never>;
9
11
  stop: () => void;
12
+ /** force the timeout to reject immediately with a custom reason */
13
+ forceReject: (reason: string) => void;
10
14
  };
11
15
  /**
12
16
  * mark activity to reset the no-output timeout.
@@ -3,7 +3,9 @@ import type { Agent } from "../agents/index.ts";
3
3
  * resolve the effective model for this run.
4
4
  *
5
5
  * priority:
6
- * 1. PULLFROG_MODEL env var (explicit specifier override)
6
+ * 1. PULLFROG_MODEL env var resolved through the alias registry first,
7
+ * so values like "anthropic/claude-opus" become "anthropic/claude-opus-4-7".
8
+ * raw specifiers (e.g. "anthropic/claude-opus-4-6") pass through unchanged.
7
9
  * 2. slug from repo config / payload → alias registry
8
10
  * 3. undefined — agent will auto-select
9
11
  */
@@ -0,0 +1,62 @@
1
+ export type DiffLineRange = {
2
+ startLine: number;
3
+ endLine: number;
4
+ };
5
+ export type DiffTocEntry = {
6
+ filename: string;
7
+ startLine: number;
8
+ endLine: number;
9
+ };
10
+ export type DiffCoverageFileBreakdown = {
11
+ filename: string;
12
+ startLine: number;
13
+ endLine: number;
14
+ totalLines: number;
15
+ coveredLines: number;
16
+ coveredRanges: DiffLineRange[];
17
+ unreadRanges: DiffLineRange[];
18
+ };
19
+ export type DiffCoverageBreakdown = {
20
+ totalLines: number;
21
+ coveredLines: number;
22
+ unreadLines: number;
23
+ coveragePercent: number;
24
+ coveredRanges: DiffLineRange[];
25
+ unreadRanges: DiffLineRange[];
26
+ files: DiffCoverageFileBreakdown[];
27
+ };
28
+ export type DiffCoverageState = {
29
+ diffPath: string;
30
+ totalLines: number;
31
+ tocEntries: DiffTocEntry[];
32
+ coveredRanges: DiffLineRange[];
33
+ coveragePreflightRan: boolean;
34
+ lastBreakdown?: string | undefined;
35
+ };
36
+ export declare function countLines(params: {
37
+ content: string;
38
+ }): number;
39
+ export declare function parseDiffTocEntries(params: {
40
+ toc: string;
41
+ }): DiffTocEntry[];
42
+ export declare function createDiffCoverageState(params: {
43
+ diffPath: string;
44
+ totalLines: number;
45
+ toc: string;
46
+ }): DiffCoverageState;
47
+ export declare function recordDiffReadFromToolUse(params: {
48
+ state: DiffCoverageState | undefined;
49
+ toolName: string;
50
+ input: unknown;
51
+ cwd: string;
52
+ }): boolean;
53
+ export declare function getDiffCoverageBreakdown(params: {
54
+ state: DiffCoverageState;
55
+ }): DiffCoverageBreakdown;
56
+ export declare function renderDiffCoverageBreakdown(params: {
57
+ diffPath: string;
58
+ breakdown: DiffCoverageBreakdown;
59
+ }): string;
60
+ export declare function countLinesInRanges(params: {
61
+ ranges: DiffLineRange[];
62
+ }): number;
@@ -2,8 +2,20 @@ export interface ExecuteLifecycleHookParams {
2
2
  event: string;
3
3
  script: string | null;
4
4
  }
5
+ export interface LifecycleHookResult {
6
+ /**
7
+ * human-readable warning when the hook failed. includes retry guidance:
8
+ * transient spawn/exit errors are worth retrying, timeouts and
9
+ * persistent failures are not. absent when the hook succeeded or was
10
+ * skipped.
11
+ */
12
+ warning?: string;
13
+ }
5
14
  /**
6
15
  * execute a lifecycle hook script if one is configured.
7
- * runs the script in a bash shell with a timeout.
16
+ *
17
+ * soft-fails: instead of throwing on hook errors, returns a result with `warning` set
18
+ * so callers can choose whether to surface it (mcp tools) or upgrade it to
19
+ * a fatal error (setup/prepush). timeouts are flagged as non-retryable.
8
20
  */
9
- export declare function executeLifecycleHook(params: ExecuteLifecycleHookParams): Promise<void>;
21
+ export declare function executeLifecycleHook(params: ExecuteLifecycleHookParams): Promise<LifecycleHookResult>;
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Logging utilities that work well in both local and GitHub Actions environments
3
3
  */
4
- import type { AgentUsage } from "../agents/shared.ts";
4
+ import { type AgentUsage } from "../agents/shared.ts";
5
5
  /** run `fn` with every log line prefixed by `prefix` (e.g. "[task-label]") in magenta */
6
6
  export declare function withLogPrefix<T>(prefix: string, fn: () => Promise<T>): Promise<T>;
7
7
  /**
@@ -86,7 +86,18 @@ export declare function formatJsonValue(value: unknown): string;
86
86
  */
87
87
  export declare function formatIndentedField(label: string, content: string): string;
88
88
  /**
89
- * format aggregated usage data as a markdown table for the GitHub step summary
89
+ * format aggregated usage data as a markdown table for the GitHub step summary.
90
+ *
91
+ * columns mirror the per-run stdout token table emitted by `logTokenTable`
92
+ * (Input / Cache Read / Cache Write / Output / Total / Cost ($)) so the job
93
+ * summary and the in-run logs can be compared row-for-row.
94
+ *
95
+ * notes:
96
+ * - `AgentUsage.inputTokens` is the sum of non-cached input + cache read
97
+ * + cache write (set that way by both agent harnesses' `buildUsage`),
98
+ * so the non-cached Input column is recovered by subtracting cache fields.
99
+ * - `costUsd` is sourced from models.dev (OpenCode) or `total_cost_usd`
100
+ * (Claude CLI). absent rows show `—` so per-agent coverage is obvious.
90
101
  */
91
102
  export declare function formatUsageSummary(entries: AgentUsage[]): string;
92
103
  export {};
@@ -1,6 +1,29 @@
1
+ import type { AgentUsage } from "../agents/shared.ts";
1
2
  import type { ToolContext } from "../mcp/server.ts";
2
- /** Keys accepted by PATCH /api/workflow-run/[runId] — keep in sync with `ALLOWED_FIELDS` in `app/api/workflow-run/[runId]/route.ts`. */
3
+ /**
4
+ * Artifact tracking fields — one-off PATCHes from MCP tools as GitHub entities
5
+ * are created during the run. Strings only (GraphQL node IDs).
6
+ * Keep in sync with `STRING_FIELDS` in `app/api/workflow-run/[runId]/route.ts`.
7
+ */
3
8
  export type WorkflowRunArtifactPatchKey = "prNodeId" | "issueNodeId" | "reviewNodeId" | "planCommentNodeId" | "summaryCommentNodeId";
4
- export type WorkflowRunArtifactPatch = Partial<Record<WorkflowRunArtifactPatchKey, string>>;
5
- /** PATCH workflow-run artifact fields (Pullfrog JWT, not GitHub). */
6
- export declare function patchWorkflowRunFields(ctx: ToolContext, fields: WorkflowRunArtifactPatch): Promise<void>;
9
+ /**
10
+ * Usage fields aggregated across all agent calls and PATCHed once at
11
+ * end-of-run. Token counts are Int4 on the DB side (ample for any realistic
12
+ * run); `costUsd` is a Decimal populated by provider-reported dollar amounts.
13
+ * Keep in sync with `INT_FIELDS` + `DECIMAL_FIELDS` in the server route.
14
+ */
15
+ export type WorkflowRunUsagePatchKey = "inputTokens" | "outputTokens" | "cacheReadTokens" | "cacheWriteTokens" | "costUsd";
16
+ export type WorkflowRunPatch = Partial<Record<WorkflowRunArtifactPatchKey, string>> & Partial<Record<WorkflowRunUsagePatchKey, number>>;
17
+ /** PATCH workflow-run fields (Pullfrog JWT, not GitHub). */
18
+ export declare function patchWorkflowRunFields(ctx: ToolContext, fields: WorkflowRunPatch): Promise<void>;
19
+ /**
20
+ * Sum per-agent usage entries into a single WorkflowRunPatch payload.
21
+ * Returns an empty object when there's nothing to report, which causes
22
+ * `patchWorkflowRunFields` to no-op — safe to call unconditionally from
23
+ * end-of-run paths. Zero-valued fields are dropped so the DB only stores
24
+ * positive sums (and NULL means "not reported").
25
+ *
26
+ * Token sums are clamped to INT4_MAX to guarantee the payload the server
27
+ * sees is always self-consistent across all numeric columns.
28
+ */
29
+ export declare function aggregateUsage(entries: AgentUsage[]): WorkflowRunPatch;
@@ -17,6 +17,7 @@ export interface RepoSettings {
17
17
  prApproveEnabled: boolean;
18
18
  modeInstructions: Record<string, string>;
19
19
  learnings: string | null;
20
+ envAllowlist: string | null;
20
21
  }
21
22
  export interface RunContext {
22
23
  settings: RepoSettings;
@@ -1,14 +1,21 @@
1
1
  /**
2
2
  * Secret detection and env filtering utilities
3
+ *
4
+ * subprocess env filtering: default-deny allowlist model.
5
+ * only vars in the safe set or user allowlist are passed to child processes.
6
+ *
7
+ * log redaction: SENSITIVE_PATTERNS are used to identify secret values
8
+ * for redaction in logs and GHA masking (independent of subprocess filtering).
3
9
  */
4
10
  export declare const SENSITIVE_PATTERNS: RegExp[];
5
11
  export declare function isSensitiveEnvName(key: string): boolean;
6
- /** filter env vars, removing sensitive values (tokens, keys, secrets) */
12
+ export declare function setEnvAllowlist(raw: string): void;
13
+ /** filter env vars using default-deny allowlist: safe set + user allowlist */
7
14
  export declare function filterEnv(): Record<string, string>;
8
15
  export type EnvMode = "restricted" | "inherit" | Record<string, string>;
9
16
  /**
10
17
  * resolve env mode to actual env object
11
- * - "restricted" (default): filterEnv() to prevent secret leakage
18
+ * - "restricted" (default): filterEnv(), which passes only the safe set + user allowlist
12
19
  * - "inherit": full process.env
13
20
  * - object: custom env merged with restricted base
14
21
  */
@@ -12,6 +12,19 @@ export declare function createTempDirectory(): string;
12
12
  * Setup the test repository for running actions
13
13
  */
14
14
  export declare function setupTestRepo(options: SetupOptions): void;
15
+ /**
16
+ * remove any `[includeIf ...]` entries from the local git config so that
17
+ * actions/checkout-persisted credentials don't ride alongside ASKPASS-provided
18
+ * auth for subsequent git operations.
19
+ *
20
+ * SECURITY: git config subsection values can contain arbitrary characters
21
+ * including `$(...)` command substitutions, and `${IFS}` spacing tricks defeat
22
+ * naive split-on-space filtering. we read keys via the `-z` (null-terminated)
23
+ * output format and feed them to a spawn-array `git config --unset-all` so
24
+ * the shell never interpolates key contents — closing the RCE path that a
25
+ * string-interpolated `execSync(...)` would expose.
26
+ */
27
+ export declare function removeIncludeIfEntries(repoDir: string): void;
15
28
  export interface GitContext {
16
29
  gitToken: string;
17
30
  owner: string;