pullfrog 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,7 +14,7 @@
14
14
 
15
15
  <br/>
16
16
 
17
- > **🚀 Pullfrog is in beta!** We're onboarding users in waves. [Get on the waitlist →](https://pullfrog.com/join-waitlist)
17
+ > **🚀 Pullfrog is now generally available!** [Get started →](https://pullfrog.com/console)
18
18
 
19
19
  <br/>
20
20
 
@@ -2,6 +2,7 @@ import type { TodoTracker } from "../utils/todoTracking.ts";
2
2
  import { type AgentResult } from "./shared.ts";
3
3
  type RunParams = {
4
4
  label: string;
5
+ cmd: string;
5
6
  args: string[];
6
7
  cwd: string;
7
8
  env: Record<string, string | undefined>;
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Claude Code `PreToolUse` hook source — written into `ctx.tmpdir` at runtime
3
+ * and registered via a tmpdir-scoped `settings.json` referenced by
4
+ * `--settings <path>` (see action/agents/claude.ts).
5
+ *
6
+ * Closes the subagent → state-mutating MCP tool path that caused the
7
+ * 2026-05-18 zed-industries/cloud incident (`reviewfrog` lens called
8
+ * `checkout_pr` mid-review and the orchestrator's next push clobbered an
9
+ * unrelated branch). Pairs with the `tool.execute.before` hook in
10
+ * action/agents/opencodePlugin.ts; both runtimes share the deny list at
11
+ * action/agents/subagentToolGates.ts.
12
+ *
13
+ * PreToolUse hook contract (verified against yasasbanukaofficial/claude-code
14
+ * `src/utils/hooks/hooksConfigManager.ts` and `src/utils/hooks.ts`):
15
+ * - stdin: JSON with `hook_event_name: "PreToolUse"`, `tool_name`,
16
+ * `tool_input`, `tool_use_id`, `session_id`, `cwd`, `transcript_path`,
17
+ * and crucially `agent_id` / `agent_type` populated when the call
18
+ * originates from a subagent (set by the SDK when a Task/Agent
19
+ * dispatches a tool — see `createBaseHookInput` in claude-code source).
20
+ * - exit 0 → allow, no output shown
21
+ * - exit 2 → block tool call AND show stderr to model (this is the path
22
+ * we want for the deny case — the subagent gets a clear refusal it can
23
+ * reason about and pick a different action)
24
+ * - other → show stderr to user only, continue with tool call
25
+ *
26
+ * The hook itself is intentionally tiny: stdin → JSON → check `agent_id`
27
+ * presence + `tool_name` against the deny list → exit 0 or 2. No deps.
28
+ *
29
+ * Why the script source is a string template, not a separate `.ts` file
30
+ * shipped with the action: the action runs as a published npm package; at
31
+ * install time we don't have the source on disk in a stable place. Embedding
32
+ * the source into `dist/main.mjs` and writing it out per-run keeps the path
33
+ * inside `ctx.tmpdir` (where `--settings` can find it) and survives bundle
34
+ * minification.
35
+ */
36
+ /**
37
+ * Source written to `<ctx.tmpdir>/pullfrog-pretool-gate.mjs`. Plain ESM,
38
+ * no TypeScript, no dependencies — node executes it directly via the
39
+ * `#!/usr/bin/env node` shebang and the executable bit set by the harness.
40
+ */
41
+ export declare const CLAUDE_PRETOOL_GATE_FILENAME: "pullfrog-pretool-gate.mjs";
42
+ export declare const CLAUDE_PRETOOL_GATE_SOURCE: string;
43
+ /**
44
+ * Settings JSON shape registered via `claude --settings <path>`. The
45
+ * matcher `^mcp__pullfrog__` is treated as a regex by claude-code's
46
+ * `matchesPattern` helper (anything outside `[a-zA-Z0-9_|]` triggers the
47
+ * regex branch — verified in src/utils/hooks.ts), so this anchors at the
48
+ * start of the tool name and fires for every Pullfrog MCP tool. We narrow
49
+ * inside the script itself rather than declaring per-tool matchers because
50
+ * the deny list is the source of truth.
51
+ *
52
+ * The hook process inherits the parent's PATH, so `node` resolves to the
53
+ * runner's node binary; the `--settings` flag accepts either a path or a
54
+ * literal JSON string per claude-code source `src/main.tsx` (`Path to a
55
+ * settings JSON file or a JSON string`), but we use a path so the script
56
+ * and its config sit side-by-side under `ctx.tmpdir`.
57
+ */
58
+ export declare function buildClaudePretoolGateSettings(scriptAbsolutePath: string): {
59
+ hooks: {
60
+ PreToolUse: Array<{
61
+ matcher: string;
62
+ hooks: Array<{
63
+ type: "command";
64
+ command: string;
65
+ timeout?: number;
66
+ }>;
67
+ }>;
68
+ };
69
+ };
@@ -0,0 +1,6 @@
1
+ import { type AgentRunContext } from "./shared.ts";
2
+ export interface GateServerHandle {
3
+ url: string;
4
+ [Symbol.asyncDispose]: () => Promise<void>;
5
+ }
6
+ export declare function startGateServer(ctx: AgentRunContext): Promise<GateServerHandle>;
@@ -0,0 +1,28 @@
1
+ /** worktree-relative blanket WRITE deny for the entire `.git` tree, in
2
+ * OpenCode Wildcard dialect (`*` compiles to regex `.*`, matching `/`
3
+ * recursively — see packages/core/src/util/wildcard.ts). spread into the
4
+ * `edit` ruleset after a `"*": "allow"` baseline — `evaluate` is
5
+ * last-match-wins by key order, so the deny keys must follow the wildcard
6
+ * allow.
7
+ *
8
+ * four patterns, because the root-anchored descendants glob only matches
9
+ * paths under a root `.git` *directory* — it misses `.git` when it's a gitfile
10
+ * (worktree / submodule layouts: a regular file whose `gitdir:` line redirects
11
+ * git metadata) and misses nested gitfiles (a `.git` inside a subdirectory).
12
+ * rewriting either pointer is the same code-exec surface (`core.hooksPath`,
13
+ * clean/smudge filters, credential.helper) the blanket deny exists to seal, so
14
+ * we cover the gitfile itself and any nested `.git` too. */
15
+ export declare const GIT_NATIVE_WRITE_DENY_OPENCODE: Record<string, "deny">;
16
+ /** worktree-relative narrow READ deny (`.git/config` only), in OpenCode
17
+ * Wildcard dialect. spread into the `read` ruleset after the `"*": "allow"`
18
+ * baseline. */
19
+ export declare const GIT_NATIVE_READ_DENY_OPENCODE: Record<string, "deny">;
20
+ /** Claude `permissions.deny` entries for the blanket `.git` WRITE deny —
21
+ * mirrors {@link GIT_NATIVE_WRITE_DENY_OPENCODE}. `**` is recursive. the exact
22
+ * `.git` entry plus the recursive-prefix gitfile entry cover the gitfile
23
+ * pointer (root + nested) that the root-anchored descendants glob alone misses;
24
+ * the recursive-prefix descendants entry covers nested gitdirs. */
25
+ export declare const GIT_NATIVE_WRITE_DENY_CLAUDE: string[];
26
+ /** Claude `permissions.deny` entries for the narrow `.git/config` READ deny,
27
+ * one per read/enumerate tool — mirrors {@link GIT_NATIVE_READ_DENY_OPENCODE}. */
28
+ export declare const GIT_NATIVE_READ_DENY_CLAUDE: string[];
@@ -15,22 +15,33 @@
15
15
  * XDG_CONFIG_HOME is so harness-side files (config, plugins, scratch state)
16
16
  * land in the tmpdir.
17
17
  *
18
- * Why this plugin exists: opencode's `task` tool runs subagents in-process and
19
- * the CLI's `cli/cmd/run.ts` event loop filters `part.sessionID !== sessionID`,
20
- * so subagent-internal `message.part.updated` events are silently discarded
21
- * before reaching our parent NDJSON stream. plugins, by contrast, receive
22
- * EVERY bus event via `bus.subscribeAll()` regardless of session.
23
- *
24
- * The plugin re-emits every relevant bus event onto opencode's stdout as a
25
- * single JSON line wrapped in a sentinel envelope. our `runOpenCode` parser
26
- * recognises the envelope, unpacks it, and routes the inner part through the
27
- * existing handlers with a per-session label from `SessionLabeler` so each
28
- * subagent's tool calls / text appear inline alongside the orchestrator's.
29
- *
30
- * Dumb plugin / smart parent split: the plugin emits every part for every
31
- * session. the parent dedupes against the orchestrator's own session id (which
32
- * it already knows from the `init` event). this keeps the plugin trivial and
33
- * keeps the per-session attribution logic on the parent side where the
18
+ * Why the events plugin exists: opencode's `task` tool runs subagents
19
+ * in-process and the CLI's `cli/cmd/run.ts` event loop filters
20
+ * `part.sessionID !== sessionID`, so subagent-internal `message.part.updated`
21
+ * events are silently discarded before reaching our parent NDJSON stream.
22
+ * plugins, by contrast, receive EVERY bus event via `bus.subscribeAll()`
23
+ * regardless of session.
24
+ *
25
+ * The events plugin re-emits every relevant bus event onto opencode's stdout
26
+ * as a single JSON line wrapped in a sentinel envelope. our `runOpenCode`
27
+ * parser recognises the envelope, unpacks it, and routes the inner part
28
+ * through the existing handlers with a per-session label from `SessionLabeler`
29
+ * so each subagent's tool calls / text appear inline alongside the
30
+ * orchestrator's.
31
+ *
32
+ * The subagent gate (the `tool.execute.before` hook that hard-blocks
33
+ * state-mutating MCP tool calls from a subagent session) lives in a SEPARATE
34
+ * plugin — `PULLFROG_OPENCODE_GATE_PLUGIN_SOURCE` below — because it's the
35
+ * load-bearing security fence and must ship into both opencode harnesses,
36
+ * whereas this events re-emitter is only needed by the legacy `opencode.ts`
37
+ * CLI-parsing path (the active `opencode_v2.ts` reads subagent events directly
38
+ * off the SDK event stream, so it installs ONLY the gate plugin). Deny-list
39
+ * source of truth: `action/agents/subagentToolGates.ts`.
40
+ *
41
+ * Dumb plugin / smart parent split: the events plugin emits every part for
42
+ * every session. the parent dedupes against the orchestrator's own session id
43
+ * (which it already knows from the `init` event). this keeps the plugin trivial
44
+ * and keeps the per-session attribution logic on the parent side where the
34
45
  * SessionLabeler already lives.
35
46
  *
36
47
  * Event-name prefixing: the wrapped event-type sentinel is
@@ -39,6 +50,7 @@
39
50
  */
40
51
  export declare const PULLFROG_BUS_EVENT_TYPE: "pullfrog_bus_event";
41
52
  export declare const PULLFROG_OPENCODE_PLUGIN_FILENAME: "pullfrog-events.ts";
53
+ export declare const PULLFROG_OPENCODE_GATE_PLUGIN_FILENAME: "pullfrog-subagent-gate.ts";
42
54
  /**
43
55
  * Source written verbatim to `<XDG_CONFIG_HOME>/opencode/plugin/pullfrog-events.ts`.
44
56
  *
@@ -58,3 +70,16 @@ export declare const PULLFROG_OPENCODE_PLUGIN_FILENAME: "pullfrog-events.ts";
58
70
  * missed event, not a crash.
59
71
  */
60
72
  export declare const PULLFROG_OPENCODE_PLUGIN_SOURCE: string;
73
+ /**
74
+ * Standalone subagent gate plugin written to
75
+ * `<XDG_CONFIG_HOME>/opencode/plugin/pullfrog-subagent-gate.ts`. Installed by
76
+ * BOTH opencode harnesses (the legacy `opencode.ts` and the active in-process
77
+ * `opencode_v2.ts`) — the gate is the load-bearing security fence, so it ships
78
+ * independently of the events re-emitter above (which v2 doesn't need).
79
+ *
80
+ * Hard-blocks state-mutating MCP tool calls originating from a subagent
81
+ * session via `tool.execute.before`, complementing the runtime backstops from
82
+ * PR #796 (action/mcp/checkout.ts, action/mcp/git.ts). Deny-list source of
83
+ * truth: `action/agents/subagentToolGates.ts`.
84
+ */
85
+ export declare const PULLFROG_OPENCODE_GATE_PLUGIN_SOURCE: string;
@@ -37,4 +37,4 @@ export declare function buildReviewerAgentConfig(orchestratorModel: string | und
37
37
  export declare function installOpencodeCli(params: {
38
38
  binPath: string;
39
39
  }): Promise<string>;
40
- export declare function autoSelectModel(cliPath: string): string | undefined;
40
+ export declare function autoSelectModel(): string | undefined;
@@ -54,6 +54,19 @@ export declare function collectPostRunIssues(ctx: AgentRunContext, options?: {
54
54
  skipSummaryStale?: boolean;
55
55
  }): Promise<PostRunIssues>;
56
56
  export declare function buildPostRunPrompt(issues: PostRunIssues): string;
57
+ /**
58
+ * terminal-only post-run finalize: re-checks the hard-fail gates after the
59
+ * agent has exited and converts a successful result to a hard-fail when
60
+ * `stopHook` or `unsubmittedReview` is still failing. used by harnesses
61
+ * that inject follow-up turns via a mechanism other than the resume
62
+ * callback (e.g. the Claude managed Stop hook + gate server). soft gates
63
+ * (`dirtyTree`, `summaryStale`) are intentionally not re-checked here —
64
+ * they never flip a successful run to failed.
65
+ */
66
+ export declare function finalizeAgentResult<R extends AgentResult>(params: {
67
+ ctx: AgentRunContext;
68
+ result: R;
69
+ }): Promise<R>;
57
70
  export declare function shouldRunReflection(mode: string | undefined): boolean;
58
71
  /**
59
72
  * prompt for a dedicated post-run reflection turn nudging the agent to edit
@@ -7,20 +7,26 @@
7
7
  * allow: file reads, grep/glob, web search/fetch, read-only MCP queries
8
8
  * deny: state-changing MCP tools, file writes, shell, nested subagent dispatch
9
9
  *
10
- * Enforcement is prose-only. We previously hand-maintained a deny-list of
11
- * mutating MCP tools against action/mcp/server.ts and wired it into per-agent
12
- * `disallowedTools` (claude) / `tools` deny map (opencode), but the list was
13
- * fragile — a future mutating tool added to the MCP server without a
14
- * corresponding update here would silently grant write access to the reviewer.
15
- * Rather than invert to an allowlist (smaller surface but still drifts) or add
16
- * a structural test, we lean on the system prompt below: it states the rule
17
- * as a no-op-if-reverted invariant the model can apply to any tool, including
18
- * ones added after this comment was written.
10
+ * Enforcement is now belt-and-suspenders:
11
+ * 1. Machine-enforced PreToolUse gates intercept every state-mutating MCP
12
+ * tool call originating from a subagent session and refuse it before
13
+ * MCP runs. See action/agents/subagentToolGates.ts (the deny list),
14
+ * action/agents/claudePretoolGate.ts (Claude Code's PreToolUse hook),
15
+ * and action/agents/opencodePlugin.ts (opencode's tool.execute.before
16
+ * hook). Follows PR #796, which added runtime backstops inside
17
+ * checkout_pr / push_branch after a subagent-originated tool call
18
+ * clobbered an unrelated PR branch in zed-industries/cloud.
19
+ * 2. The prose system prompt below serves as a backup against (a) tools added
20
+ * to the MCP server without a corresponding deny-list update, and
21
+ * (b) shell/git read-vs-write distinctions the static gate can't see.
22
+ * It states the rule as a no-op-if-reverted invariant the model can
23
+ * apply to any tool, including ones added after this comment was
24
+ * written.
19
25
  *
20
- * Note: per-agent `disallowedTools` in claude-code is also upstream-broken
21
- * for subagent-spawned tool calls (anthropics/claude-agent-sdk-typescript#172,
22
- * open as of latest update Mar 2026), so even a maintained list would not
23
- * have provided a real fence on that runtime.
26
+ * Historical note: per-agent `disallowedTools` in claude-code is
27
+ * upstream-broken for subagent-spawned tool calls
28
+ * (anthropics/claude-agent-sdk-typescript#172, open as of Mar 2026), which
29
+ * is why the gate runs at PreToolUse rather than tool-registration time.
24
30
  */
25
31
  export declare const REVIEWER_AGENT_NAME = "reviewfrog";
26
32
  /**
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Single source of truth for MCP tools subagents are forbidden from calling.
3
+ *
4
+ * Subagents share the orchestrator's in-process git working tree, `toolState`,
5
+ * progress comment, and run-scoped pr/branch context. A subagent that calls
6
+ * `checkout_pr` switches the orchestrator's HEAD; one that calls `push_branch`
7
+ * pushes whatever the orchestrator happens to have committed. The 2026-05-18
8
+ * `zed-industries/cloud` incident hit exactly this: a `reviewfrog` lens
9
+ * dispatched `checkout_pr({2582})` mid-review, the orchestrator's next push
10
+ * clobbered an unrelated engineer's branch. PR #796 added runtime backstops
11
+ * inside `checkout_pr`/`push_branch`; this list is the upstream gate that
12
+ * stops the call from ever reaching MCP when it originates from a subagent.
13
+ *
14
+ * The gate is enforced at two pre-tool hooks:
15
+ * - opencode: `tool.execute.before` (action/agents/opencodePlugin.ts)
16
+ * - claude: `PreToolUse` settings hook (action/agents/claudePretoolGate.ts)
17
+ *
18
+ * Names are stored in their canonical bare form (the FastMCP tool `name`
19
+ * field). Each runtime presents them with a different prefix:
20
+ * - claude: `mcp__pullfrog__<name>`
21
+ * - opencode: `pullfrog_<name>`
22
+ * The hooks strip those prefixes before comparing.
23
+ *
24
+ * Read-only MCP tools (`get_*`, `list_*`, `git_fetch`, `get_check_suite_logs`,
25
+ * `await_dependency_installation`, etc.) and the `git`/`shell` tools stay off
26
+ * this list — denying them would make review work impossible. The reviewer system prompt
27
+ * (`action/agents/reviewer.ts`) already forbids state-changing shell/git
28
+ * subcommands as a prose constraint; this list is the belt-and-suspenders
29
+ * machine fence for the high-stakes mutations we can identify by name alone.
30
+ *
31
+ * When adding a state-changing MCP tool to `action/mcp/server.ts`, add its
32
+ * canonical name here too. Inclusions justified inline.
33
+ */
34
+ export declare const SUBAGENT_DENIED_TOOLS: readonly ["checkout_pr", "push_branch", "push_tags", "delete_branch", "create_pull_request", "update_pull_request_body", "create_issue", "create_issue_comment", "edit_issue_comment", "reply_to_review_comment", "create_pull_request_review", "resolve_review_thread", "add_labels", "set_output", "report_progress", "select_mode", "start_dependency_installation", "kill_background", "upload_file"];
35
+ export type SubagentDeniedTool = (typeof SUBAGENT_DENIED_TOOLS)[number];
36
+ /**
37
+ * Strip the runtime-specific MCP prefix from a tool name and return the
38
+ * canonical bare name (matching FastMCP's `name:` field). Returns the input
39
+ * unchanged if it doesn't carry a known prefix — keeping comparison simple
40
+ * for native (non-MCP) tools, which never appear on the deny list anyway.
41
+ */
42
+ export declare function stripMcpPrefix(toolName: string): string;
43
+ /**
44
+ * Whether `toolName` (in any runtime's prefix style) names a tool that
45
+ * subagents must not call.
46
+ */
47
+ export declare function isSubagentDeniedTool(toolName: string): boolean;
48
+ /**
49
+ * Human-readable refusal surfaced to the model when a denied tool is gated.
50
+ * Phrased so a halfway-attentive subagent realises (a) the tool is denied to
51
+ * it specifically, (b) why (shared in-process state with the orchestrator),
52
+ * and (c) what to do instead (report findings; the orchestrator can call the
53
+ * tool directly).
54
+ */
55
+ export declare function buildSubagentDenyMessage(toolName: string): string;