pullfrog 0.0.202 → 0.0.204
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/postRun.d.ts +66 -0
- package/dist/agents/reviewer.d.ts +32 -0
- package/dist/agents/sessionLabeler.d.ts +77 -0
- package/dist/agents/shared.d.ts +22 -3
- package/dist/cli.mjs +1231 -339
- package/dist/external.d.ts +6 -3
- package/dist/index.js +1146 -306
- package/dist/internal/index.d.ts +3 -1
- package/dist/internal.js +381 -70
- package/dist/mcp/comment.d.ts +6 -3
- package/dist/mcp/git.d.ts +2 -0
- package/dist/mcp/review.d.ts +35 -0
- package/dist/mcp/reviewComments.d.ts +29 -0
- package/dist/mcp/server.d.ts +9 -2
- package/dist/models.d.ts +17 -0
- package/dist/skills/git-archaeology/SKILL.md +188 -0
- package/dist/utils/payload.d.ts +8 -2
- package/dist/utils/progressComment.d.ts +146 -0
- package/dist/utils/runContext.d.ts +17 -0
- package/dist/utils/runContextData.d.ts +2 -1
- package/dist/utils/skills.d.ts +10 -0
- package/package.json +1 -1
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { type AgentId } from "../external.ts";
|
|
2
|
+
import { type AgentResult, type AgentUsage, type PostRunIssues, type StopHookFailure } from "./shared.ts";
|
|
3
|
+
/**
|
|
4
|
+
* run the user-configured stop hook.
|
|
5
|
+
*
|
|
6
|
+
* parallel to `executeLifecycleHook` (which soft-fails with a warning), but
|
|
7
|
+
* returns structured output so agent harnesses can feed the failure back into
|
|
8
|
+
* the session as a resume prompt.
|
|
9
|
+
*
|
|
10
|
+
* - non-zero exit → `StopHookFailure`, actionable: the output is fed to the
|
|
11
|
+
* agent so it can fix the underlying issue.
|
|
12
|
+
* - timeout / spawn error → null, treated as passed: we can't usefully ask the
|
|
13
|
+
* agent to fix an infrastructure problem, and retrying would risk infinite
|
|
14
|
+
* loops.
|
|
15
|
+
*/
|
|
16
|
+
export declare function executeStopHook(script: string): Promise<StopHookFailure | null>;
|
|
17
|
+
export declare function buildStopHookPrompt(failure: StopHookFailure): string;
|
|
18
|
+
/**
|
|
19
|
+
* check the two post-run gates: did the stop hook pass and is the working
|
|
20
|
+
* tree clean? returns everything that still needs fixing so the caller can
|
|
21
|
+
* render a single combined resume prompt.
|
|
22
|
+
*/
|
|
23
|
+
export declare function collectPostRunIssues(params: {
|
|
24
|
+
stopScript: string | null | undefined;
|
|
25
|
+
}): Promise<PostRunIssues>;
|
|
26
|
+
export declare function buildPostRunPrompt(issues: PostRunIssues): string;
|
|
27
|
+
/**
|
|
28
|
+
* prompt for a dedicated post-run reflection turn nudging the agent to call
|
|
29
|
+
* `update_learnings` if it discovered anything worth persisting.
|
|
30
|
+
*
|
|
31
|
+
* this exists because the learnings step baked into mode checklists is
|
|
32
|
+
* frequently ignored — the agent stays focused on the task and the meta-ask
|
|
33
|
+
* falls through. delivering it as its own resume turn, with nothing competing
|
|
34
|
+
* for attention, raises the fire rate substantially.
|
|
35
|
+
*/
|
|
36
|
+
export declare function buildLearningsReflectionPrompt(agentId: AgentId): string;
|
|
37
|
+
/**
|
|
38
|
+
* shared post-run retry loop used by every agent harness.
|
|
39
|
+
*
|
|
40
|
+
* checks the post-run gates (stop hook + dirty tree), and if either is
|
|
41
|
+
* failing, invokes `resume` to let the agent fix and push in the same turn.
|
|
42
|
+
* bails at `MAX_POST_RUN_RETRIES` attempts. the `canResume` predicate is
|
|
43
|
+
* consulted before each retry — harnesses that can't re-enter the session
|
|
44
|
+
* (e.g. claude without a sessionId) return false here.
|
|
45
|
+
*
|
|
46
|
+
* an optional `reflectionPrompt` fires exactly once, after the gates first
|
|
47
|
+
* observe a clean state. it's a one-shot nudge (e.g. "update learnings if
|
|
48
|
+
* relevant"), not a gate, so it does not consume the gate-retry budget. if
|
|
49
|
+
* the reflection turn dirties the tree, the loop picks that up on the next
|
|
50
|
+
* iteration via the normal dirty-tree gate.
|
|
51
|
+
*
|
|
52
|
+
* stop hook must pass for the run to succeed; persistent hook failures are
|
|
53
|
+
* surfaced as `AgentResult.error`. dirty-tree-only failures preserve prior
|
|
54
|
+
* behavior: they're logged but don't fail the run.
|
|
55
|
+
*/
|
|
56
|
+
export declare function runPostRunRetryLoop<R extends AgentResult>(params: {
|
|
57
|
+
initialResult: R;
|
|
58
|
+
initialUsage: AgentUsage | undefined;
|
|
59
|
+
stopScript: string | null | undefined;
|
|
60
|
+
resume: (context: {
|
|
61
|
+
prompt: string;
|
|
62
|
+
previousResult: R;
|
|
63
|
+
}) => Promise<R>;
|
|
64
|
+
canResume?: ((result: R) => boolean) | undefined;
|
|
65
|
+
reflectionPrompt?: string | undefined;
|
|
66
|
+
}): Promise<AgentResult>;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Definition of the `reviewfrog` named subagent — the constrained
|
|
3
|
+
* read-only worker dispatched by Build mode self-review and the in-Pullfrog
|
|
4
|
+
* /anneal multi-lens review.
|
|
5
|
+
*
|
|
6
|
+
* The contract: non-mutative + non-recursive.
|
|
7
|
+
* allow: file reads, grep/glob, web search/fetch, read-only MCP queries
|
|
8
|
+
* deny: state-changing MCP tools, file writes, shell, nested subagent dispatch
|
|
9
|
+
*
|
|
10
|
+
* Enforcement is prose-only. We previously hand-maintained a deny-list of
|
|
11
|
+
* mutating MCP tools against action/mcp/server.ts and wired it into per-agent
|
|
12
|
+
* `disallowedTools` (claude) / `tools` deny map (opencode), but the list was
|
|
13
|
+
* fragile — a future mutating tool added to the MCP server without a
|
|
14
|
+
* corresponding update here would silently grant write access to the reviewer.
|
|
15
|
+
* Rather than invert to an allowlist (smaller surface but still drifts) or add
|
|
16
|
+
* a structural test, we lean on the system prompt below: it states the rule
|
|
17
|
+
* as a no-op-if-reverted invariant the model can apply to any tool, including
|
|
18
|
+
* ones added after this comment was written.
|
|
19
|
+
*
|
|
20
|
+
* Note: per-agent `disallowedTools` in claude-code is also upstream-broken
|
|
21
|
+
* for subagent-spawned tool calls (anthropics/claude-agent-sdk-typescript#172,
|
|
22
|
+
* still open as of its latest update in Mar 2026), so even a maintained list would not
|
|
23
|
+
* have provided a real fence on that runtime.
|
|
24
|
+
*/
|
|
25
|
+
export declare const REVIEWER_AGENT_NAME = "reviewfrog";
|
|
26
|
+
/**
|
|
27
|
+
* System prompt baked into the named reviewer subagent. The orchestrator
|
|
28
|
+
* supplies the per-call task content (YOUR TASK, the diff, the lens) at
|
|
29
|
+
* dispatch time; this preamble enforces the role and constraints regardless
|
|
30
|
+
* of what the orchestrator sends.
|
|
31
|
+
*/
|
|
32
|
+
export declare const REVIEWER_SYSTEM_PROMPT: string;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Track per-session labels so log lines from parallel subagents can be
|
|
3
|
+
* differentiated. The orchestrator dispatches lens subagents (e.g. reviewfrog)
|
|
4
|
+
* via the Task tool; each subagent runs in its own opencode/claude Session
|
|
5
|
+
* with its own `sessionID` (or `session_id`) tag on the NDJSON event stream.
|
|
6
|
+
*
|
|
7
|
+
* Without per-session prefixing, parallel subagent tool_use / tool_result /
|
|
8
|
+
* text events appear as a single interleaved stream tagged with `[Pullfrog]`,
|
|
9
|
+
* making it impossible for a human reading the logs to attribute work to a
|
|
10
|
+
* specific lens.
|
|
11
|
+
*
|
|
12
|
+
* The labeler is deliberately runtime-agnostic — both opencode.ts and
|
|
13
|
+
* claude.ts feed it the same shape. The contract is FIFO: when the orchestrator
|
|
14
|
+
* dispatches N task tool_use blocks in a single assistant turn (the parallel
|
|
15
|
+
* fan-out the multi-lens prompt requires), the i-th new sessionID is assumed
|
|
16
|
+
* to belong to the i-th task dispatch. This is correct as long as parallel
|
|
17
|
+
* dispatches are emitted in source-order and the runtimes respect that order
|
|
18
|
+
* when assigning child sessions; we do not depend on it for correctness of
|
|
19
|
+
* the read-only contract — only for log readability.
|
|
20
|
+
*/
|
|
21
|
+
export interface TaskDispatchInput {
|
|
22
|
+
description?: string | undefined;
|
|
23
|
+
subagent_type?: string | undefined;
|
|
24
|
+
prompt?: string | undefined;
|
|
25
|
+
}
|
|
26
|
+
export declare const ORCHESTRATOR_LABEL = "orchestrator";
|
|
27
|
+
/**
|
|
28
|
+
* Extract a human-readable label from a Task tool's input. Tries (in order):
|
|
29
|
+
* 1. explicit `lens: <name>` marker on a line in the prompt — preferred,
|
|
30
|
+
* lets the orchestrator name the lens deterministically
|
|
31
|
+
* 2. the Task tool's `description` field — short, written by orchestrator
|
|
32
|
+
* per call, usually enough
|
|
33
|
+
* 3. the `subagent_type` (e.g. `reviewfrog`) — falls back to the named
|
|
34
|
+
* subagent identity when description is missing
|
|
35
|
+
* 4. generic "subagent" — last resort
|
|
36
|
+
*/
|
|
37
|
+
export declare function deriveLabelFromTaskInput(input: TaskDispatchInput): string;
|
|
38
|
+
/**
|
|
39
|
+
* Stateful tracker mapping sessionIDs to human labels.
|
|
40
|
+
*
|
|
41
|
+
* Lifecycle:
|
|
42
|
+
* - First call to `labelFor()` returns ORCHESTRATOR_LABEL and binds that
|
|
43
|
+
* sessionID to it. Every subsequent event from that session gets the
|
|
44
|
+
* same label.
|
|
45
|
+
* - When the orchestrator emits a Task tool_use, the harness calls
|
|
46
|
+
* `recordTaskDispatch()` to push the dispatch's derived label onto a
|
|
47
|
+
* pending FIFO queue.
|
|
48
|
+
* - The next previously-unseen sessionID consumes the head of the queue.
|
|
49
|
+
* - If `labelFor()` is called for a new session with an empty queue
|
|
50
|
+
* (e.g. a subagent emitted events before the parent's tool_use was
|
|
51
|
+
* parsed, or the runtime spawned a session we didn't expect), the
|
|
52
|
+
* labeler falls back to `subagent#N` so log lines remain attributable.
|
|
53
|
+
*/
|
|
54
|
+
export declare class SessionLabeler {
|
|
55
|
+
private readonly labels;
|
|
56
|
+
private readonly pendingLabels;
|
|
57
|
+
private fallbackCounter;
|
|
58
|
+
recordTaskDispatch(input: TaskDispatchInput): string;
|
|
59
|
+
/**
|
|
60
|
+
* Return a label for the given sessionID. Binds on first call.
|
|
61
|
+
* Pass undefined/empty for events that lack a session id — the caller
|
|
62
|
+
* gets ORCHESTRATOR_LABEL so the line is still attributable.
|
|
63
|
+
*/
|
|
64
|
+
labelFor(sessionID: string | undefined | null): string;
|
|
65
|
+
/** number of distinct sessions seen so far (for diagnostics) */
|
|
66
|
+
size(): number;
|
|
67
|
+
/** all (sessionID, label) pairs, oldest first */
|
|
68
|
+
entries(): Array<[string, string]>;
|
|
69
|
+
/** how many pending labels are queued waiting to bind to a new session */
|
|
70
|
+
pendingDispatchCount(): number;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Format a log message with a session label prefix in magenta. Mirrors the
|
|
74
|
+
* style of utils/log.ts:prefixLines() so per-session prefixes look the same
|
|
75
|
+
* as the dormant withLogPrefix-based ones.
|
|
76
|
+
*/
|
|
77
|
+
export declare function formatWithLabel(label: string, message: string): string;
|
package/dist/agents/shared.d.ts
CHANGED
|
@@ -3,9 +3,22 @@ import type { ResolvedInstructions } from "../utils/instructions.ts";
|
|
|
3
3
|
import type { ResolvedPayload } from "../utils/payload.ts";
|
|
4
4
|
import type { TodoTracker } from "../utils/todoTracking.ts";
|
|
5
5
|
export declare const MAX_STDERR_LINES = 20;
|
|
6
|
-
|
|
6
|
+
/**
|
|
7
|
+
* how many times the post-run loop may resume the agent to fix a dirty tree
|
|
8
|
+
* or a failing stop hook before giving up.
|
|
9
|
+
*/
|
|
10
|
+
export declare const MAX_POST_RUN_RETRIES = 3;
|
|
7
11
|
export declare function getGitStatus(): string;
|
|
8
|
-
export declare function buildCommitPrompt(
|
|
12
|
+
export declare function buildCommitPrompt(status: string): string;
|
|
13
|
+
export interface StopHookFailure {
|
|
14
|
+
exitCode: number;
|
|
15
|
+
output: string;
|
|
16
|
+
}
|
|
17
|
+
export interface PostRunIssues {
|
|
18
|
+
stopHook?: StopHookFailure;
|
|
19
|
+
dirtyTree?: string;
|
|
20
|
+
}
|
|
21
|
+
export declare function hasPostRunIssues(issues: PostRunIssues): boolean;
|
|
9
22
|
/**
|
|
10
23
|
* token/cost usage data from a single agent run.
|
|
11
24
|
*
|
|
@@ -52,6 +65,12 @@ export interface AgentRunContext {
|
|
|
52
65
|
tmpdir: string;
|
|
53
66
|
instructions: ResolvedInstructions;
|
|
54
67
|
todoTracker?: TodoTracker | undefined;
|
|
68
|
+
/**
|
|
69
|
+
* user-configured stop hook script. runs after the agent finishes each
|
|
70
|
+
* attempt; non-zero exit resumes the agent with the hook output as
|
|
71
|
+
* guidance. null when the repo has no stop hook configured.
|
|
72
|
+
*/
|
|
73
|
+
stopScript?: string | null | undefined;
|
|
55
74
|
/**
|
|
56
75
|
* called synchronously when the agent subprocess is killed for inner
|
|
57
76
|
* activity timeout. lets main.ts tear down shared resources (MCP HTTP
|
|
@@ -72,7 +91,7 @@ export declare function formatCostUsd(costUsd: number): string;
|
|
|
72
91
|
* merge two AgentUsage snapshots into one running total.
|
|
73
92
|
*
|
|
74
93
|
* both agent harnesses invoke their runner multiple times per `run()` when the
|
|
75
|
-
* post-run
|
|
94
|
+
* post-run retry loop kicks in (up to MAX_POST_RUN_RETRIES resumes). each invocation
|
|
76
95
|
* produces its own AgentUsage; we sum them so downstream callers (usage
|
|
77
96
|
* summary, WorkflowRun persistence) see the whole session — not just the
|
|
78
97
|
* final retry's slice.
|