pullfrog 0.0.202 → 0.0.204

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ import { type AgentId } from "../external.ts";
2
+ import { type AgentResult, type AgentUsage, type PostRunIssues, type StopHookFailure } from "./shared.ts";
3
+ /**
4
+ * run the user-configured stop hook `script` and report whether it failed.
5
+ *
6
+ * analogous to `executeLifecycleHook` (which soft-fails with a warning), but
7
+ * returns structured output so agent harnesses can feed the failure back into
8
+ * the session as a resume prompt.
9
+ * outcomes (a passing hook also resolves to null):
10
+ * - non-zero exit → `StopHookFailure`, actionable: the output is fed to the
11
+ * agent so it can fix the underlying issue.
12
+ * - timeout / spawn error → null, treated as passed: we can't usefully ask the
13
+ * agent to fix an infrastructure problem, and retrying would risk infinite
14
+ * loops.
15
+ */
16
+ export declare function executeStopHook(script: string): Promise<StopHookFailure | null>;
17
+ export declare function buildStopHookPrompt(failure: StopHookFailure): string;
18
+ /**
19
+ * check the two post-run gates: did the stop hook pass and is the working
20
+ * tree clean? returns everything that still needs fixing so the caller can
21
+ * render a single combined resume prompt.
22
+ */
23
+ export declare function collectPostRunIssues(params: {
24
+ stopScript: string | null | undefined;
25
+ }): Promise<PostRunIssues>;
26
+ export declare function buildPostRunPrompt(issues: PostRunIssues): string;
27
+ /**
28
+ * prompt for a dedicated post-run reflection turn nudging the agent to call
29
+ * `update_learnings` if it discovered anything worth persisting.
30
+ *
31
+ * this exists because the learnings step baked into mode checklists is
32
+ * frequently ignored — the agent stays focused on the task and the meta-ask
33
+ * falls through. delivering it as its own resume turn, with nothing competing
34
+ * for attention, raises the fire rate substantially.
35
+ */
36
+ export declare function buildLearningsReflectionPrompt(agentId: AgentId): string;
37
+ /**
38
+ * shared post-run retry loop used by every agent harness.
39
+ *
40
+ * checks the post-run gates (stop hook + dirty tree), and if either is
41
+ * failing, invokes `resume` to let the agent fix and push in the same turn.
42
+ * gives up after `MAX_POST_RUN_RETRIES` attempts. the `canResume` predicate is
43
+ * consulted before each retry — harnesses that can't re-enter the session
44
+ * (e.g. claude without a sessionId) return false here.
45
+ *
46
+ * an optional `reflectionPrompt` fires exactly once, after the gates first
47
+ * observe a clean state. it's a one-shot nudge (e.g. "update learnings if
48
+ * relevant"), not a gate, so it does not consume the gate-retry budget. if
49
+ * the reflection turn dirties the tree, the loop picks that up on the next
50
+ * iteration via the normal dirty-tree gate.
51
+ *
52
+ * stop hook must pass for the run to succeed; persistent hook failures are
53
+ * surfaced as `AgentResult.error`. dirty-tree-only failures preserve prior
54
+ * behavior: they're logged but don't fail the run.
55
+ */
56
+ export declare function runPostRunRetryLoop<R extends AgentResult>(params: {
57
+ initialResult: R;
58
+ initialUsage: AgentUsage | undefined;
59
+ stopScript: string | null | undefined;
60
+ resume: (context: {
61
+ prompt: string;
62
+ previousResult: R;
63
+ }) => Promise<R>;
64
+ canResume?: ((result: R) => boolean) | undefined;
65
+ reflectionPrompt?: string | undefined;
66
+ }): Promise<AgentResult>;
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Definition of the `reviewfrog` named subagent — the constrained
3
+ * read-only worker dispatched by Build mode self-review and the in-Pullfrog
4
+ * /anneal multi-lens review.
5
+ *
6
+ * The contract: non-mutative + non-recursive.
7
+ * allow: file reads, grep/glob, web search/fetch, read-only MCP queries
8
+ * deny: state-changing MCP tools, file writes, shell, nested subagent dispatch
9
+ *
10
+ * Enforcement is prose-only. We previously hand-maintained a deny-list of
11
+ * mutating MCP tools against action/mcp/server.ts and wired it into per-agent
12
+ * `disallowedTools` (claude) / `tools` deny map (opencode), but the list was
13
+ * fragile — a future mutating tool added to the MCP server without a
14
+ * corresponding update here would silently grant write access to the reviewer.
15
+ * Rather than invert to an allowlist (smaller surface but still drifts) or add
16
+ * a structural test, we lean on the system prompt below: it states the rule
17
+ * as a no-op-if-reverted invariant the model can apply to any tool, including
18
+ * ones added after this comment was written.
19
+ *
20
+ * Note: per-agent `disallowedTools` in claude-code is also upstream-broken
21
+ * for subagent-spawned tool calls (anthropics/claude-agent-sdk-typescript#172,
22
+ * open as of latest update Mar 2026), so even a maintained list would not
23
+ * have provided a real fence on that runtime.
24
+ */
25
+ export declare const REVIEWER_AGENT_NAME = "reviewfrog";
26
+ /**
27
+ * System prompt baked into the named reviewer subagent. The orchestrator
28
+ * supplies the per-call task content (YOUR TASK, the diff, the lens) at
29
+ * dispatch time; this preamble enforces the role and constraints regardless
30
+ * of what the orchestrator sends.
31
+ */
32
+ export declare const REVIEWER_SYSTEM_PROMPT: string;
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Track per-session labels so log lines from parallel subagents can be
3
+ * differentiated. The orchestrator dispatches lens subagents (e.g. reviewfrog)
4
+ * via the Task tool; each subagent runs in its own opencode/claude Session
5
+ * with its own `sessionID` (or `session_id`) tag on the NDJSON event stream.
6
+ *
7
+ * Without per-session prefixing, parallel subagent tool_use / tool_result /
8
+ * text events appear as a single interleaved stream tagged with `[Pullfrog]`,
9
+ * making it impossible for a human reading the logs to attribute work to a
10
+ * specific lens.
11
+ *
12
+ * The labeler is deliberately runtime-agnostic — both opencode.ts and
13
+ * claude.ts feed it the same shape. The contract is FIFO: when the orchestrator
14
+ * dispatches N task tool_use blocks in a single assistant turn (the parallel
15
+ * fan-out the multi-lens prompt requires), the i-th new sessionID is assumed
16
+ * to belong to the i-th task dispatch. This is correct as long as parallel
17
+ * dispatches are emitted in source-order and the runtimes respect that order
18
+ * when assigning child sessions; we do not depend on it for correctness of
19
+ * the read-only contract — only for log readability.
20
+ */
21
+ export interface TaskDispatchInput {
22
+ description?: string | undefined;
23
+ subagent_type?: string | undefined;
24
+ prompt?: string | undefined;
25
+ }
26
+ export declare const ORCHESTRATOR_LABEL = "orchestrator";
27
+ /**
28
+ * Extract a human-readable label from a Task tool's input. Tries (in order):
29
+ * 1. explicit `lens: <name>` marker on a line in the prompt — preferred,
30
+ * lets the orchestrator name the lens deterministically
31
+ * 2. the Task tool's `description` field — short, written by orchestrator
32
+ * per call, usually enough
33
+ * 3. the `subagent_type` (e.g. `reviewfrog`) — falls back to the named
34
+ * subagent identity when description is missing
35
+ * 4. generic "subagent" — last resort
36
+ */
37
+ export declare function deriveLabelFromTaskInput(input: TaskDispatchInput): string;
38
+ /**
39
+ * Stateful tracker mapping sessionIDs to human labels.
40
+ *
41
+ * Lifecycle:
42
+ * - First call to `labelFor()` returns ORCHESTRATOR_LABEL and binds that
43
+ * sessionID to it. Every subsequent event from that session gets the
44
+ * same label.
45
+ * - When the orchestrator emits a Task tool_use, the harness calls
46
+ * `recordTaskDispatch()` to push the dispatch's derived label onto a
47
+ * pending FIFO queue.
48
+ * - The next previously-unseen sessionID consumes the head of the queue.
49
+ * - If `labelFor()` is called for a new session with an empty queue
50
+ * (e.g. a subagent emitted events before the parent's tool_use was
51
+ * parsed, or the runtime spawned a session we didn't expect), the
52
+ * labeler falls back to `subagent#N` so log lines remain attributable.
53
+ */
54
+ export declare class SessionLabeler {
55
+ private readonly labels;
56
+ private readonly pendingLabels;
57
+ private fallbackCounter;
58
+ recordTaskDispatch(input: TaskDispatchInput): string;
59
+ /**
60
+ * Return a label for the given sessionID. Binds on first call.
61
+ * Pass undefined/null/empty for events that lack a session id — the caller
62
+ * gets ORCHESTRATOR_LABEL so the line is still attributable.
63
+ */
64
+ labelFor(sessionID: string | undefined | null): string;
65
+ /** number of distinct sessions seen so far (for diagnostics) */
66
+ size(): number;
67
+ /** all (sessionID, label) pairs, oldest first */
68
+ entries(): Array<[string, string]>;
69
+ /** how many pending labels are queued waiting to bind to a new session */
70
+ pendingDispatchCount(): number;
71
+ }
72
+ /**
73
+ * Format a log message with a session label prefix in magenta. Mirrors the
74
+ * style of utils/log.ts:prefixLines() so per-session prefixes look the same
75
+ * as the dormant withLogPrefix-based ones.
76
+ */
77
+ export declare function formatWithLabel(label: string, message: string): string;
@@ -3,9 +3,22 @@ import type { ResolvedInstructions } from "../utils/instructions.ts";
3
3
  import type { ResolvedPayload } from "../utils/payload.ts";
4
4
  import type { TodoTracker } from "../utils/todoTracking.ts";
5
5
  export declare const MAX_STDERR_LINES = 20;
6
- export declare const MAX_COMMIT_RETRIES = 3;
6
+ /**
7
+ * how many times the post-run loop may resume the agent to fix a dirty tree
8
+ * or a failing stop hook before giving up.
9
+ */
10
+ export declare const MAX_POST_RUN_RETRIES = 3;
7
11
  export declare function getGitStatus(): string;
8
- export declare function buildCommitPrompt(_agentId: AgentId, status: string): string;
12
+ export declare function buildCommitPrompt(status: string): string;
13
+ export interface StopHookFailure {
14
+ exitCode: number;
15
+ output: string;
16
+ }
17
+ export interface PostRunIssues {
18
+ stopHook?: StopHookFailure;
19
+ dirtyTree?: string;
20
+ }
21
+ export declare function hasPostRunIssues(issues: PostRunIssues): boolean;
9
22
  /**
10
23
  * token/cost usage data from a single agent run.
11
24
  *
@@ -52,6 +65,12 @@ export interface AgentRunContext {
52
65
  tmpdir: string;
53
66
  instructions: ResolvedInstructions;
54
67
  todoTracker?: TodoTracker | undefined;
68
+ /**
69
+ * user-configured stop hook script. runs after the agent finishes each
70
+ * attempt; non-zero exit resumes the agent with the hook output as
71
+ * guidance. null when the repo has no stop hook configured.
72
+ */
73
+ stopScript?: string | null | undefined;
55
74
  /**
56
75
  * called synchronously when the agent subprocess is killed for inner
57
76
  * activity timeout. lets main.ts tear down shared resources (MCP HTTP
@@ -72,7 +91,7 @@ export declare function formatCostUsd(costUsd: number): string;
72
91
  * merge two AgentUsage snapshots into one running total.
73
92
  *
74
93
  * both agent harnesses invoke their runner multiple times per `run()` when the
75
- * post-run dirty-tree loop kicks in (MAX_COMMIT_RETRIES). each invocation
94
+ * post-run retry loop kicks in (MAX_POST_RUN_RETRIES). each invocation
76
95
  * produces its own AgentUsage; we sum them so downstream callers (usage
77
96
  * summary, WorkflowRun persistence) see the whole session — not just the
78
97
  * final retry's slice.