npm - pullfrog - Versions diffs - 0.1.1 → 0.1.3 - Mend

pullfrog 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/dist/agents/claude.d.ts +19 -0
package/dist/agents/opencodePlugin.d.ts +60 -0
package/dist/agents/postRun.d.ts +35 -30
package/dist/agents/shared.d.ts +26 -13
package/dist/cli.mjs +678 -241
package/dist/index.js +675 -238
package/dist/internal.js +89 -67
package/dist/mcp/comment.d.ts +35 -0
package/dist/mcp/review.d.ts +2 -4
package/dist/mcp/server.d.ts +1 -68
package/dist/modes.d.ts +10 -0
package/dist/toolState.d.ts +109 -0
package/dist/utils/apiUrl.d.ts +8 -0
package/dist/utils/browser.d.ts +1 -1
package/dist/utils/errorReport.d.ts +1 -1
package/dist/utils/instructions.d.ts +4 -1
package/dist/utils/learnings.d.ts +31 -0
package/dist/utils/run.d.ts +1 -1
package/dist/utils/setup.d.ts +1 -1
package/dist/utils/subprocess.d.ts +0 -1
package/package.json +1 -1
package/dist/mcp/learnings.d.ts +0 -6

package/dist/agents/claude.d.ts CHANGED

@@ -1 +1,20 @@
+import type { TodoTracker } from "../utils/todoTracking.ts";
+import { type AgentResult } from "./shared.ts";
+type RunParams = {
+    label: string;
+    args: string[];
+    cwd: string;
+    env: Record<string, string | undefined>;
+    todoTracker?: TodoTracker | undefined;
+    onActivityTimeout?: (() => void) | undefined;
+    onToolUse?: ((event: {
+        toolName: string;
+        input: unknown;
+    }) => void) | undefined;
+};
+type ClaudeRunResult = AgentResult & {
+    sessionId?: string | undefined;
+};
+export declare function runClaude(params: RunParams): Promise<ClaudeRunResult>;
 export declare const claude: import("./shared.ts").Agent;
+export {};

package/dist/agents/opencodePlugin.d.ts ADDED

@@ -0,0 +1,60 @@
+/**
+ * Source for the opencode plugin we drop into the per-run tmpdir at
+ * `<XDG_CONFIG_HOME>/opencode/plugin/pullfrog-events.ts`. The harness already
+ * redirects `XDG_CONFIG_HOME` to `ctx.tmpdir/.config` (see `opencode.ts`
+ * `homeEnv`), so opencode's auto-discovery scans the tmpdir, never the user's
+ * working tree. opencode's `Global.Path.config` resolves to
+ * `path.join(xdgConfig, "opencode")` and the config layer auto-discovers
+ * plugins from every directory in its scan list — including
+ * `Global.Path.config` — by globbing `{plugin,plugins}/*.{ts,js}` via
+ * `ConfigPlugin.load(dir)`.
+ *
+ * We MUST NOT write into the user's repo working tree. The repo is a checkout
+ * the agent operates on; only the agent's own tools (gated by
+ * `OPENCODE_PERMISSION`) may modify it. The whole reason we redirect HOME and
+ * XDG_CONFIG_HOME is so harness-side files (config, plugins, scratch state)
+ * land in the tmpdir.
+ *
+ * Why this plugin exists: opencode's `task` tool runs subagents in-process and
+ * the CLI's `cli/cmd/run.ts` event loop filters `part.sessionID !== sessionID`,
+ * so subagent-internal `message.part.updated` events are silently discarded
+ * before reaching our parent NDJSON stream. plugins, by contrast, receive
+ * EVERY bus event via `bus.subscribeAll()` regardless of session.
+ *
+ * The plugin re-emits every relevant bus event onto opencode's stdout as a
+ * single JSON line wrapped in a sentinel envelope. our `runOpenCode` parser
+ * recognises the envelope, unpacks it, and routes the inner part through the
+ * existing handlers with a per-session label from `SessionLabeler` so each
+ * subagent's tool calls / text appear inline alongside the orchestrator's.
+ *
+ * Dumb plugin / smart parent split: the plugin emits every part for every
+ * session. the parent dedupes against the orchestrator's own session id (which
+ * it already knows from the `init` event). this keeps the plugin trivial and
+ * keeps the per-session attribution logic on the parent side where the
+ * SessionLabeler already lives.
+ *
+ * Event-name prefixing: the wrapped event-type sentinel is
+ * `pullfrog_bus_event` — picked to be unmistakably ours so a future opencode
+ * release that introduces a coincidentally-named event type won't collide.
+ */
+export declare const PULLFROG_BUS_EVENT_TYPE: "pullfrog_bus_event";
+export declare const PULLFROG_OPENCODE_PLUGIN_FILENAME: "pullfrog-events.ts";
+/**
+ * Source written verbatim to `<XDG_CONFIG_HOME>/opencode/plugin/pullfrog-events.ts`.
+ *
+ * - Structural typing only (no runtime import of `@opencode-ai/plugin`):
+ *   opencode installs that dep into the directory containing the plugin
+ *   alongside discovery, but a) the dep isn't required for the structural
+ *   shape we use, and b) keeping zero imports avoids any module-resolution
+ *   coupling to opencode's plugin-loader internals across versions.
+ * - default export is the plugin factory (opencode's plugin loader accepts
+ *   default exports as the server entrypoint).
+ * - we only forward `message.part.updated`. that's where the user-visible
+ *   subagent activity (tool calls, text, step transitions) lives. add more
+ *   event types here if the parent needs them.
+ * - JSON.stringify+single write keeps the line atomic up to PIPE_BUF (4KB on
+ *   Linux). longer parts may interleave with concurrent stdout writers; the
+ *   parser tolerates non-JSON lines (logs them at debug) so a torn line is a
+ *   missed event, not a crash.
+ */
+export declare const PULLFROG_OPENCODE_PLUGIN_SOURCE: string;

package/dist/agents/postRun.d.ts CHANGED

@@ -1,5 +1,15 @@
-import { type AgentId } from "../external.ts";
-import { type AgentResult, type AgentUsage, type PostRunIssues, type StopHookFailure } from "./shared.ts";
+import type { ToolState } from "../toolState.ts";
+import { type AgentResult, type AgentRunContext, type AgentUsage, type PostRunIssues, type StopHookFailure } from "./shared.ts";
+/**
+ * derive "agent picked a review mode but never produced visible output" from
+ * the literal facts on `toolState`. returns the selected mode when the gate
+ * should fire, `null` otherwise — pure read, no side effects, safe to invoke
+ * after every agent attempt.
+ *
+ * the gate is anchored to `hadProgressComment` so silent runs (non-issue
+ * events, dispatcher skipped seeding) don't fire a nudge there's no UI for.
+ */
+export declare function getUnsubmittedReview(toolState: ToolState): "Review" | "IncrementalReview" | null;
 /**
  * run the user-configured stop hook.
  *
@@ -16,35 +26,38 @@ import { type AgentResult, type AgentUsage, type PostRunIssues, type StopHookFai
 export declare function executeStopHook(script: string): Promise<StopHookFailure | null>;
 export declare function buildStopHookPrompt(failure: StopHookFailure): string;
 export declare function buildSummaryStalePrompt(filePath: string): string;
+export declare function buildUnsubmittedReviewPrompt(mode: "Review" | "IncrementalReview"): string;
 /**
  * check the post-run gates: did the stop hook pass, is the working tree
  * clean, and (when applicable) did the agent touch the rolling PR summary
- * snapshot? returns everything that still needs nudging so the caller can
- * render a single combined resume prompt.
+ * snapshot or produce review output? returns everything that still needs
+ * nudging so the caller can render a single combined resume prompt.
  *
- * the summary-stale check is skipped when `summaryFilePath` / `summarySeed`
- * are not provided; this is the common case (non-PR runs, runs where the
- * dispatcher didn't request snapshot generation, runs where the seed step
- * failed). loop callers also pass these as undefined after the agent has
- * already been nudged once, to avoid burning the retry budget on a soft
- * non-blocking gate.
+ * reads run state directly off `ctx.toolState` so each invocation sees the
+ * latest mutations from MCP tool calls. `skipSummaryStale` lets the loop
+ * suppress the summary-stale check after the one-shot nudge has been
+ * delivered (re-firing it would burn the retry budget on a soft gate the
+ * agent has already decided not to act on).
  */
-export declare function collectPostRunIssues(params: {
-    stopScript: string | null | undefined;
-    summaryFilePath?: string | undefined;
-    summarySeed?: string | undefined;
+export declare function collectPostRunIssues(ctx: AgentRunContext, options?: {
+    skipSummaryStale?: boolean;
 }): Promise<PostRunIssues>;
 export declare function buildPostRunPrompt(issues: PostRunIssues): string;
 /**
- * prompt for a dedicated post-run reflection turn nudging the agent to call
- * `update_learnings` if it discovered anything worth persisting.
+ * prompt for a dedicated post-run reflection turn nudging the agent to edit
+ * the rolling learnings file if it discovered anything worth persisting.
+ *
+ * this exists because passive "if you learned something, write it down"
+ * instructions baked into mode checklists are frequently ignored — the agent
+ * stays focused on the task and the meta-ask falls through. delivering it
+ * as its own resume turn, with nothing competing for attention, raises the
+ * fire rate substantially.
  *
- * this exists because the learnings step baked into mode checklists is
- * frequently ignored — the agent stays focused on the task and the meta-ask
- * falls through. delivering it as its own resume turn, with nothing competing
- * for attention, raises the fire rate substantially.
+ * the file is the single source of truth — there is no separate MCP tool
+ * call. the server reads the file at end-of-run and persists any edits to
+ * `Repo.learnings`.
  */
-export declare function buildLearningsReflectionPrompt(agentId: AgentId): string;
+export declare function buildLearningsReflectionPrompt(filePath: string): string;
 /**
  * shared post-run retry loop used by every agent harness.
  *
@@ -65,17 +78,9 @@ export declare function buildLearningsReflectionPrompt(agentId: AgentId): string
  * behavior: they're logged but don't fail the run.
  */
 export declare function runPostRunRetryLoop<R extends AgentResult>(params: {
+    ctx: AgentRunContext;
     initialResult: R;
     initialUsage: AgentUsage | undefined;
-    stopScript: string | null | undefined;
-    /** absolute path to the seeded PR summary file. when set together with
-     * `summarySeed`, the loop checks after each agent attempt whether the
-     * file has been edited; if not, it nudges the agent ONCE via a resume
-     * turn (subsequent iterations skip the check so we don't keep burning
-     * retries on a soft gate when the agent has decided no edit is warranted). */
-    summaryFilePath?: string | undefined;
-    /** exact bytes of the seeded summary file used for the unchanged-check. */
-    summarySeed?: string | undefined;
     resume: (context: {
         prompt: string;
         previousResult: R;

package/dist/agents/shared.d.ts CHANGED

@@ -1,4 +1,5 @@
 import type { AgentId } from "../external.ts";
+import type { ToolState } from "../toolState.ts";
 import type { ResolvedInstructions } from "../utils/instructions.ts";
 import type { ResolvedPayload } from "../utils/payload.ts";
 import type { TodoTracker } from "../utils/todoTracking.ts";
@@ -25,6 +26,17 @@ export interface PostRunIssues {
      * seed, i.e. the agent never touched it. soft gate — nudges once via a
      * resume turn but never fails the run, parallel to dirtyTree semantics. */
     summaryStale?: SummaryStale;
+    /**
+     * populated when the agent selected a review mode but the post-run check
+     * over toolState shows neither a `create_pull_request_review` submission
+     * nor a final `report_progress` write happened. derived inline from
+     * `toolState.selectedMode` + `toolState.review` + `toolState.finalSummaryWritten`
+     * via {@link getUnsubmittedReview} — no parallel toolState flag is stored.
+     * carries the mode name so the resume prompt can reference it. handled like
+     * `stopHook`: nudge via resume, hard-fail if still unsatisfied after
+     * `MAX_POST_RUN_RETRIES`.
+     */
+    unsubmittedReview?: "Review" | "IncrementalReview";
 }
 export declare function hasPostRunIssues(issues: PostRunIssues): boolean;
 /**
@@ -64,7 +76,14 @@ export interface AgentResult {
     usage?: AgentUsage | undefined;
 }
 /**
- * Minimal context passed to agent.run()
+ * Context passed to agent.run() and threaded through the post-run loop.
+ *
+ * design rule: this is the single object that flows through the harness and
+ * downstream utilities by reference. derived predicates (e.g.
+ * `getUnsubmittedReview`), tmpfile paths, and seed bytes live on
+ * `toolState` — read them at the call site, do not duplicate them onto this
+ * interface. utilities that need run state should accept `ctx` whole, not
+ * destructure a narrow subset.
  */
 export interface AgentRunContext {
     payload: ResolvedPayload;
@@ -80,19 +99,13 @@ export interface AgentRunContext {
      */
     stopScript?: string | null | undefined;
     /**
-     * absolute path to the rolling PR summary tmpfile, when one was seeded
-     * for this run (Review / IncrementalReview / pr-summary Task). enables
-     * a post-run sanity nudge that prompts the agent if the file is still
-     * byte-identical to its seed.
-     */
-    summaryFilePath?: string | undefined;
-    /**
-     * exact bytes of the seeded summary file. compared against the current
-     * file content after each agent attempt to detect "agent forgot to edit
-     * the summary" — particularly common with smaller models that lose
-     * track of multi-step instructions.
+     * mutable per-run state shared with the MCP server (by reference). post-run
+     * gates read fresh values from it after each agent attempt — `summaryFilePath`,
+     * `summarySeed`, `selectedMode`, `review`, `finalSummaryWritten`,
+     * `hadProgressComment` are all consulted by `collectPostRunIssues`. see
+     * `action/toolState.ts` for the literal-state design rule.
      */
-    summarySeed?: string | undefined;
+    toolState: ToolState;
     /**
      * called synchronously when the agent subprocess is killed for inner
      * activity timeout. lets main.ts tear down shared resources (MCP HTTP