npm - @booplex/bpx-consult - Versions diffs - 0.1.0 - Mend

@booplex/bpx-consult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/CHANGELOG.md +16 -0
package/LICENSE +21 -0
package/README.md +175 -0
package/index.ts +112 -0
package/package.json +54 -0
package/prompts/advisor-system.txt +28 -0
package/src/advisor.ts +137 -0
package/src/cli-backend.ts +256 -0
package/src/config.ts +422 -0
package/src/consensus.ts +173 -0
package/src/context-engine.ts +395 -0
package/src/council.ts +429 -0
package/src/debate.ts +292 -0
package/src/messages.ts +49 -0
package/src/personas.ts +163 -0
package/src/solo.ts +205 -0
package/src/timeout.ts +87 -0
package/src/triggers.ts +190 -0

package/src/solo.ts ADDED Viewed

@@ -0,0 +1,205 @@
+/**
+ * solo — the default consult mode.
+ *
+ * One advisor model, one response. The rpiv-advisor experience, but routed
+ * through the context engine so it never overflows the advisor's window.
+ *
+ * Flow:
+ *   config → resolve solo model → build compacted session context → re-fit to
+ *   the advisor's window (context-engine) → callAdvisor → return as tool result
+ */
+import type { Message } from "@earendil-works/pi-ai";
+import {
+	type AgentToolResult,
+	type AgentToolUpdateCallback,
+	type ExtensionContext,
+	buildSessionContext,
+	convertToLlm,
+} from "@earendil-works/pi-coding-agent";
+import { readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+import { callAdvisor, resolveAdvisor } from "./advisor.js";
+import { buildConsultContext, type ContextBudget } from "./context-engine.js";
+import type { BpxConsultConfig } from "./config.js";
+import { resolveBackend } from "./config.js";
+import { callCliAdvisor } from "./cli-backend.js";
+import {
+	ERR_ABORTED_DETAIL,
+	ERR_CALL_ABORTED,
+	ERR_EMPTY_RESPONSE,
+	ERR_EMPTY_RESPONSE_DETAIL,
+	ERR_NO_API_KEY,
+	ERR_NO_API_KEY_DETAIL,
+	ERR_NO_MODEL,
+	ERR_NO_MODEL_DETAIL,
+	errCallFailed,
+	errCallThrew,
+	errMisconfigured,
+	msgConsulting,
+} from "./messages.js";
+// The advisor system prompt ships with the package at prompts/advisor-system.txt.
+// It is read exactly once, at module load. If the file is missing, unreadable,
+// or blank, a built-in prompt is used instead, so importing this module can
+// never fail because of the prompt file.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+/** Read the bundled prompt file; return the built-in prompt on any failure or blank file. */
+function loadAdvisorSystemPrompt(): string {
+	const builtIn =
+		"You are an advisor model consulted mid-task by a coding executor. Return a PLAN, a CORRECTION, or a STOP signal. Be concrete, cite specifics, never call tools, never manufacture agreement.";
+	let fromDisk: string;
+	try {
+		const promptPath = join(__dirname, "..", "prompts", "advisor-system.txt");
+		fromDisk = readFileSync(promptPath, "utf-8").trim();
+	} catch {
+		return builtIn;
+	}
+	return fromDisk || builtIn;
+}
+const ADVISOR_SYSTEM_PROMPT = loadAdvisorSystemPrompt();
+/**
+ * Metadata attached to solo consult results (both successes and failures) and
+ * to the progress update emitted before the advisor is called.
+ */
+export interface SoloDetails {
+	/** Label of the resolved advisor, or "(none)" when no advisor could be resolved. */
+	advisorModel: string;
+	/** Thinking level read from the solo mode config, when one is set. */
+	thinkingLevel?: string;
+	/** Always "solo" for results produced by this mode. */
+	mode: "solo";
+	/** Token usage reported by the advisor call; left undefined on the CLI backend path. */
+	usage?: { input: number; output: number; total: number };
+	/** Estimated input tokens after the context engine re-fit. */
+	fittedTokens?: number;
+	/** Messages dropped by the sliding window, if any. */
+	omitted?: number;
+	/** Stop reason of the advisor call; "aborted" and "error" mark failed calls. */
+	stopReason?: string;
+	/** Error detail for a failed consult, or whatever message the backend reported. */
+	errorMessage?: string;
+}
+/** Wrap the advisor's reply text and its details as a tool result with one text block. */
+function ok(text: string, details: SoloDetails): AgentToolResult<SoloDetails> {
+	const content = [{ type: "text" as const, text }];
+	return { content, details };
+}
+/**
+ * Wrap an error message and its details as a tool result with one text block.
+ * The result has the same shape as a successful one; the failure is visible
+ * only through the message text and the supplied details.
+ */
+function err(text: string, details: SoloDetails): AgentToolResult<SoloDetails> {
+	const content = [{ type: "text" as const, text }];
+	return { content, details };
+}
+/** Arguments accepted by executeSolo. */
+export interface ExecuteSoloInput {
+	/** Extension context; its session manager and cwd are read, and it is passed to advisor resolution. */
+	ctx: ExtensionContext;
+	/** Consult configuration; `modes.solo` and `contextBudget` are read from it. */
+	config: BpxConsultConfig;
+	/** Abort signal forwarded to the advisor call, if any. */
+	signal: AbortSignal | undefined;
+	/** Progress callback; called once, before the advisor call, with the msgConsulting text. */
+	onUpdate: AgentToolUpdateCallback<SoloDetails> | undefined;
+	/** Optional explicit question to inject at the tail of the context. */
+	question?: string;
+}
+/**
+ * Run one solo consult and return it as a tool result.
+ *
+ * Steps: resolve the solo advisor, emit a progress update, build the session
+ * context for the active branch, re-fit it to the advisor's context window,
+ * then call the advisor — through the CLI backend when one is configured for
+ * the solo model, otherwise inline via callAdvisor.
+ *
+ * Failures of the advisor call itself (abort, error stop, empty reply, or a
+ * thrown exception) are returned as results whose text is the error message
+ * and whose `details` carry the stop reason and/or error message. Context
+ * building and fitting run before the try block, so an exception thrown there
+ * propagates to the caller.
+ *
+ * @param input - Context, config, abort signal, progress callback, and an
+ *   optional explicit question for the advisor.
+ * @returns The advisor's reply on success, or an error-text result on failure.
+ */
+export async function executeSolo(input: ExecuteSoloInput): Promise<AgentToolResult<SoloDetails>> {
+	const { ctx, config, signal, onUpdate, question } = input;
+	const soloConfig = config.modes?.solo;
+	const advisor = resolveAdvisor(ctx, soloConfig?.model);
+	const thinkingLevel = soloConfig?.thinkingLevel;
+	if (!advisor) {
+		return err(ERR_NO_MODEL, { advisorModel: "(none)", mode: "solo", thinkingLevel, errorMessage: ERR_NO_MODEL_DETAIL });
+	}
+	onUpdate?.({
+		content: [{ type: "text", text: msgConsulting(advisor.label) }],
+		details: { advisorModel: advisor.label, thinkingLevel, mode: "solo" },
+	});
+	// 1. Pull Pi's already-compacted session context for the active branch.
+	const { messages: sessionMessages } = buildSessionContext(
+		ctx.sessionManager.getEntries(),
+		ctx.sessionManager.getLeafId(),
+	);
+	const branchMessages: Message[] = convertToLlm(sessionMessages);
+	// 2. Re-fit to THIS advisor's window. This is the §P fix.
+	const contextBudget = config.contextBudget as ContextBudget;
+	// terse: cap the response hard so gut-check gets a short read, not an essay.
+	// Honored when gut-check merges its config into solo (modes.gutCheck.terse).
+	// Only the inline path below receives this cap; it is not passed to the CLI backend.
+	const maxTokens = soloConfig?.terse ? Math.min(1024, contextBudget.responseReserveTokens) : contextBudget.responseReserveTokens;
+	const advisorWindow = advisor.model.contextWindow;
+	// A blank or whitespace-only question is treated as "no question".
+	const directive = question?.trim()
+		? `Specific question from the executor: ${question.trim()}`
+		: undefined;
+	const fit = buildConsultContext({
+		sessionMessages: branchMessages,
+		advisorContextWindow: advisorWindow,
+		budget: contextBudget,
+		directive,
+	});
+	try {
+		// Backend dispatch: if the solo model has a CLI backend configured, route
+		// to the async subprocess path (spawn the CLI, pipe the fitted context to
+		// stdin, parse the reply). Otherwise inline completeSimple. The fitted
+		// context is reused either way — §C ran once, both backends get the same
+		// window-safe payload. CLI uses spawn (non-blocking) so a CLI-backed council
+		// member can run parallel to an inline one (the whole point of async).
+		const backend = resolveBackend(config, soloConfig?.model);
+		let text: string;
+		let usage: { input: number; output: number; total: number } | undefined;
+		let stopReason: string;
+		let errorMessage: string | undefined;
+		if (backend?.type === "cli") {
+			const cliResult = await callCliAdvisor({
+				systemPrompt: ADVISOR_SYSTEM_PROMPT,
+				messages: fit.messages,
+				backend: { type: "cli", command: backend.command, args: backend.args, timeoutMs: backend.timeoutMs },
+				signal,
+				cwd: ctx.cwd,
+			});
+			text = cliResult.text;
+			usage = undefined; // CLIs don't report token usage
+			// A usable reply is a success however the process ended. Without one, a
+			// timeout OR a caller-initiated abort (visible on the signal we passed in)
+			// is reported as "aborted" so it takes the ERR_CALL_ABORTED branch below
+			// instead of being presented as a backend failure; anything else is an error.
+			const interrupted = cliResult.timedOut || signal?.aborted === true;
+			stopReason = cliResult.text ? "stop" : interrupted ? "aborted" : "error";
+			errorMessage = cliResult.errorMessage;
+		} else {
+			const result = await callAdvisor({
+				ctx,
+				advisor,
+				systemPrompt: ADVISOR_SYSTEM_PROMPT,
+				messages: fit.messages,
+				thinkingLevel,
+				signal,
+				sessionId: ctx.sessionManager.getSessionId(),
+				maxTokens,
+			});
+			text = result.text;
+			usage = result.usage;
+			stopReason = result.stopReason;
+			errorMessage = result.errorMessage;
+		}
+		const baseDetails: SoloDetails = {
+			advisorModel: advisor.label,
+			thinkingLevel,
+			mode: "solo",
+			usage,
+			fittedTokens: fit.estimatedTokens,
+			omitted: fit.omittedCount,
+			stopReason,
+			errorMessage,
+		};
+		// Failure checks, most specific first: abort, then error, then empty reply.
+		if (stopReason === "aborted") {
+			return err(ERR_CALL_ABORTED, { ...baseDetails, errorMessage: errorMessage ?? ERR_ABORTED_DETAIL });
+		}
+		if (stopReason === "error") {
+			return err(errCallFailed(errorMessage), baseDetails);
+		}
+		if (!text) {
+			return err(ERR_EMPTY_RESPONSE, { ...baseDetails, errorMessage: ERR_EMPTY_RESPONSE_DETAIL });
+		}
+		return ok(text, baseDetails);
+	} catch (e) {
+		// A thrown backend error becomes an error result; usage and stopReason are
+		// unknown at this point, so only the fit statistics are reported.
+		const message = e instanceof Error ? e.message : String(e);
+		return err(errCallThrew(message), {
+			advisorModel: advisor.label,
+			thinkingLevel,
+			mode: "solo",
+			fittedTokens: fit.estimatedTokens,
+			omitted: fit.omittedCount,
+			errorMessage: message,
+		});
+	}
+}
+// Re-export the auth-error helpers so index.ts can use them without importing
+// from two places. (kept for the registration layer's error paths.)
+export { ERR_NO_API_KEY, ERR_NO_API_KEY_DETAIL, errMisconfigured };

package/src/timeout.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * timeout — shared wall-clock budget for consult paths.
+ *
+ * Two consult paths need a wall-clock cap that the call itself can't provide:
+ *   - debate: sequential rounds, total latency = sum-of-rounds, can hang
+ *     mid-round with no human to interrupt (consult() is executor-callable →
+ *     autonomous). The last unprotected path after council (per-member abort)
+ *     and CLI (resolveShellTimeoutMs).
+ *   - cli: subprocess timeout via pi.exec's own `timeout` option, but we wrap
+ *     it here so the budget lives in one place.
+ *
+ * The helper races the operation against a timer that fires an AbortController
+ * — the same controller whose signal propagates into callAdvisor / pi.exec, so
+ * a timeout aborts the in-flight work cleanly rather than leaving it dangling.
+ */
+/**
+ * Run `fn` with a wall-clock timeout. Resolves to the fn's result, or to a
+ * timed-out result if the timeout fires first. Errors thrown by `fn` are
+ * returned as `{ ok: false, timedOut: false, error }` rather than rethrown.
+ *
+ * The timeout is a real race: when the timer fires, the controller is aborted
+ * (reason: TimeoutError) AND the timed-out result is returned immediately, even
+ * if `fn` ignores the signal and never settles. A late result or rejection from
+ * `fn` after that point is discarded.
+ *
+ * `parentSignal` (e.g. ctx.signal — user abort) is linked to the controller so
+ * a user-initiated abort still propagates; the timeout is an independent second
+ * way to fire the same controller. The link is removed once withTimeout
+ * returns, so repeated calls do not pile up listeners on a long-lived parent.
+ *
+ * `timeoutMs <= 0` disables the timeout (fn runs with just the parent signal).
+ *
+ * @param timeoutMs - Budget in milliseconds; zero, negative, or NaN disables it.
+ * @param parentSignal - Optional signal whose abort is forwarded to `fn`'s signal.
+ * @param fn - The operation; receives the linked signal and should honor it.
+ * @returns A discriminated result: success, timed out, or failed with an error.
+ */
+export async function withTimeout<T>(
+	timeoutMs: number,
+	parentSignal: AbortSignal | undefined,
+	fn: (signal: AbortSignal) => Promise<T>,
+): Promise<{ ok: true; value: T; timedOut: false } | { ok: false; timedOut: true; signal: AbortSignal } | { ok: false; timedOut: false; error: unknown }> {
+	const { ctrl, unlink } = linkWithCleanup(parentSignal);
+	const hasDeadline = Boolean(timeoutMs) && timeoutMs > 0;
+	let timer: ReturnType<typeof setTimeout> | undefined;
+	try {
+		// No timeout: just run with the linked signal.
+		if (!hasDeadline) {
+			return { ok: true, timedOut: false, value: await fn(ctrl.signal) };
+		}
+		// Arm the timer before starting fn. When it fires it aborts the in-flight
+		// work and settles the race, so a fn that ignores the signal cannot hang us.
+		const expired = new Promise<{ ok: false; timedOut: true; signal: AbortSignal }>((resolve) => {
+			timer = setTimeout(() => {
+				ctrl.abort(new TimeoutError(timeoutMs));
+				resolve({ ok: false, timedOut: true, signal: ctrl.signal });
+			}, timeoutMs);
+		});
+		const finished = fn(ctrl.signal).then((value) => ({ ok: true as const, timedOut: false as const, value }));
+		return await Promise.race([finished, expired]);
+	} catch (error) {
+		// Distinguish timeout-abort from any other error. The controller's abort
+		// reason carries our TimeoutError; anything else is a real failure. Only
+		// applies when a timeout was armed — without one, every error is reported as-is.
+		if (hasDeadline && ctrl.signal.aborted && ctrl.signal.reason instanceof TimeoutError) {
+			return { ok: false, timedOut: true, signal: ctrl.signal };
+		}
+		return { ok: false, timedOut: false, error };
+	} finally {
+		clearTimeout(timer);
+		unlink();
+	}
+}
+/** Custom error so we can identify our own timeout vs a provider/network error. */
+export class TimeoutError extends Error {
+	/** @param ms - The timeout budget, in milliseconds, that was exceeded. */
+	constructor(public readonly ms: number) {
+		super(`timed out after ${ms}ms`);
+		this.name = "TimeoutError";
+	}
+}
+/**
+ * Build an AbortController linked to a parent signal, plus an `unlink` function
+ * that detaches the link again. If the parent aborts, the controller aborts
+ * with the same reason; if the parent is already aborted (or absent), no
+ * listener is attached and `unlink` is a no-op.
+ */
+function linkWithCleanup(parent: AbortSignal | undefined): { ctrl: AbortController; unlink: () => void } {
+	const ctrl = new AbortController();
+	const noop = (): void => {};
+	if (!parent) return { ctrl, unlink: noop };
+	if (parent.aborted) {
+		ctrl.abort(parent.reason);
+		return { ctrl, unlink: noop };
+	}
+	const forward = (): void => ctrl.abort(parent.reason);
+	parent.addEventListener("abort", forward, { once: true });
+	return { ctrl, unlink: () => parent.removeEventListener("abort", forward) };
+}
+/**
+ * Build an AbortController linked to a parent signal: if the parent aborts,
+ * this one aborts too (with the same reason). If the parent is already aborted,
+ * returns an already-aborted controller. The link stays in place for the
+ * parent's lifetime; used for council's per-member abort isolation (linkSignal
+ * re-exported from here for continuity).
+ */
+function linkController(parent: AbortSignal | undefined): AbortController {
+	return linkWithCleanup(parent).ctrl;
+}
+/**
+ * Signal-only form of linkController, exported so council/debate share one
+ * home for the abort-linking pattern. The returned signal aborts when `parent`
+ * does, and is already aborted if `parent` already is.
+ */
+export function linkSignal(parent: AbortSignal | undefined): AbortSignal {
+	const { signal } = linkController(parent);
+	return signal;
+}

package/src/triggers.ts ADDED Viewed

@@ -0,0 +1,190 @@
+/**
+ * triggers — automatic consult invocation.
+ *
+ * Two auto-triggers (SPEC §T):
+ *   - onDone:    when the agent finishes a turn, review the work (off by default).
+ *   - whenStuck: after N consecutive tool errors OR N identical tool calls
+ *                (loop detection), review to get unstuck. Default N = 3.
+ *
+ * Plus manual (always available via the consult tool / /consult).
+ *
+ * Two traps this module deliberately sidesteps (the things a reviewer should watch for):
+ *
+ *   1. DEADLOCK — never call session-control methods from an event handler.
+ *      pi docs say they deadlock the event loop. The triggered consult runs the
+ *      advisor call (safe — it's just completeSimple), then routes the result
+ *      back via pi.sendUserMessage(text, { deliverAs: "steer" | "followUp" }),
+ *      which is the documented non-deadlocking injection path.
+ *
+ *   2. SELF-TRIGGER — consult() is itself a tool, so it fires its own
+ *      tool_result event. Without a guard, a triggered consult re-trips the
+ *      loop detector (its own result looks like a repeated call). Two defenses:
+ *        a. Skip the fingerprint/error tracking when toolName === "consult".
+ *        b. autoRunning re-entrancy guard — while a triggered consult is in
+ *           flight, every handler bails, so the consult's own events can't
+ *           re-trigger anything.
+ *      Counters also reset on before_agent_start (pi's per-prompt reset point).
+ *
+ * Trust gating: triggers never fire in untrusted projects (an untrusted repo
+ * must not be able to silently invoke the advisor / spend tokens).
+ */
+import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
+import type { BpxConsultConfig } from "./config.js";
+import { loadConfig } from "./config.js";
+import { executeSolo } from "./solo.js";
+import { CONSULT_TOOL_NAME } from "./messages.js";
+/** Mutable bookkeeping shared by the trigger handlers registered in registerTriggers. */
+interface TriggerState {
+	/** Consecutive tool results with isError set; reset to 0 by any non-error result. */
+	stuckErrors: number;
+	/** Fingerprint (tool name + JSON of the input) of the most recent tracked tool call. */
+	lastFingerprint: string;
+	/** How many tracked tool calls in a row have matched lastFingerprint. */
+	loopCount: number;
+	/** True once the onDone review has fired; cleared on before_agent_start. */
+	autoReviewedThisRound: boolean;
+	/** True while a triggered consult is in flight; handlers bail while it is set. */
+	autoRunning: boolean;
+}
+/** Build the initial trigger state: zeroed counters, empty fingerprint, both flags off. */
+function freshState(): TriggerState {
+	const initial: TriggerState = {
+		autoRunning: false,
+		autoReviewedThisRound: false,
+		lastFingerprint: "",
+		loopCount: 0,
+		stuckErrors: 0,
+	};
+	return initial;
+}
+/**
+ * Subscribe the automatic consult triggers (whenStuck and onDone) to pi's events.
+ *
+ * @param pi - Extension API used to register the handlers and, through
+ *   runTriggeredConsult, to deliver the advisor's reply.
+ */
+export function registerTriggers(pi: ExtensionAPI): void {
+	// All handlers below share this one state object. pi loads the extension
+	// once per session, so the state is effectively per-session. (Running
+	// several concurrent sessions in one process would require keying it by
+	// session id — not the case today.)
+	const state = freshState();
+	/** Forget the loop-detection history. */
+	const clearLoopHistory = (): void => {
+		state.loopCount = 0;
+		state.lastFingerprint = "";
+	};
+	/**
+	 * Gates common to both triggers. Returns the loaded config only when no
+	 * triggered consult is in flight, the extension is enabled, and the project
+	 * is trusted; otherwise undefined. Bails fast so an auto-trigger can never
+	 * break the turn.
+	 */
+	const activeConfig = (ctx: ExtensionContext) => {
+		if (state.autoRunning) return undefined; // re-entrancy guard
+		const loaded = loadConfig({ cwd: ctx.cwd, projectTrusted: ctx.isProjectTrusted() });
+		if (!loaded.enabled) return undefined;
+		if (!ctx.isProjectTrusted()) return undefined; // trust gate
+		return loaded;
+	};
+	// Reset point: every user prompt starts with clean counters so stuck-state
+	// from a previous turn cannot leak into the next one.
+	pi.on("before_agent_start", () => {
+		state.stuckErrors = 0;
+		clearLoopHistory();
+		// autoRunning itself is never cleared here: if a triggered consult is
+		// still in flight when the next prompt starts (rare but possible),
+		// clearing it would allow re-entry. The once-per-prompt flag is likewise
+		// left alone until that consult has finished.
+		if (state.autoRunning) return;
+		state.autoReviewedThisRound = false;
+	});
+	// whenStuck: evaluated on tool_result, once isError and the input are known.
+	pi.on("tool_result", async (event, ctx) => {
+		// Self-trigger guard (a): results of our own consult tool are not tracked.
+		if (event.toolName === CONSULT_TOOL_NAME) return;
+		const config = activeConfig(ctx);
+		if (!config) return;
+		const threshold = config.triggers?.whenStuck ?? 3;
+		if (threshold <= 0) return;
+		// Loop-detect fingerprint: toolName + full input, UN-TRUNCATED.
+		// (pi-extensions CHANGELOG: an earlier 120-char cap broke detection by
+		// collapsing distinct calls with shared prefixes into false matches.)
+		const fingerprint = `${event.toolName}:${JSON.stringify(event.input ?? "")}`;
+		const sameAsLast = fingerprint === state.lastFingerprint;
+		state.loopCount = sameAsLast ? state.loopCount + 1 : 1;
+		state.lastFingerprint = fingerprint;
+		// Consecutive-error tracking: any success breaks the streak.
+		state.stuckErrors = event.isError ? state.stuckErrors + 1 : 0;
+		const tooManyErrors = state.stuckErrors >= threshold;
+		const looping = state.loopCount >= threshold;
+		if (!tooManyErrors && !looping) return;
+		// The error trigger takes precedence and also clears the error streak;
+		// the loop trigger clears only the loop history.
+		if (tooManyErrors) state.stuckErrors = 0;
+		clearLoopHistory();
+		const describe = tooManyErrors
+			? (text: string) =>
+					`The agent has hit ${threshold} consecutive tool errors. An advisor model was consulted:\n\n${text}\n\nUse this to get unstuck.`
+			: (text: string) =>
+					`The agent appears to be stuck in a loop (repeated tool "${event.toolName}" with identical arguments). An advisor model was consulted:\n\n${text}\n\nUse this to get unstuck.`;
+		await runTriggeredConsult(pi, ctx, config, state, describe, "steer");
+	});
+	// onDone: evaluated when the agent finishes the turn.
+	pi.on("agent_end", async (_event, ctx) => {
+		const config = activeConfig(ctx);
+		if (!config) return;
+		const reviewOnDone = config.triggers?.onDone ?? false;
+		// At most one auto-review per prompt.
+		if (!reviewOnDone || state.autoReviewedThisRound) return;
+		state.autoReviewedThisRound = true;
+		const describe = (text: string): string =>
+			`Before finishing, an advisor model assessed your work:\n\n${text}\n\nIf it raises valid issues, address them; otherwise briefly confirm and stop.`;
+		await runTriggeredConsult(pi, ctx, config, state, describe, "followUp");
+	});
+}
+/**
+ * Run one triggered consult and inject the result via sendUserMessage.
+ *
+ * The consult itself (executeSolo) is safe to call from a handler — it does
+ * its own completeSimple and returns. What's NOT safe is calling
+ * session-control methods (those deadlock); we avoid that by using
+ * pi.sendUserMessage({ deliverAs }) which is the documented injection path.
+ *
+ * Failed consults are dropped silently: executeSolo reports failures as
+ * ordinary text results, so without a check its error message would be handed
+ * to the agent as if it were advisor advice.
+ *
+ * Re-entrancy: autoRunning is set for the duration so the consult's own
+ * tool_result event can't re-trip the detectors. try/finally so a thrown
+ * consult error can't leave autoRunning stuck true.
+ *
+ * @param pi - Extension API used to deliver the message.
+ * @param ctx - Context of the event that fired the trigger; supplies the abort signal.
+ * @param config - Config already loaded by the calling handler.
+ * @param state - Shared trigger state; only autoRunning is touched here.
+ * @param buildMessage - Wraps the advisor's text in the message shown to the agent.
+ * @param deliverAs - Delivery mode passed through to sendUserMessage.
+ */
+async function runTriggeredConsult(
+	pi: ExtensionAPI,
+	ctx: ExtensionContext,
+	config: BpxConsultConfig,
+	state: TriggerState,
+	buildMessage: (text: string) => string,
+	deliverAs: "steer" | "followUp",
+): Promise<void> {
+	state.autoRunning = true;
+	try {
+		// Auto-triggers ALWAYS run solo, regardless of defaultMode (§T). Rationale:
+		// an auto-fire is not a deliberate consultation — it's a safety net firing
+		// mid-turn. A council would burn 3+ model calls + synthesis per trigger,
+		// which is a surprise-quota footgun on a loop or repeated errors. Council
+		// is reserved for explicit invocation (mode:council tool arg, /consult).
+		const result = await executeSolo({ ctx, config, signal: ctx.signal, onUpdate: undefined });
+		// A failed consult carries an error message and/or an "aborted"/"error"
+		// stop reason in its details; its text is an error string, not advice, so
+		// nothing is injected. (Assumes successful consults leave errorMessage
+		// unset — confirm against the backends.)
+		const stopReason = result.details?.stopReason;
+		const failed =
+			Boolean(result.details?.errorMessage) || stopReason === "aborted" || stopReason === "error";
+		if (failed) return;
+		// Extract text from the tool result content blocks.
+		const text = result.content
+			.filter((c): c is { type: "text"; text: string } => c.type === "text")
+			.map((c) => c.text)
+			.join("\n")
+			.trim();
+		if (text) {
+			await pi.sendUserMessage(buildMessage(text), { deliverAs });
+		}
+	} catch {
+		// never let an auto-trigger break the turn
+	} finally {
+		state.autoRunning = false;
+	}
+}