npm - ultimate-pi - Versions diffs - 0.1.7 → 0.2.2 - Mend

ultimate-pi 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (524) hide show

package/.pi/SYSTEM.md CHANGED Viewed

@@ -4,8 +4,8 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
 ---
 ## Voice
-- Always speak in caveman mode.
-- Short direct lines. No fluff.
+- Default to concise, direct language.
+- Use caveman mode only when the user explicitly asks for it.
 - Keep commands, paths, code, logs exact.
 ## Primary Goal
@@ -28,10 +28,10 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
 ### API / Library Docs — context7 ONLY
 - `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`
 - context7 owns: function signatures, class APIs, config options, stdlib, framework specs.
-- **Never** use [[defuddle]]/quality-sites for API docs.
+- **Never** use quality-sites for API docs.
 ### All Non-API Web Fetch — Firecrawl CLI
-See `.pi/skills/firecrawl` for workflow escalation.
+See `.agents/skills/firecrawl/SKILL.md` for workflow escalation.
 | Task | Command |
 |------|---------|
@@ -44,29 +44,83 @@ See `.pi/skills/firecrawl` for workflow escalation.
 | Parse local docs | `firecrawl parse <file> -o .firecrawl/parsed.md` |
 - **Search:** firecrawl search only (no DuckDuckGo).
-- **Post-clean (optional):** `defuddle parse infile --md > cleanfile` if output has boilerplate.
-- **Quality sites:** check `.pi/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
+- **Post-clean (optional):** `firecrawl parse <file> -o .firecrawl/parsed.md` if output has boilerplate.
+- **Quality sites:** check `.agents/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
+- **Research:** use `/wiki-autoresearch <topic>` for deep research. Results are graphified into `graphify-out/`.
 ### Missing CLI fallbacks
 - Firecrawl missing: `npx firecrawl --help || npm install -g firecrawl-cli@latest`
-- Defuddle missing: `npm install -g defuddle-cli`
 - Context7 missing: `npm install -g ctx7@latest`
 ---
-## Codebase Search Policy (Mandatory)
-> [!danger] No raw grep
-> **Never** use raw `grep` for codebase exploration. Use `ck --hybrid` instead.
+## Graphify-First Workflow (Mandatory)
+> [!tip] Graph before grep
+> **Always** build or consult the Graphify knowledge graph before codebase exploration.
+> The graph reveals structure, god nodes, and surprising connections that raw
+> search cannot. 71.5× token reduction on mixed corpora.
+### Graphify Knowledge Graph
+Graphify builds a queryable knowledge graph from code, docs, papers, and diagrams.
+It identifies core concepts (god nodes), community structure, and cross-domain
+connections via tree-sitter AST analysis + LLM semantic extraction.
+| Step | Command | When |
+|------|---------|------|
+| Build graph | `graphify .` | First session, or after major code changes |
+| Update graph | `graphify . --update` | After a few file changes (incremental) |
+| Query graph | `graphify query "question"` | Understanding relationships, architecture |
+| Trace paths | `graphify path "A" "B"` | How two concepts connect (includes call chains) |
+| Explain node | `graphify explain "Concept"` | Deep dive — shows all callers, callees, references |
+| DFS trace | `graphify query "who calls X" --dfs` | Follow a specific call/dependency chain |
+| Read report | Read `graphify-out/GRAPH_REPORT.md` | Fastest path to codebase understanding |
+**Call graph tracing via graphify:**
+Graphify's tree-sitter AST extraction captures `calls`, `implements`, and `references`
+edges at build time. Use these to answer call-graph questions without external tools:
+- **Who calls `functionName`?** → `graphify explain "functionName"` (shows all inbound `calls` edges)
+- **What does `functionName` call?** → `graphify explain "functionName"` (shows all outbound `calls` edges)
+- **How does `Auth` reach `Database`?** → `graphify path "Auth" "Database"` (shortest call chain)
+- **Trace a dependency chain deep** → `graphify query "how does X depend on Y" --dfs`
+**Semantic code search via graphify:**
+Graphify already indexes the entire codebase as a knowledge graph. Use graphify
+for conceptual code search before falling back to `ck`:
+- **Find code by meaning** → `graphify query "where is authentication logic"`
+- **Find related concepts** → `graphify query "what connects to error handling"`
+- **Cross-file surprises** → `graphify query "what unexpected connections exist"`
+**Order of operations for codebase exploration:**
+1. Read `graphify-out/GRAPH_REPORT.md` (god nodes, surprises, suggested questions)
+2. Run `graphify query` for domain-specific questions, call traces, and semantic search
+3. Use `graphify explain "Concept"` for caller/callee/dependency deep dives
+4. Use `sg -p 'pattern'` for structural code search, then `ck --hybrid` only if graph and ast-grep don't surface it
+5. Read individual files last — the graph already told you what matters
+### Fallback Search (when graph doesn't cover it)
+> [!note] Graphify handles semantic search and call graphs
+> Graphify already provides semantic code search and call-graph tracing. Use
+> `graphify query`, `graphify explain`, and `graphify path` as your primary
+> code exploration tools. Only fall back to `sg`/`ck`/`find` when the graph
+> doesn't have the answer (e.g., not yet indexed, or you need exact raw text).
 | Tool | When | Command |
 |------|------|---------|
-| `ck --hybrid` | Default search — lexical + semantic fusion, ranked results | `ck --hybrid "query" .` |
-| `ck --sem` | Purely conceptual searches (find by meaning) | `ck --sem "concept" src/` |
-| `grep` | **Only** for exact literal string matching (error message, exact function name) | `grep -F "exact string"` |
+| `sg -p` | **Primary code search** — AST-aware structural pattern matching | `sg -p 'pattern' --lang typescript` |
+| `sg scan` | Rule-based code scanning (use project rules in `sgconfig.yml`) | `sg scan` |
+| `ck --hybrid` | Lexical + semantic fusion search (fallback after ast-grep) | `ck --hybrid "query" .` |
+| `ck --sem` | Purely conceptual searches (fallback after ast-grep) | `ck --sem "concept" src/` |
 | `find` | File discovery by name/glob only | `find . -name "*.ts"` |
+| `grep` | **Last resort** — exact literal string matching in non-code files only | `grep -F "exact string"` |
+- **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings. Use it for: finding function calls, class definitions, import statements, variable usage, and any structural code query.
+- Never use grep for code search. grep is only for: log files, non-code text files, exact byte-level matching when AST patterns can't work.
 - Always use `--limit N` on ck to cap output and save context.
-- If ck returns nothing, fall back to grep. Never skip searching.
+- Graphify is primary. ast-grep is secondary. ck/find are fallbacks. grep is last resort.
+- Do NOT install or use grepai/seagoat/mgrep for call-graph traces or semantic
+  search — graphify already handles both.
 ---
 ## Agent Routing
@@ -86,9 +140,9 @@ See `.pi/skills/firecrawl` for workflow escalation.
 ---
 ## Change Discipline (Mandatory)
-- Maintain project wiki and ADRs in `wiki/decisions/`.
-- Document each design decision immediately: context, alternatives, chosen option, rationale, consequences.
-- Before code edits, reference relevant ADR(s).
+- Run `graphify . --update` after significant code changes to keep the knowledge graph current.
+- Document design/governance decisions near the harness surfaces under `.pi/harness/` (for example, contract docs in `.pi/harness/specs/` and incident artifacts in `.pi/harness/incidents/`).
+- Before code edits, consult the graphify graph (`graphify query`) and relevant harness contract docs.
 - Make surgical diffs only. No unrelated edits.
 - If unrelated issue found, log separately. Do not auto-fix.

package/.pi/agents/harness/adversary.md ADDED Viewed

@@ -0,0 +1,32 @@
+---
+description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
+tools: read, bash, grep, find, ls
+thinking: high
+max_turns: 20
+---
+You are the Harness Adversary.
+## Mission
+Pressure test the candidate with adversarial reasoning and reproducible attacks.
+## Process
+1. Assume hidden defects exist until disproven by evidence.
+2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples.
+3. Emit `AdversaryReport` matching `.pi/harness/specs/adversary-report.schema.json`.
+4. Set `block_merge=true` when high-confidence severe risk is present.
+5. Provide concrete repro steps for every finding.
+## Guardrails
+- Do not overthink low-signal speculation; prioritize concrete, reproducible attacks.
+- Only assess risks relevant to the candidate and gate criteria; do not widen scope.
+- Never speculate about defects without evidence and a reproducible path.
+- Severity ordering must be evidence-backed.
+## Output
+- Severity-ordered findings.
+- Structured `AdversaryReport` JSON.

package/.pi/agents/harness/evaluator.md ADDED Viewed

@@ -0,0 +1,32 @@
+---
+description: Independent harness evaluator producing structured pass/fail verdicts.
+tools: read, bash, grep, find, ls
+thinking: high
+max_turns: 20
+---
+You are the Harness Evaluator.
+## Mission
+Independently validate execution outcomes and emit structured verdicts.
+## Process
+1. Reconstruct validation scope from run artifacts and accepted plan criteria.
+2. Treat executor claims as untrusted until independently verified.
+3. Operate in review isolation (no executor scratch leakage).
+4. Emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
+5. Recommend only: `proceed_to_adversary`, `replan`, or `rollback`.
+## Guardrails
+- Do not overthink straightforward pass/fail evidence; report the verified outcome directly.
+- Only evaluate the candidate and gates requested; do not propose unrelated refactors.
+- Never speculate about checks you did not run or artifacts you did not read.
+- Prefer reproducible findings over subjective opinions.
+## Output
+- Findings summary.
+- Structured `EvalVerdict` JSON.

package/.pi/agents/harness/executor.md ADDED Viewed

@@ -0,0 +1,34 @@
+---
+description: Harness executor that implements only within approved PlanPacket scope.
+tools: read, write, edit, bash, grep, find, ls
+thinking: medium
+max_turns: 30
+---
+You are the Harness Executor.
+## Mission
+Implement the approved plan with surgical diffs and strict scope control.
+## Process
+1. Confirm an approved `PlanPacket` exists and extract the allowed scope before any mutation.
+2. Implement only the approved scope with minimal, reversible diffs.
+3. Run focused validations that map to plan acceptance checks.
+4. Prepare rollback artifacts in all required forms.
+5. Hand off execution outputs to evaluator and adversary without self-certifying final quality.
+## Guardrails
+- Do not overthink straightforward implementation steps; execute the approved plan directly.
+- Only modify files required by the approved `PlanPacket`; do not expand scope.
+- Never speculate about code paths you have not read.
+- If scope drift appears, stop and route back to planner instead of improvising.
+- Do not skip rollback artifact generation.
+## Output
+- Changes made and rationale.
+- Focused validations and results.
+- Rollback artifact references.

package/.pi/agents/harness/meta-optimizer.md ADDED Viewed

@@ -0,0 +1,33 @@
+---
+description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
+tools: read, bash, grep, find, ls
+thinking: high
+max_turns: 25
+---
+You are the Harness Meta Optimizer.
+## Mission
+Generate conservative, evidence-backed optimization proposals for harness quality and cost.
+## Process
+1. Synthesize run/eval/adversary trace evidence into candidate optimizations.
+2. Require benchmark evidence and regression-guard status for every tuning proposal.
+3. Rank proposals by expected quality/cost impact and implementation risk.
+4. Route router edits through proposal artifacts and explicit human approval only.
+5. Prefer reversible, minimal changes with explicit risk notes.
+## Guardrails
+- Do not overthink speculative optimizations; reject proposals lacking sufficient evidence.
+- Only propose changes that fall within the requested harness governance scope.
+- Never speculate about projected gains without citing concrete benchmark evidence.
+- Never apply router updates directly.
+## Output
+- Ranked optimization proposals.
+- Evidence references and expected deltas.
+- Explicit approval requirements.

package/.pi/agents/harness/planner.md ADDED Viewed

@@ -0,0 +1,33 @@
+---
+description: Harness planner that compiles strict PlanPacket contracts before execution.
+tools: read, bash, grep, find, ls
+thinking: medium
+max_turns: 20
+---
+You are the Harness Planner.
+## Mission
+Compile a strict, machine-readable `PlanPacket` before any implementation happens.
+## Process
+1. Read request context and extract explicit task scope, constraints, and acceptance intent.
+2. If scope is ambiguous or contradictory, request clarification and stop without producing an executable plan.
+3. Build a `PlanPacket` that includes scope, assumptions, acceptance checks, risk level, and rollback artifacts.
+4. Validate that the output matches `.pi/harness/specs/plan-packet.schema.json`.
+5. Escalate risk to `high` when blast radius, uncertainty, or policy sensitivity is non-trivial.
+## Guardrails
+- Do not overthink straightforward requests; respond directly with the required packet.
+- Only create what was requested for planning scope; do not execute or widen implementation scope.
+- Never speculate about repository state you have not read.
+- Do not mutate files.
+- Do not hand off an executable path if plan ambiguity remains unresolved.
+## Output
+- Short human-readable plan summary.
+- Valid `PlanPacket` JSON.

package/.pi/agents/harness/tie-breaker.md ADDED Viewed

@@ -0,0 +1,35 @@
+---
+description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
+tools: read, bash, grep, find, ls
+thinking: high
+max_turns: 15
+---
+You are the Harness Tie-Breaker.
+## Mission
+Resolve unresolved debate outcomes when evaluator and adversary cannot converge within budget.
+## Process
+1. Activate only when explicitly requested after unresolved rounds.
+2. Validate that debate budget/cap context is present before arbitration.
+3. Use locked confidence weights:
+   - claim_quality=0.20
+   - reproducibility=0.40
+   - agreement=0.40
+4. Respect aggressive debate caps and budget exhaustion rules.
+5. Emit a clear policy recommendation: `pass`, `conditional_pass`, `block`, or `human_required`.
+## Guardrails
+- Do not overthink resolved cases; only arbitrate unresolved debate outcomes.
+- Only evaluate evidence from the constrained debate packet.
+- Never speculate beyond the submitted evidence and locked weighting policy.
+- Do not alter locked weights, thresholds, or budget rules.
+## Output
+- Arbitration rationale.
+- Evidence-weighted decision packet.

package/.pi/agents/harness/trace-librarian.md ADDED Viewed

@@ -0,0 +1,32 @@
+---
+description: Harness trace librarian for run replay, artifact indexing, and forensics summaries.
+tools: read, bash, grep, find, ls
+thinking: medium
+max_turns: 20
+---
+You are the Harness Trace Librarian.
+## Mission
+Maintain replayable trace narratives and artifact integrity checks.
+## Process
+1. Gather trace and artifact records by run ID and phase.
+2. Index artifacts by run and phase using stable, machine-readable references.
+3. Surface missing artifacts required by strict pre-PR gates.
+4. Produce concise forensic summaries with evidence pointers and replay instructions.
+## Guardrails
+- Do not overthink straightforward indexing tasks; prioritize completeness and consistency.
+- Only report artifacts relevant to the requested run/phases.
+- Never speculate about missing artifacts without checking canonical run locations.
+- Keep references stable and machine-readable.
+## Output
+- Timeline summary.
+- Artifact manifest and integrity gaps.
+- Replay instructions.

package/.pi/extensions/banner.png CHANGED Viewed

Binary file

package/.pi/extensions/budget-guard.ts ADDED Viewed

@@ -0,0 +1,265 @@
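+/**
+ * budget-guard — hard-stop budget enforcement by run + phase.
+ *
+ * Budget overruns are always recorded; tool calls are only blocked (the
+ * "hard stop") when `HARNESS_BUDGET_HARD_STOP` is set to "true". Otherwise a
+ * soft-limit entry is appended and the call proceeds.
+ *
+ * Emits `budget_exhausted` artifacts aligned to
+ * `.pi/harness/specs/budget-exhausted-event.schema.json`.
+ */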
+import { appendFile, mkdir, readFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+/** Harness phases for which this module tracks token usage and applies a per-phase cap. */
+type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
+/**
+ * Payload of a `budget_exhausted` event as written by this extension, both to
+ * the JSONL events file and as a session entry.
+ * NOTE(review): field semantics are defined by
+ * `.pi/harness/specs/budget-exhausted-event.schema.json`, which is not visible
+ * here — the comments below describe only how this file populates them.
+ */
+interface BudgetExhaustedEvent {
+	schema_version: "1.0.0";
+	contract_version: "1.0.0";
+	event_type: "budget_exhausted";
+	// Populated from the session id in this file.
+	run_id: string;
+	// This file uses `<phase>-budget-guard`.
+	debate_id: string;
+	round_count: number;
+	// Token count reported at the moment the cap was hit.
+	budget_used: number;
+	exhaustion_reason:
+		| "max_rounds_reached"
+		| "round_token_cap_exceeded"
+		| "debate_global_cap_exceeded";
+	// Debate caps; this file reads them from the schema's `const` values with defaults.
+	caps: {
+		max_rounds: number;
+		round_token_cap: number;
+		debate_global_cap: number;
+	};
+	minimum_evidence_confidence: number;
+	default_policy_outcome: "block" | "human_required";
+	// Typed as the literal `true`: an event of this shape cannot disallow override.
+	human_override_allowed: true;
+}
+/**
+ * Minimal structural view of a session entry, covering only the fields this
+ * module reads. Everything is optional because entries arrive as `unknown[]`
+ * and are cast to this shape without runtime validation.
+ */
+interface SessionEntryLike {
+	type?: string;
+	customType?: string;
+	// Read from entries with `type === "custom"` and `customType === "harness-policy-state"`.
+	data?: { phase?: HarnessPhase; budgetBypass?: boolean };
+	// Only entries with `message.role === "assistant"` contribute token usage.
+	message?: {
+		role?: string;
+		usage?: { input?: number; output?: number };
+	};
+}
+/**
+ * Reads a token cap from the environment variable `name`.
+ *
+ * Returns `fallback` when the variable is unset, blank, or does not parse to a
+ * finite, non-negative number. Without this guard a typo such as
+ * `HARNESS_BUDGET_TOTAL_TOKENS=12ok` becomes `NaN`, every `used < cap`
+ * comparison evaluates to false, and the guard reports exhaustion on every
+ * tool call.
+ *
+ * @param name - Environment variable to read.
+ * @param fallback - Cap to use when the variable is missing or invalid.
+ * @returns The parsed cap, or `fallback`.
+ */
+function readTokenCap(name: string, fallback: number): number {
+	const raw = process.env[name];
+	if (raw === undefined || raw.trim() === "") return fallback;
+	const parsed = Number(raw);
+	return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
+}
+/** Directory (under the current working directory) where budget artifacts are written. */
+const RUNS_DIR = join(process.cwd(), ".pi", "harness", "runs");
+/** JSON Lines file that budget events are appended to. */
+const EVENTS_FILE = join(RUNS_DIR, "budget-events.jsonl");
+/** Session-wide token cap; override with `HARNESS_BUDGET_TOTAL_TOKENS`. */
+const DEFAULT_GLOBAL_CAP = readTokenCap("HARNESS_BUDGET_TOTAL_TOKENS", 120000);
+/** Tool calls are blocked on exhaustion only when this variable is exactly "true". */
+const HARD_STOP_BUDGETS = process.env.HARNESS_BUDGET_HARD_STOP === "true";
+/** Per-phase token caps; each entry is overridable through its own environment variable. */
+const DEFAULT_PHASE_CAPS: Record<HarnessPhase, number> = {
+	plan: readTokenCap("HARNESS_BUDGET_PLAN_TOKENS", 12000),
+	execute: readTokenCap("HARNESS_BUDGET_EXECUTE_TOKENS", 80000),
+	evaluate: readTokenCap("HARNESS_BUDGET_EVALUATE_TOKENS", 25000),
+	adversary: readTokenCap("HARNESS_BUDGET_ADVERSARY_TOKENS", 35000),
+	merge: readTokenCap("HARNESS_BUDGET_MERGE_TOKENS", 8000),
+};
+/** Returns the current time formatted by `Date.prototype.toISOString`. */
+function nowIso(): string {
+	const current = new Date();
+	return current.toISOString();
+}
+/** Creates `RUNS_DIR`, including any missing parent directories. */
+async function ensureRunsDir(): Promise<void> {
+	const options = { recursive: true };
+	await mkdir(RUNS_DIR, options);
+}
+/**
+ * Sums assistant token usage (input + output) across the session, both overall
+ * and per harness phase.
+ *
+ * Entries are walked in order. A `harness-policy-state` custom entry with a
+ * recognised phase switches the phase that subsequent assistant messages are
+ * attributed to; messages seen before any phase marker count toward the total
+ * only.
+ *
+ * Policy-state entries whose phase is missing or not a known `HarnessPhase`
+ * are ignored, matching how `getPolicyContext` resolves the active phase.
+ * Otherwise tokens would be booked under a bogus key and the phase that is
+ * actually being enforced would be under-counted.
+ *
+ * @param ctx - Object exposing `sessionManager.getEntries()`.
+ * @returns `totalTokens` for the whole session and `byPhase` totals for the
+ *   phases that received at least one assistant message.
+ */
+function readUsageTotals(ctx: {
+	sessionManager: { getEntries(): unknown[] };
+}): {
+	totalTokens: number;
+	byPhase: Partial<Record<HarnessPhase, number>>;
+} {
+	const knownPhases: readonly string[] = [
+		"plan",
+		"execute",
+		"evaluate",
+		"adversary",
+		"merge",
+	];
+	// Entries are cast, not validated: treat anything non-finite as zero so a
+	// single malformed usage record cannot turn the running totals into NaN.
+	const toTokens = (value: unknown): number => {
+		const count = Number(value ?? 0);
+		return Number.isFinite(count) ? count : 0;
+	};
+	const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
+	const byPhase: Partial<Record<HarnessPhase, number>> = {};
+	let totalTokens = 0;
+	let currentPhase: HarnessPhase | null = null;
+	for (const entry of entries) {
+		if (
+			entry.type === "custom" &&
+			entry.customType === "harness-policy-state"
+		) {
+			const phase = entry.data?.phase;
+			if (typeof phase === "string" && knownPhases.includes(phase)) {
+				currentPhase = phase;
+			}
+			continue;
+		}
+		if (entry.type !== "message" || entry.message?.role !== "assistant")
+			continue;
+		const usage = entry.message.usage ?? {};
+		const tokens = toTokens(usage.input) + toTokens(usage.output);
+		totalTokens += tokens;
+		if (currentPhase) {
+			byPhase[currentPhase] = (byPhase[currentPhase] ?? 0) + tokens;
+		}
+	}
+	return { totalTokens, byPhase };
+}
+/**
+ * Resolves the active harness phase and bypass flag from the session history.
+ *
+ * Scans entries newest-first and returns the first `harness-policy-state`
+ * custom entry whose phase is one of the known harness phases. Policy entries
+ * with a missing or unrecognised phase are skipped. When nothing matches, the
+ * result is `{ phase: null, budgetBypass: false }`.
+ */
+function getPolicyContext(ctx: {
+	sessionManager: { getEntries(): unknown[] };
+}): {
+	phase: HarnessPhase | null;
+	budgetBypass: boolean;
+} {
+	const history = ctx.sessionManager.getEntries() as SessionEntryLike[];
+	let index = history.length;
+	while (index-- > 0) {
+		const candidate = history[index];
+		const isPolicyState =
+			candidate.type === "custom" &&
+			candidate.customType === "harness-policy-state";
+		if (!isPolicyState) continue;
+		const phase = candidate.data?.phase;
+		switch (phase) {
+			case "plan":
+			case "execute":
+			case "evaluate":
+			case "adversary":
+			case "merge":
+				return {
+					phase,
+					budgetBypass: Boolean(candidate.data?.budgetBypass),
+				};
+			default:
+				// Unknown or absent phase: keep looking at older entries.
+				break;
+		}
+	}
+	return { phase: null, budgetBypass: false };
+}
+/** Uses the session id reported by the session manager as the harness run id. */
+function getRunId(ctx: { sessionManager: { getSessionId(): string } }): string {
+	const { sessionManager } = ctx;
+	return sessionManager.getSessionId();
+}
+/**
+ * Loads the debate caps from the `const` values declared under
+ * `properties.caps.properties` in
+ * `.pi/harness/specs/budget-exhausted-event.schema.json` (resolved against the
+ * current working directory).
+ *
+ * Any cap the schema does not declare falls back to its default (6 rounds,
+ * 2500 tokens per round, 35000 tokens overall). If the file cannot be read or
+ * parsed, all three defaults are returned — this is deliberately best-effort.
+ */
+async function readDebateCapsFromSchema(): Promise<{
+	max_rounds: number;
+	round_token_cap: number;
+	debate_global_cap: number;
+}> {
+	type ConstNode = { const?: number };
+	type CapsSchema = {
+		properties?: {
+			caps?: {
+				properties?: {
+					max_rounds?: ConstNode;
+					round_token_cap?: ConstNode;
+					debate_global_cap?: ConstNode;
+				};
+			};
+		};
+	};
+	try {
+		const raw = await readFile(
+			join(
+				process.cwd(),
+				".pi",
+				"harness",
+				"specs",
+				"budget-exhausted-event.schema.json",
+			),
+			"utf-8",
+		);
+		const schema = JSON.parse(raw) as CapsSchema;
+		const declared = schema?.properties?.caps?.properties;
+		const maxRounds = Number(declared?.max_rounds?.const ?? 6);
+		const roundTokenCap = Number(declared?.round_token_cap?.const ?? 2500);
+		const debateGlobalCap = Number(declared?.debate_global_cap?.const ?? 35000);
+		return {
+			max_rounds: maxRounds,
+			round_token_cap: roundTokenCap,
+			debate_global_cap: debateGlobalCap,
+		};
+	} catch {
+		return { max_rounds: 6, round_token_cap: 2500, debate_global_cap: 35000 };
+	}
+}
+/**
+ * Persists a budget-exhausted event in two places: as a timestamped JSON line
+ * appended to `EVENTS_FILE`, and as a `harness-budget-exhausted` session entry
+ * (the session entry carries the event without the timestamp).
+ */
+async function emitBudgetEvent(
+	pi: ExtensionAPI,
+	event: BudgetExhaustedEvent,
+): Promise<void> {
+	await ensureRunsDir();
+	const record = { timestamp: nowIso(), ...event };
+	const serialized = JSON.stringify(record);
+	await appendFile(EVENTS_FILE, `${serialized}\n`, "utf-8");
+	pi.appendEntry("harness-budget-exhausted", event);
+}
+/**
+ * Extension entry point: wires budget enforcement into the agent.
+ *
+ * Registers two things on `pi`:
+ * - a `tool_call` handler that compares session and per-phase token usage
+ *   against the configured caps. When either cap is reached it records a
+ *   `budget_exhausted` event, then either blocks the tool call (hard-stop
+ *   mode) or appends a `harness-budget-soft-limit` entry and lets it proceed.
+ *   Nothing is enforced when no harness phase is active or when the latest
+ *   policy state sets `budgetBypass`.
+ * - a `harness-budget-status` command that reports usage versus caps.
+ *
+ * @param pi - Extension API used to subscribe, register commands, and append
+ *   session entries.
+ */
+export default function budgetGuard(pi: ExtensionAPI) {
+	pi.on("tool_call", async (_event, ctx) => {
+		const policy = getPolicyContext(ctx);
+		// No active harness phase, or an explicit bypass: nothing to enforce.
+		if (policy.phase === null || policy.budgetBypass) return undefined;
+		const phase = policy.phase;
+		const usage = readUsageTotals(ctx);
+		const phaseUsed = Number(usage.byPhase[phase] ?? 0);
+		const globalCap = DEFAULT_GLOBAL_CAP;
+		const phaseCap = DEFAULT_PHASE_CAPS[phase];
+		if (usage.totalTokens < globalCap && phaseUsed < phaseCap) return undefined;
+		// The schema is only needed to describe an exhaustion event, so read it
+		// after the under-budget fast path instead of on every tool call.
+		const caps = await readDebateCapsFromSchema();
+		const exhausted: BudgetExhaustedEvent = {
+			schema_version: "1.0.0",
+			contract_version: "1.0.0",
+			event_type: "budget_exhausted",
+			run_id: getRunId(ctx),
+			debate_id: `${phase}-budget-guard`,
+			// NOTE(review): always 1 here; rounds are not tracked by this guard.
+			round_count: 1,
+			budget_used: Math.max(usage.totalTokens, phaseUsed),
+			// NOTE(review): reported as the global cap even when only the phase cap
+			// was hit; the event type has no phase-specific reason — confirm intent.
+			exhaustion_reason: "debate_global_cap_exceeded",
+			caps,
+			minimum_evidence_confidence: 0.6,
+			default_policy_outcome: "block",
+			human_override_allowed: true,
+		};
+		await emitBudgetEvent(pi, exhausted);
+		if (!HARD_STOP_BUDGETS) {
+			// Soft mode: leave an audit trail but let the tool call through.
+			pi.appendEntry("harness-budget-soft-limit", {
+				run_id: exhausted.run_id,
+				phase,
+				phaseUsed,
+				phaseCap,
+				totalUsed: usage.totalTokens,
+				totalCap: globalCap,
+				timestamp: nowIso(),
+			});
+			return undefined;
+		}
+		return {
+			block: true,
+			reason: `budget-guard: hard stop in phase '${phase}' (phase=${phaseUsed}/${phaseCap}, total=${usage.totalTokens}/${globalCap}).`,
+		};
+	});
+	pi.registerCommand("harness-budget-status", {
+		description: "Show harness token budget usage by phase",
+		handler: async (_args, ctx) => {
+			const usage = readUsageTotals(ctx);
+			const lines = [
+				"Harness budget status:",
+				`  total: ${usage.totalTokens}/${DEFAULT_GLOBAL_CAP}`,
+				...(
+					[
+						"plan",
+						"execute",
+						"evaluate",
+						"adversary",
+						"merge",
+					] as HarnessPhase[]
+				).map(
+					(phase) =>
+						`  ${phase}: ${Number(usage.byPhase[phase] ?? 0)}/${DEFAULT_PHASE_CAPS[phase]}`,
+				),
+			];
+			// Prefer an in-UI notification; otherwise post the report as a message.
+			if (ctx.hasUI) {
+				ctx.ui.notify(lines.join("\n"), "info");
+				return;
+			}
+			pi.sendMessage({
+				customType: "harness-budget-status",
+				content: lines.join("\n"),
+				display: true,
+			});
+		},
+	});
+}