npm - pi-chalin - Versions diffs - 0.1.0 - Mend

pi-chalin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/README.md +264 -0
package/agents/conflict-resolver.md +28 -0
package/agents/context-builder.md +31 -0
package/agents/delegate.md +28 -0
package/agents/oracle.md +28 -0
package/agents/planner.md +28 -0
package/agents/researcher.md +29 -0
package/agents/reviewer.md +30 -0
package/agents/scout.md +32 -0
package/agents/worker.md +29 -0
package/package.json +91 -0
package/src/agent-overrides.ts +12 -0
package/src/agents.ts +274 -0
package/src/artifacts.ts +326 -0
package/src/autoroute.ts +274 -0
package/src/budget.ts +333 -0
package/src/child-sessions.ts +108 -0
package/src/child-tools.ts +796 -0
package/src/commands.ts +140 -0
package/src/config.ts +189 -0
package/src/discovery.ts +190 -0
package/src/index.ts +40 -0
package/src/interview.ts +202 -0
package/src/kernel.ts +254 -0
package/src/memory.ts +945 -0
package/src/model-resolution.ts +106 -0
package/src/orchestration.ts +99 -0
package/src/paths.ts +50 -0
package/src/route-format.ts +149 -0
package/src/route-guards.ts +92 -0
package/src/route-widget.ts +219 -0
package/src/runner-prompt.ts +346 -0
package/src/runner-state.ts +105 -0
package/src/runner.ts +1185 -0
package/src/runtime-state.ts +175 -0
package/src/schemas.ts +316 -0
package/src/snapshot.ts +282 -0
package/src/sql-js-fts5.d.ts +4 -0
package/src/tools.ts +558 -0
package/src/ui-agents.ts +338 -0
package/src/ui-status.ts +87 -0
package/src/ui.ts +875 -0
package/src/webfetch.ts +294 -0
package/src/worktrees.ts +113 -0

package/src/autoroute.ts ADDED Viewed

@@ -0,0 +1,274 @@
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { AgentCatalog } from "./agents.ts";
+import { loadEffectiveConfig } from "./config.ts";
+import { MemoryStore } from "./memory.ts";
+import { buildCompactChalinCriticalSystemPrompt, buildCompactChalinOrchestratorSystemPrompt, buildCompactChalinResumeSystemPrompt, buildChalinOrchestratorSystemPrompt } from "./orchestration.ts";
+import { isUsableStepHandoff, loadResumableRunState } from "./runner-state.ts";
+import { beginChalinTurn, recordDirectToolCompletion } from "./runtime-state.ts";
+import type { RunState } from "./schemas.ts";
+import { setChalinStatus } from "./ui-status.ts";
+export function registerChalinAutoRouter(pi: ExtensionAPI): void { // Registers pi-chalin's handlers for the input, before_agent_start, agent_end, tool_execution_end and session_shutdown events.
+  pi.on("input", async (event) => { // Every branch below returns "continue"; this handler never consumes or rewrites input.
+    if (event.source === "extension") return { action: "continue" };
+    const text = event.text.trim();
+    if (!text || text.startsWith("/") || text.startsWith("!")) return { action: "continue" };
+    // Never consume the user prompt. The primary Pi agent stays in control and
+    // decides whether to call chalin_route as one of its normal tools.
+    return { action: "continue" };
+  });
+  pi.on("before_agent_start", async (event, ctx) => { // Appends an orchestration section to the system prompt and adds a hidden steering message.
+    beginChalinTurn({ prompt: typeof event.prompt === "string" ? event.prompt : undefined }); // Runs before the enabled check, so it also runs when pi-chalin is disabled.
+    const loaded = loadEffectiveConfig({ cwd: ctx.cwd });
+    if (!loaded.config.enabled) return; // Disabled: leave the system prompt and messages untouched.
+    const promptText = typeof event.prompt === "string" ? event.prompt : "";
+    const resumableRun = looksLikeContinuationPrompt(promptText) ? loadResumableRunState({ cwd: ctx.cwd }) : undefined; // Run state is only loaded when the prompt looks like a continuation.
+    const useCompactResumePrompt = Boolean(resumableRun);
+    const useCompactPrompt = shouldUseCompactDirectOrchestrationPrompt(promptText);
+    const useCompactCriticalPrompt = !useCompactResumePrompt && !useCompactPrompt && shouldUseCompactChalinCriticalPrompt(promptText); // Precedence: resume, then compact direct, then compact critical, then the full prompt.
+    const catalog = useCompactResumePrompt || useCompactPrompt || useCompactCriticalPrompt ? undefined : AgentCatalog.load({ cwd: ctx.cwd }); // The agent catalog is only loaded for the full orchestration prompt.
+    const orchestrationPrompt = useCompactResumePrompt
+      ? buildCompactChalinResumeSystemPrompt()
+      : useCompactPrompt
+      ? buildCompactChalinOrchestratorSystemPrompt()
+      : useCompactCriticalPrompt ? buildCompactChalinCriticalSystemPrompt() : buildChalinOrchestratorSystemPrompt(catalog?.list() ?? []);
+    const memoryContext = useCompactResumePrompt ? undefined : await globalMemoryContextForPrompt(ctx.cwd, promptText); // Memory lookup is skipped on resume turns.
+    return {
+      systemPrompt: `${event.systemPrompt}\n\n${orchestrationPrompt}${memoryContext ? `\n\n${memoryContext}` : ""}`,
+      message: {
+        customType: useCompactResumePrompt ? "pi-chalin-resume-orchestration" : useCompactPrompt ? "pi-chalin-direct-compact-orchestration" : useCompactCriticalPrompt ? "pi-chalin-critical-compact-orchestration" : "pi-chalin-orchestration",
+        content: useCompactResumePrompt && resumableRun ? compactResumeSteeringMessage(resumableRun) : useCompactPrompt ? compactDirectSteeringMessage(ctx.hasUI) : useCompactCriticalPrompt ? compactCriticalSteeringMessage(ctx.hasUI) : [
+          "If the user says continue/resume/continua/continúa/sigue/reanuda/retoma after an interrupted pi-chalin run, call chalin_resume before answering from partial findings.",
+          "pi-chalin preflight: if this is branch/project analysis, architecture/planning, broad/project-wide review, project-wide refactor strategy, complex/risky multi-file implementation, or memory recall, call chalin_route first. Bounded read-only mini-project reviews, bounded scaffolding, named-file bugfixes, named-file refactors, and simple implementation with explicit acceptance criteria should stay direct.",
+          "For explicit small bugfix/test requests with named files, inspect the target files once, edit promptly, and verify. Do not route or dry-run unless the change is broad, destructive, a security-sensitive mutation, or ambiguous.",
+          "Also call chalin_route for risky surgical/long-file edits; use scout → planner → worker → reviewer so the edit stays targeted and verified.",
+          "If the user asks to compare independent approaches/options, choose chalin_route with parallel planners/reviewers and synthesize the recommendation afterward.",
+          "Choose topology/agents yourself. Use one chalin_route call only, then synthesize from its handoff; do not inspect files directly unless a concrete gap remains.",
+          ctx.hasUI ? undefined : "Non-interactive mode: avoid dry-run for safe bounded edits; either edit directly or run a real chalin_route. Use dryRun only for destructive/high-risk/ambiguous work that genuinely needs user review.",
+          "Simple chat, definitions, one obvious command, tiny isolated edits, bounded read-only mini-project reviews, named-file bugfixes, or bounded scaffolding/simple implementation tasks with explicit files stay direct. Direct mode must still satisfy every explicit acceptance criterion exactly, including requested helper extraction, tests, no dependency additions, and behavior preservation. If the user asks for tests, changing only implementation is incomplete even when existing tests pass; add or update the relevant test file before final verification. Those tests must prove the requested behavior with at least one non-trivial positive case and one meaningful edge/failure case when applicable; merely renaming or preserving a starter smoke/empty test is incomplete. For time/window behavior, make tests deterministic with an injected or controlled clock when possible; avoid brittle mock timer APIs unless you verify the current Bun API in this project. Do not assert exact `Date.now()`-derived milliseconds against real wall time. For dependency-free TypeScript scaffolding, write the exact requested files, keep requested APIs/exported helpers in the requested source file, prefer package.json test script `bun test`, put tests under `test/`, avoid uninstalled runners like tsx/vitest/jest, export the requested API, declare requested package.json `bin` entries that point to executable file paths, never command strings, and fix verification failures and rerun verification after the final edit before answering. For Bun CLI subprocess tests, derive target paths directly with `import.meta.url` and pass `env: { ...process.env, ...overrides }` so stripped PATH/NODE_OPTIONS cannot create false failures. After edits plus a passing final verification, answer immediately with changed files, verification result, and one note naming the requested behavior/constraint satisfied.",
+        ].filter((line): line is string => Boolean(line)).join("\n"), // Drops the undefined placeholder left by the hasUI-only line.
+        display: false,
+      },
+    };
+  });
+  pi.on("agent_end", (_event, ctx) => {
+    setChalinStatus(ctx, { kind: "idle" }); // Reset the status indicator once the agent finishes.
+  });
+  pi.on("tool_execution_end", (event, ctx) => { // Sends hidden steering nudges after tool calls: one path for chalin_route/chalin_resume, one for all other tools.
+    if (["chalin_route", "chalin_resume"].includes(event.toolName)) {
+      if (event.isError) return; // Errored route/resume calls get no nudge.
+      const blockedReason = chalinRouteBlockedReason(event);
+      if (blockedReason) {
+        pi.sendMessage({
+          customType: "pi-chalin-route-blocked-nudge",
+          content: `${event.toolName} did not execute work (${blockedReason}). Do not claim completion from that result. If the user's request is a safe explicit edit, continue directly with native tools; otherwise explain the blocker.`,
+          display: false,
+        }, { triggerTurn: false, deliverAs: "steer" });
+        return; // A blocked route does not schedule the non-interactive shutdown.
+      }
+      pi.sendMessage({
+        customType: "pi-chalin-synthesis-nudge",
+        content: `${event.toolName} finished. Answer the user's original prompt now from the Final answer material in the tool result. Do not call another tool unless that material explicitly names a critical blocking gap.`,
+        display: false,
+      }, { triggerTurn: false, deliverAs: "steer" });
+      scheduleNonInteractiveShutdown(ctx); // Returns without scheduling anything when ctx.hasUI is truthy.
+      return;
+    }
+    const eventArgs = (event as { args?: { command?: unknown } }).args; // Local cast to read an optional args.command from the event.
+    const { shouldProgressNudge, shouldReadyToVerifyNudge, shouldFailureNudge, shouldMissingTestNudge, shouldCompletionNudge, verificationCommand } = recordDirectToolCompletion({
+      toolName: event.toolName,
+      isError: event.isError,
+      command: typeof eventArgs?.command === "string" ? eventArgs.command : undefined,
+      argsText: eventArgs ? JSON.stringify(eventArgs) : undefined,
+    });
+    if (shouldProgressNudge) { // The flags are checked independently, so one tool completion can send several nudges.
+      pi.sendMessage({
+        customType: "pi-chalin-direct-progress-nudge",
+        content: "You have changed files for a bounded direct task. If the user asked for tests and you have not changed a test/spec file, add or update the relevant test before verification. Tests must prove the requested behavior with non-trivial assertions, not only keep or rename the starter smoke/empty test. For timer code, prefer injected clocks/schedulers over brittle mock timer APIs; for Bun CLI subprocess tests, preserve process.env and derive target file URLs directly. Then run the nearest relevant verification command. If it fails, fix only the root cause and rerun verification after the last edit; then answer. The final answer must name the changed file paths and the exact verification command/result. Do not continue exploring unless a concrete acceptance criterion is still missing.",
+        display: false,
+      }, { triggerTurn: false, deliverAs: "steer" });
+    }
+    if (shouldReadyToVerifyNudge) {
+      pi.sendMessage({
+        customType: "pi-chalin-direct-ready-to-verify-nudge",
+        content: [
+          "Implementation and required test/doc edits are now in place for this bounded direct task.",
+          "Before verification, sanity-check that requested tests are behavior-bearing: they should assert the requested outputs/effects and relevant edge cases, not only a starter empty/smoke path.",
+          "Stop planning/exploring. Run the nearest relevant verification command now, normally `bun test` for dependency-free Bun fixtures. If it passes, answer immediately.",
+          "If verification fails, fix only the root cause and rerun the same nearest verification after the final edit. A final answer with a failed/stale Verification is invalid.",
+        ].join("\n"),
+        display: false,
+      }, { triggerTurn: false, deliverAs: "steer" });
+    }
+    if (shouldFailureNudge) {
+      const commandText = verificationCommand ? `\`${verificationCommand}\`` : "the verification command"; // Falls back to a generic phrase when no command was recorded.
+      pi.sendMessage({
+        customType: "pi-chalin-direct-verification-failed-nudge",
+        content: [
+          `${commandText} failed after file changes.`,
+          "Do NOT answer as done yet. Read the failure, fix the root cause, and rerun the nearest relevant verification after the final edit. You may not answer with a failed or stale Verification result.",
+          "If this is dependency-free TypeScript scaffolding, do not add uninstalled runners; write the exact requested files, keep requested APIs/exported helpers in the requested source file, use Bun's test runner with `bun test`, keep tests in `test/`, and fix imports/scripts so `bun test` passes.",
+          "If the failure involves time/window logic, remove wall-clock flakiness: inject/control the clock or make assertions tolerant before rerunning verification. Prefer an injected scheduler over mock timer APIs unless you verify the current Bun mock timer API.",
+          "If the failure is a Bun CLI subprocess test, preserve the parent environment with `env: { ...process.env, ...overrides }` and derive the CLI path directly from `import.meta.url`; do not compute a parent directory twice.",
+        ].join("\n"),
+        display: false,
+      }, { triggerTurn: false, deliverAs: "steer" });
+    }
+    if (shouldMissingTestNudge) {
+      pi.sendMessage({
+        customType: "pi-chalin-direct-tests-missing-nudge",
+        content: [
+          "The user requested tests, but the changed files so far do not include a test/spec file.",
+          "Do NOT answer as done yet. Your next action must be an edit/write to the relevant test/spec file, not a final answer.",
+          "Add or update the test so it proves the requested behavior with non-trivial assertions and meaningful edge/failure coverage where applicable, rerun the nearest verification command, then answer with changed implementation and test paths.",
+        ].join("\n"),
+        display: false,
+      }, { triggerTurn: true, deliverAs: "steer" }); // The only nudge in this handler sent with triggerTurn: true.
+    }
+    if (!shouldCompletionNudge) return;
+    const commandText = verificationCommand ? `\`${verificationCommand}\`` : "the verification command";
+    pi.sendMessage({
+      customType: "pi-chalin-direct-completion-nudge",
+      content: [
+        `You changed files and ${commandText} passed.`,
+        "If the user's acceptance criteria are satisfied and this passing verification happened after the last edit, answer now using this exact compact evidence format:",
+        "Passing tests is not enough by itself: before answering, compare the changed files against every explicit prompt requirement, including requested package metadata, bin/scripts, docs, tests, public API, and no-dependency constraints. If tests were requested but they only cover a starter smoke/empty path instead of the requested behavior, do not answer yet; improve the tests and rerun verification.",
+        "- Changed: `path/to/file`[, `path/to/test`]",
+        `- Verification: ${commandText} passed`,
+        "- Notes: one short sentence naming the requested behavior/constraint you satisfied, such as edge case covered, no external dependencies, or time reset behavior",
+        "Do not omit the Verification or Notes line. Do not call more tools unless a concrete requested requirement is still missing.",
+      ].join("\n"),
+      display: false,
+    }, { triggerTurn: false, deliverAs: "steer" });
+  });
+  pi.on("session_shutdown", () => {
+    // No background auto-routing workers are owned by this module anymore.
+    // Subagent execution is driven through the chalin_route tool and Pi's native
+    // abort signal.
+  });
+}
+/**
+ * Retrieves global memories relevant to the prompt and formats them as a
+ * markdown section for the system prompt.
+ *
+ * Resolves to `undefined` when the trimmed prompt is shorter than 8 characters,
+ * when retrieval yields no results or only blank text, or when retrieval throws
+ * (the lookup is best effort and must never break the turn).
+ */
+async function globalMemoryContextForPrompt(cwd: string, prompt: string): Promise<string | undefined> {
+  const query = prompt.trim();
+  if (query.length >= 8) {
+    try {
+      const store = new MemoryStore({ cwd });
+      const bundle = await store.retrieve({ query, sourceAgent: "primary-pi-global", limit: 5, tokenBudget: 520 });
+      const hasContent = bundle.results.length !== 0 && Boolean(bundle.text.trim());
+      if (hasContent) {
+        const heading = "## pi-chalin global memory context";
+        const guidance = "Use these memories as soft guidance for this turn, including direct-mode work. Current repository evidence and explicit user instructions override memory; if evidence contradicts memory, prefer the evidence and repair memory when a memory tool is available.";
+        return `${heading}\n${bundle.text}\n${guidance}`;
+      }
+    } catch {
+      // Deliberately swallowed: a failed memory lookup just means no memory section.
+    }
+  }
+  return undefined;
+}
+/**
+ * Reports whether the prompt opens with a continuation keyword such as
+ * "continue", "resume", "continúa", "sigue", "go on" or "keep going".
+ *
+ * The keyword must be a whole word: it has to be followed by the end of the
+ * prompt or by a character that is not a letter, digit or underscore. The
+ * previous pattern ended in `(?:\b|[.!?]*)`, which can match the empty string,
+ * so any prompt that merely started with a keyword (for example
+ * "resumen del proyecto" or "continuation tokens") was treated as a
+ * continuation request.
+ *
+ * @param prompt Raw user prompt; surrounding whitespace and letter case are ignored.
+ * @returns `true` when the first word(s) of the prompt are a continuation keyword.
+ */
+export function looksLikeContinuationPrompt(prompt: string): boolean {
+  const text = prompt.trim().toLowerCase();
+  if (!text) return false;
+  // The `u` flag makes the lookahead treat accented letters as letters too.
+  return /^(?:continuar|continua|continúa|continue|resumir|resume|reanudar|reanuda|retomar|retoma|seguir|sigue|dale|go on|keep going)(?![\p{L}\p{N}_])/iu.test(text);
+}
+/**
+ * Decides whether a prompt describes bounded direct work that should get the
+ * compact direct-mode steering instead of the full orchestration prompt.
+ *
+ * Blank prompts and prompts that look like orchestration work never qualify.
+ * Otherwise the prompt qualifies when it reads like a scaffolding contract, or
+ * when it combines a mutation verb with between 1 and 6 path mentions.
+ */
+export function shouldUseCompactDirectOrchestrationPrompt(prompt: string): boolean {
+  const lowered = prompt.toLowerCase();
+  if (lowered.trim() === "" || looksLikeChalinOrchestrationWork(lowered)) return false;
+  // Scaffold contract: a scaffolding topic plus a concrete deliverable.
+  const scaffoldTopic = /\b(scaffold|scaffoldea|greenfield|desde cero|librer[ií]a|cli|package\.json|readme|sin dependencias|no external dependencies)\b/i;
+  const scaffoldDeliverable = /\b(test|tests|prueba|pruebas|src\/|package\.json|readme|api|export)\b/i;
+  if (scaffoldTopic.test(prompt) && scaffoldDeliverable.test(prompt)) return true;
+  // Otherwise require an explicit mutation verb and a small set of named paths.
+  const mutationVerb = /\b(implementa|implementar|implement|fix|corrige|corregir|refactor|refactoriza|añade|agrega|add|update|actualiza|scaffold|scaffoldea|crea|create|write|escribe)\b/i;
+  if (!mutationVerb.test(prompt)) return false;
+  const mentionedPaths = countPathMentions(prompt);
+  return mentionedPaths > 0 && mentionedPaths <= 6;
+}
+/**
+ * Decides whether a prompt describes risky, surgical, security-sensitive or
+ * independently parallel implementation work that should get the compact
+ * "critical" steering.
+ *
+ * Both conditions must hold: the prompt names a risk marker (long file,
+ * surgical edit, auth, security, independent changes, ...) and it names an
+ * action (implement, change, fix, add, tests, worker, parallel, ...).
+ *
+ * The Spanish stems `quirúrgic` and `paralel` are followed by `\w*` so that
+ * real word forms match ("quirúrgico", "quirúrgica", "paralelo", "paralelas").
+ * Previously the stems sat directly before `\b` and could never match, because
+ * the stem is always followed by another letter.
+ *
+ * @param prompt Raw user prompt; matching is case-insensitive.
+ * @returns `true` when the prompt contains both a risk marker and an action word.
+ */
+export function shouldUseCompactChalinCriticalPrompt(prompt: string): boolean {
+  const text = prompt.toLowerCase();
+  return /\b(long-file|archivo largo|surgical|quir[uú]rgic\w*|evita reescribir|avoid rewrite|auth|refresh token|security-sensitive|seguridad|dos cambios independientes|independent implementation|modulos separados|m[oó]dulos separados)\b/i.test(text)
+    && /\b(implementa|implement|cambia|change|fix|corrige|agrega|add|tests|pruebas|worker|parallel|paralel\w*)\b/i.test(text);
+}
+/**
+ * Builds the steering message for a continuation turn when a resumable run
+ * exists. It reports the run id, status and progress, and instructs the agent
+ * to call `chalin_resume` first.
+ *
+ * Progress counts steps with a usable handoff; the denominator is at least 1
+ * so an empty step list renders as "0/1". The next agent is the first step
+ * without a usable handoff, or "unknown" when there is none.
+ */
+function compactResumeSteeringMessage(run: RunState): string {
+  const { steps } = run;
+  const doneCount = steps.filter((step) => isUsableStepHandoff(step)).length;
+  const stepCount = Math.max(steps.length, 1);
+  const pending = steps.find((step) => !isUsableStepHandoff(step));
+  const nextAgent = pending?.agent ?? "unknown";
+  const lines: string[] = [];
+  lines.push("Continuation intent detected and a resumable pi-chalin run exists.");
+  lines.push(`Run id: ${run.id}. Status: ${run.status}. Progress: ${doneCount}/${stepCount}. Next agent: ${nextAgent}.`);
+  lines.push(`First action MUST be \`chalin_resume\` with {"runId":"${run.id}"}.`);
+  lines.push("Do not call `chalin_route`; do not restart the workflow; do not answer from partial findings.");
+  lines.push("After `chalin_resume` returns, answer the user from its Final answer material.");
+  return lines.join("\n");
+}
+/**
+ * Builds the steering message for risky/complex/surgical work: route through
+ * `chalin_route` first and keep worker/reviewer discipline.
+ *
+ * @param hasUI When falsy, an extra non-interactive instruction is inserted as the second line.
+ */
+function compactCriticalSteeringMessage(hasUI?: boolean): string {
+  const lines = [
+    "pi-chalin critical preflight: this is risky/complex/surgical work. First action must be `chalin_route`; do not answer direct and do not inspect with native tools first.",
+  ];
+  if (!hasUI) {
+    lines.push("Non-interactive mode: one real chalin_route, then stop from the chalin handoff; no post-chalin native exploration.");
+  }
+  lines.push("Use worker/reviewer discipline: decompose, assign ownership, verify, and preserve a compact final handoff.");
+  return lines.join("\n");
+}
+/**
+ * Builds the steering message for bounded direct work: prefer native tools,
+ * write behavior-bearing tests, verify, and finish with a compact final answer.
+ *
+ * @param hasUI When falsy, an extra non-interactive instruction is inserted as the second line.
+ */
+function compactDirectSteeringMessage(hasUI?: boolean): string {
+  const lines = [
+    "pi-chalin compact preflight: this looks like bounded direct work. Prefer native tools; do not spend budget on orchestration prose or visible planning before tool calls.",
+  ];
+  if (!hasUI) {
+    lines.push("Non-interactive mode: first action should be a relevant tool call; inspect briefly, write promptly, verify, fix failures, rerun verification after the final edit, then final answer.");
+  }
+  lines.push(
+    "For dependency-free TypeScript scaffolding: exact requested files, Bun test runner, tests under test/, no uninstalled runners/dependencies, exported requested API. If it is a CLI package, declare the requested command in package.json `bin`, not only in `scripts`; `bin` values must be executable file paths such as `./src/cli.ts` or `./bin/name`, never runtime command strings.",
+    "If tests are requested, make them behavior-bearing: assert requested outputs/effects and edge/failure cases instead of only preserving starter smoke/empty tests.",
+    "For timer behavior, prefer injected clocks/schedulers over brittle mock timer APIs. For Bun CLI subprocess tests, preserve process.env and derive paths directly from import.meta.url.",
+    "If the prompt says review-only, docs-only, no code changes, or no mutations, obey that literally: do not add tests/source files or modify code unless the user explicitly asks.",
+    "Final answer must include Changed, Verification, and Notes. Do not continue exploring after verification passes.",
+  );
+  return lines.join("\n");
+}
+/**
+ * Reports whether the text mentions work that belongs to chalin orchestration
+ * rather than direct mode: deep or project-wide analysis, architecture,
+ * migrations, strategy, broad or security review, risky or surgical edits,
+ * parallel work, comparing options, resuming, or memory recall.
+ *
+ * The Spanish stems `quirúrgic` and `paralel` are followed by `\w*` so real
+ * word forms ("quirúrgico", "paralelo") match. Previously each stem sat
+ * directly before `\b`, which cannot match because another letter always
+ * follows the stem.
+ *
+ * @param text Prompt text; matching is case-insensitive.
+ */
+function looksLikeChalinOrchestrationWork(text: string): boolean {
+  return /\b(en profundidad|deep|todo el proyecto|project-wide|arquitectura|architecture|migration|migraci[oó]n|strategy|estrategia|review completo|security review|broad|riesgoso|risky|long-file|archivo largo|surgical|quir[uú]rgic\w*|paralel\w*|parallel|compare approaches|opciones|resume|contin[uú]a|memory|memoria)\b/i.test(text);
+}
+/**
+ * Counts path-like mentions in the prompt: tokens containing at least one
+ * `dir/` segment, plus a fixed list of well-known manifest file names. A
+ * mention must sit at the start of the prompt or follow whitespace, a
+ * backtick or a quote.
+ */
+function countPathMentions(prompt: string): number {
+  // NOTE(review): the trailing lookahead ends with an empty alternative, so it always succeeds.
+  const pathPattern = /(?:^|[\s`'"])(?:[\w.-]+\/)+[\w.@-]+|(?:^|[\s`'"])(?:package\.json|README\.md|tsconfig\.json|pyproject\.toml|Cargo\.toml|go\.mod)(?=$|[\s`'".,:;)]|)/gi;
+  const found = prompt.match(pathPattern);
+  if (found === null) return 0;
+  return found.length;
+}
+/**
+ * Extracts a human-readable reason when a chalin tool result shows the route
+ * did not run any work.
+ *
+ * A route guard with action "direct-recommended" takes precedence. Otherwise
+ * any approval action that is a string other than "allow" counts as blocked.
+ * The reason text is appended after ": " only when it is a non-blank string.
+ *
+ * @returns The blocked reason, or `undefined` when the route was not blocked.
+ */
+function chalinRouteBlockedReason(event: unknown): string | undefined {
+  type Verdict = { action?: unknown; reason?: unknown };
+  const details = (event as { result?: { details?: { approval?: Verdict; routeGuard?: Verdict } } }).result?.details;
+  const withReason = (label: string, reason: unknown): string =>
+    typeof reason === "string" && reason.trim() ? `${label}: ${reason}` : label;
+  const guard = details?.routeGuard;
+  if (guard?.action === "direct-recommended") return withReason("direct-recommended", guard.reason);
+  const approval = details?.approval;
+  const verdict = approval?.action;
+  if (typeof verdict !== "string" || verdict === "allow") return undefined;
+  return withReason(verdict, approval?.reason);
+}
+/**
+ * In non-interactive runs, schedules an abort followed by a shutdown on a
+ * timer. The timer is unref'd when the runtime supports it.
+ *
+ * Nothing is scheduled when a UI is present, when the context has no
+ * `shutdown` function, or when `PI_CHALIN_NONINTERACTIVE_SHUTDOWN` is "0".
+ * The delay comes from `PI_CHALIN_NONINTERACTIVE_SHUTDOWN_DELAY_MS` and falls
+ * back to 0 when that value is missing, not finite, or negative.
+ */
+function scheduleNonInteractiveShutdown(ctx: { hasUI?: boolean; abort?: () => void; shutdown?: () => void }): void {
+  if (ctx.hasUI) return;
+  const shutdown = ctx.shutdown;
+  if (typeof shutdown !== "function") return;
+  if (process.env.PI_CHALIN_NONINTERACTIVE_SHUTDOWN === "0") return;
+  const abort = ctx.abort;
+  const requestedDelay = Number(process.env.PI_CHALIN_NONINTERACTIVE_SHUTDOWN_DELAY_MS);
+  let delayMs = 0;
+  if (Number.isFinite(requestedDelay) && requestedDelay >= 0) delayMs = requestedDelay;
+  const fire = (): void => {
+    try {
+      abort?.();
+      shutdown();
+    } catch {
+      // Pi can mark extension contexts stale while a print-mode turn exits.
+      // The tool result has already been emitted, so stale shutdown is safe to ignore.
+    }
+  };
+  const timer = setTimeout(fire, delayMs);
+  timer.unref?.();
+}

package/src/budget.ts ADDED Viewed

@@ -0,0 +1,333 @@
+import type { ArtifactCheckpoint, ArtifactStore } from "./artifacts.ts";
+import type { AgentDefinition, RouteKind, RouteRisk, RunStepState, ToolBudgetProfile } from "./schemas.ts";
+export type BudgetTaskKind = "recon" | "review" | "implementation" | "migration" | "long-autonomous" | "research" | "planning" | "synthesis"; // Kind of work a budget is sized for; baseCapsForTask and baseToolCallsFor pick base caps from it.
+export type BudgetHealthStatus = "ok" | "warn" | "budget-capped"; // "ok": no cap reached. "budget-capped": the seconds, USD or turns cap was reached. "warn": only other caps were reached.
+export type BudgetResumeStrategy = "none" | "handoff-only" | "checkpoint-and-continue" | "split-and-continue" | "stage-checkpoint-validate-memory-next"; // Resume plan stored on a policy; evaluateBudgetUsage reads it to pick the next action once capped.
+export interface BudgetCaps { // Per-step limits that evaluateBudgetUsage checks against a BudgetUsage record.
+  maxToolCalls: number; // Checked against BudgetUsage.toolCalls.
+  maxSeconds: number; // Checked against elapsed time in whole seconds, rounded up.
+  maxUsd: number; // Checked against BudgetUsage.totalCostUsd.
+  maxTurns: number; // Checked against BudgetUsage.turns.
+  maxOutputChars: number; // Checked against BudgetUsage.outputChars.
+  maxReadBytes: number; // Checked against BudgetUsage.readBytes.
+  maxFilesTouched: number; // Checked against BudgetUsage.filesTouched.
+  maxRetriesPerTool: number; // Checked against the highest retry count of any single tool.
+}
+export interface BudgetPolicy { // Budget decision for one step, as produced by policyForStep.
+  id: string; // policyForStep builds this as "<taskKind>:<profile>:<risk>".
+  taskKind: BudgetTaskKind;
+  profile: ToolBudgetProfile;
+  risk: RouteRisk;
+  routeKind: RouteKind;
+  caps: BudgetCaps; // Base caps for the task kind after scaling by profile and risk.
+  resumeStrategy: BudgetResumeStrategy;
+}
+export interface BudgetPreflightInput { // Input to estimateBudgetPreflight.
+  task: string;
+  routeKind: RouteKind;
+  steps?: Array<{ agent: string; task: string; budget?: ToolBudgetProfile }>; // Planned steps when known; the first one serves as the representative step.
+  risk?: RouteRisk; // Inferred from the task and steps when omitted.
+  needsArtifacts?: boolean; // When truthy, forces requiresArtifacts in the result.
+}
+export interface BudgetPreflight { // Result of estimateBudgetPreflight.
+  taskKind: BudgetTaskKind;
+  expectedStages: number; // Step count (default 3) clamped to 2..8 for "multi-agent-dag" routes; otherwise step count with a minimum of 1.
+  expectedTools: number; // Larger of the representative policy's tool cap and the sum of the per-step tool caps.
+  risk: RouteRisk;
+  budgetProfile: ToolBudgetProfile;
+  resumeStrategy: BudgetResumeStrategy;
+  requiresArtifacts: boolean; // True when requested by the caller, for "long-autonomous" tasks, or for the "extended" profile.
+  recommendation: string;
+  policy: BudgetPolicy; // Representative policy with resumeStrategy replaced by this preflight's strategy.
+}
+export interface BudgetUsage { // Measured consumption that evaluateBudgetUsage checks against BudgetCaps.
+  elapsedMs: number; // Converted to whole seconds (rounded up) before the maxSeconds check.
+  toolCalls: number;
+  totalCostUsd: number;
+  turns: number;
+  outputChars: number;
+  readBytes: number;
+  filesTouched: number;
+  retriesByTool: Record<string, number>; // Only the largest value is checked against maxRetriesPerTool.
+}
+export interface BudgetCapHit { // One cap whose usage reached or exceeded its limit (used >= limit).
+  name: "max_tool_calls" | "max_seconds" | "max_usd" | "max_turns" | "max_output_chars" | "max_read_bytes" | "max_files_touched" | "max_retries_per_tool";
+  used: number;
+  limit: number;
+}
+export interface BudgetHealth { // Outcome of evaluateBudgetUsage.
+  status: BudgetHealthStatus;
+  caps: BudgetCapHit[]; // Every cap that was reached; empty when status is "ok".
+  warnings: string[]; // One message per entry in caps.
+  next: "continue" | "checkpoint-and-continue" | "split" | "escalate"; // evaluateBudgetUsage itself only returns "continue", "checkpoint-and-continue" or "split".
+}
+export interface ToolUtilityInput { // Input to summarizeToolUtility.
+  findings: string[]; // Blank entries are ignored when counting findings.
+  toolCalls: number;
+  filesRead: string[]; // May contain repeats; repeats are counted as duplicate reads.
+  firstSignalToolCall?: number; // When omitted, summarizeToolUtility derives a fallback from findings and toolCalls.
+  verificationDone: boolean;
+  memoryCandidates: Array<{ content: string; category?: string; confidence?: number }>;
+}
+export interface ToolUtilityMetrics { // Result of summarizeToolUtility.
+  findingsPerTool: number; // Non-blank findings divided by max(toolCalls, 1).
+  filesReadPerFinding: number; // Unique files read divided by max(non-blank findings, 1).
+  duplicateReads: number; // filesRead entries beyond the first read of each file.
+  toolCallsBeforeFirstSignal: number;
+  verificationDone: boolean; // Copied from the input unchanged.
+  memoryCandidatesQuality: number; // Mean per-candidate quality score, or 0 when there are no candidates.
+}
+/**
+ * Builds the budget policy for a single run step.
+ *
+ * An explicit `step.budget` profile wins; otherwise the profile is inferred.
+ * Base caps are looked up by task kind and agent name (the agent definition's
+ * name when available, else the step's agent id) and then scaled by profile
+ * and risk.
+ *
+ * @param agent Agent definition for the step, if known.
+ * @param step Step fields used for budgeting.
+ * @param routeKind Route topology; defaults to "single-agent".
+ * @param risk Route risk; defaults to "low".
+ */
+export function policyForStep(
+  agent: AgentDefinition | undefined,
+  step: Pick<RunStepState, "agent" | "task" | "budget">,
+  routeKind: RouteKind = "single-agent",
+  risk: RouteRisk = "low",
+): BudgetPolicy {
+  const profile = step.budget ?? inferredBudgetProfile(agent, step, routeKind);
+  const taskKind = taskKindForAgent(agent, step.task);
+  const capsOwner = agent?.name ?? step.agent;
+  const unscaledCaps = baseCapsForTask(taskKind, capsOwner);
+  const policy: BudgetPolicy = {
+    id: [taskKind, profile, risk].join(":"),
+    taskKind,
+    profile,
+    risk,
+    routeKind,
+    caps: scaleCaps(unscaledCaps, profile, risk),
+    resumeStrategy: resumeStrategyFor(taskKind, profile),
+  };
+  return policy;
+}
+/**
+ * Estimates, before a route runs, what kind of task it is and how much budget
+ * it will need.
+ *
+ * The first planned step (or a synthetic "delegate" step) is the
+ * representative step whose policy is returned. Expected tool usage is the
+ * larger of that policy's tool cap and the sum of the per-step tool caps.
+ * Artifacts are required when the caller asks for them, for "long-autonomous"
+ * tasks, or for the "extended" profile; that in turn selects the resume strategy.
+ */
+export function estimateBudgetPreflight(input: BudgetPreflightInput): BudgetPreflight {
+  const { task, routeKind, steps } = input;
+  const taskKind = inferTaskKind(task, steps);
+  const risk = input.risk ?? inferRisk(task, steps);
+  const budgetProfile = inferPreflightProfile(task, taskKind, steps, routeKind);
+  const representativeStep = steps?.[0] ?? { agent: "delegate", task, budget: budgetProfile };
+  const policy = policyForStep(undefined, { ...representativeStep, budget: budgetProfile }, routeKind, risk);
+  let expectedStages: number;
+  if (routeKind === "multi-agent-dag") {
+    // DAG routes are assumed to have between 2 and 8 stages, 3 when no steps are given.
+    expectedStages = Math.max(2, Math.min(8, steps?.length ?? 3));
+  } else {
+    expectedStages = Math.max(1, steps?.length ?? 1);
+  }
+  let summedToolCaps = 0;
+  for (const step of steps ?? [representativeStep]) {
+    summedToolCaps += policyForStep(undefined, step, routeKind, risk).caps.maxToolCalls;
+  }
+  const expectedTools = Math.max(policy.caps.maxToolCalls, summedToolCaps);
+  const requiresArtifacts = Boolean(input.needsArtifacts || taskKind === "long-autonomous" || budgetProfile === "extended");
+  let resumeStrategy: BudgetResumeStrategy = "handoff-only";
+  if (requiresArtifacts) {
+    resumeStrategy = "stage-checkpoint-validate-memory-next";
+  } else if (taskKind === "implementation") {
+    resumeStrategy = "checkpoint-and-continue";
+  }
+  const recommendation = recommendationFor(taskKind, budgetProfile, requiresArtifacts);
+  return {
+    taskKind,
+    expectedStages,
+    expectedTools,
+    risk,
+    budgetProfile,
+    resumeStrategy,
+    requiresArtifacts,
+    recommendation,
+    policy: { ...policy, resumeStrategy },
+  };
+}
+/**
+ * Compares measured usage with the policy's caps and reports budget health.
+ *
+ * Every cap that has been reached is listed. Reaching the seconds, USD or
+ * turns cap makes the result "budget-capped"; reaching only other caps yields
+ * "warn". When capped, the next action is "split" for the
+ * "stage-checkpoint-validate-memory-next" strategy and
+ * "checkpoint-and-continue" otherwise.
+ */
+export function evaluateBudgetUsage(policy: BudgetPolicy, usage: BudgetUsage): BudgetHealth {
+  const limits = policy.caps;
+  // Only the worst-offending tool matters for the retry cap.
+  const worstRetryCount = Math.max(0, ...Object.values(usage.retriesByTool));
+  const checks: Array<[BudgetCapHit["name"], number, number]> = [
+    ["max_tool_calls", usage.toolCalls, limits.maxToolCalls],
+    ["max_seconds", Math.ceil(usage.elapsedMs / 1000), limits.maxSeconds],
+    ["max_usd", usage.totalCostUsd, limits.maxUsd],
+    ["max_turns", usage.turns, limits.maxTurns],
+    ["max_output_chars", usage.outputChars, limits.maxOutputChars],
+    ["max_read_bytes", usage.readBytes, limits.maxReadBytes],
+    ["max_files_touched", usage.filesTouched, limits.maxFilesTouched],
+    ["max_retries_per_tool", worstRetryCount, limits.maxRetriesPerTool],
+  ];
+  const caps: BudgetCapHit[] = [];
+  for (const [name, used, limit] of checks) compare(caps, name, used, limit);
+  if (caps.length === 0) return { status: "ok", caps, warnings: [], next: "continue" };
+  const hardCapNames = new Set<BudgetCapHit["name"]>(["max_seconds", "max_usd", "max_turns"]);
+  const isCapped = caps.some((cap) => hardCapNames.has(cap.name));
+  const warnings = caps.map((cap) => `${cap.name} used ${formatNumber(cap.used)} over limit ${formatNumber(cap.limit)}`);
+  if (!isCapped) return { status: "warn", caps, warnings, next: "continue" };
+  return {
+    status: "budget-capped",
+    caps,
+    warnings,
+    next: policy.resumeStrategy === "stage-checkpoint-validate-memory-next" ? "split" : "checkpoint-and-continue",
+  };
+}
+/**
+ * Summarizes how productive a step's tool usage was.
+ *
+ * Blank findings are ignored. Ratios use a denominator of at least 1 so they
+ * stay finite. When `firstSignalToolCall` is not supplied, the first signal is
+ * assumed after at most one tool call if there are findings, and after all
+ * tool calls if there are none.
+ */
+export function summarizeToolUtility(input: ToolUtilityInput): ToolUtilityMetrics {
+  const findingCount = input.findings.filter((item) => item.trim().length > 0).length;
+  const distinctFileCount = new Set(input.filesRead).size;
+  const scores = input.memoryCandidates.map(memoryQualityScore);
+  let memoryCandidatesQuality = 0;
+  if (scores.length) {
+    let total = 0;
+    for (const score of scores) total += score;
+    memoryCandidatesQuality = round(total / scores.length);
+  }
+  const fallbackFirstSignal = findingCount > 0 ? Math.min(input.toolCalls, 1) : input.toolCalls;
+  return {
+    findingsPerTool: round(findingCount / Math.max(input.toolCalls, 1)),
+    filesReadPerFinding: round(distinctFileCount / Math.max(findingCount, 1)),
+    duplicateReads: input.filesRead.length - distinctFileCount,
+    toolCallsBeforeFirstSignal: input.firstSignalToolCall ?? fallbackFirstSignal,
+    verificationDone: input.verificationDone,
+    memoryCandidatesQuality,
+  };
+}
+/**
+ * Records a "paused" checkpoint for a budget-capped step.
+ *
+ * First initializes the feature in the artifact store, then appends a
+ * checkpoint whose summary joins the step's handoff, its output text and the
+ * given reason (empty parts skipped) and passes the result to `compact` with 900.
+ *
+ * @returns The checkpoint returned by the store.
+ */
+export async function recordBudgetCheckpoint(store: ArtifactStore, featureId: string, step: RunStepState, reason: string): Promise<ArtifactCheckpoint> {
+  const { agent } = step;
+  await store.initFeature({
+    featureId,
+    goal: `Continue budget-capped pi-chalin step ${agent}`,
+    chain: [agent],
+    currentStep: step.task,
+  });
+  const summaryParts = [step.output?.handoff, step.output?.text, reason].filter(Boolean);
+  return store.appendCheckpoint(featureId, {
+    agent,
+    title: `${agent} budget-capped`,
+    summary: compact(summaryParts.join(" "), 900),
+    status: "paused",
+    stage: step.id,
+  });
+}
+/**
+ * Appends a cap hit to `caps` when usage has reached or exceeded a finite
+ * limit. Non-finite limits (Infinity, NaN) are never treated as reached.
+ */
+function compare(caps: BudgetCapHit[], name: BudgetCapHit["name"], used: number, limit: number): void {
+  const reached = Number.isFinite(limit) && used >= limit;
+  if (!reached) return;
+  const hit: BudgetCapHit = { name, used, limit };
+  caps.push(hit);
+}
+/**
+ * Returns the unscaled caps for a task kind and agent.
+ *
+ * Time, cost, turn, output and read limits come from one of four tiers:
+ * long-autonomous, write-heavy (implementation or migration), synthesis
+ * (synthesis or planning), or the default tier. The tool-call cap is looked up
+ * separately by agent name and task kind, and the files-touched cap depends
+ * only on the task kind.
+ */
+function baseCapsForTask(taskKind: BudgetTaskKind, agentName: string): BudgetCaps {
+  type Tier = "long" | "write" | "synthesis" | "standard";
+  let tier: Tier = "standard";
+  if (taskKind === "long-autonomous") tier = "long";
+  else if (taskKind === "implementation" || taskKind === "migration") tier = "write";
+  else if (taskKind === "synthesis" || taskKind === "planning") tier = "synthesis";
+  const tierLimits = {
+    long: { maxSeconds: 7200, maxUsd: 2.5, maxTurns: 12, maxOutputChars: 24000, maxReadBytes: 5_000_000 },
+    write: { maxSeconds: 1800, maxUsd: 1.2, maxTurns: 8, maxOutputChars: 16000, maxReadBytes: 2_000_000 },
+    synthesis: { maxSeconds: 900, maxUsd: 0.45, maxTurns: 4, maxOutputChars: 7000, maxReadBytes: 350_000 },
+    standard: { maxSeconds: 1200, maxUsd: 0.8, maxTurns: 6, maxOutputChars: 12000, maxReadBytes: 1_500_000 },
+  }[tier];
+  let maxFilesTouched = 4;
+  if (taskKind === "migration") maxFilesTouched = 40;
+  else if (taskKind === "implementation") maxFilesTouched = 20;
+  return {
+    maxToolCalls: baseToolCallsFor(taskKind, agentName),
+    ...tierLimits,
+    maxFilesTouched,
+    maxRetriesPerTool: 3,
+  };
+}
+/**
+ * Returns the baseline tool-call allowance for a step.
+ *
+ * A recognised agent name decides the allowance on its own; the task kind is
+ * consulted only for other agents, with 40 as the final fallback.
+ *
+ * @param taskKind - Classified kind of work.
+ * @param agentName - Name of the agent running the step.
+ * @returns The unscaled tool-call cap.
+ */
+function baseToolCallsFor(taskKind: BudgetTaskKind, agentName: string): number {
+  switch (agentName) {
+    case "context-builder":
+      return 60;
+    case "scout":
+      return 40;
+    case "planner":
+      return 25;
+    case "reviewer":
+      return 50;
+    case "worker":
+      return 80;
+  }
+
+  // Unrecognised agent: fall back to the task kind.
+  switch (taskKind) {
+    case "long-autonomous":
+      return 160;
+    case "migration":
+      return 120;
+    case "implementation":
+      return 80;
+    case "review":
+      return 50;
+    case "research":
+      return 60;
+    case "planning":
+    case "synthesis":
+      return 25;
+    default:
+      return 40;
+  }
+}
+/**
+ * Scales baseline caps by budget profile and, for tool calls only, by risk.
+ *
+ * Tool calls are additionally clamped to a per-profile ceiling and to at least
+ * 1; seconds never drop below 120; turns and files touched never drop below 1.
+ * The per-tool retry cap is passed through unchanged.
+ *
+ * @param caps - Baseline caps to scale.
+ * @param profile - Budget profile selecting the multipliers.
+ * @param risk - Route risk; `critical` and `high` shrink the tool-call cap.
+ * @returns A new caps object; `caps` is not mutated.
+ */
+function scaleCaps(caps: BudgetCaps, profile: ToolBudgetProfile, risk: RouteRisk): BudgetCaps {
+  // Defaults apply to any profile not listed in the switch below.
+  let generalFactor = 1;
+  let turnFactor = 1;
+  let fileFactor = 1;
+  let toolCeiling = 140;
+  switch (profile) {
+    case "tight":
+      generalFactor = 0.5;
+      turnFactor = 0.75;
+      fileFactor = 0.75;
+      toolCeiling = 60;
+      break;
+    case "deep":
+      generalFactor = 2;
+      turnFactor = 1.5;
+      fileFactor = 1.5;
+      toolCeiling = 240;
+      break;
+    case "extended":
+      generalFactor = 4;
+      turnFactor = 2;
+      fileFactor = 2;
+      toolCeiling = 500;
+      break;
+  }
+
+  let riskFactor = 1;
+  if (risk === "critical") riskFactor = 0.75;
+  else if (risk === "high") riskFactor = 0.9;
+
+  const scaledToolCalls = Math.ceil(caps.maxToolCalls * generalFactor * riskFactor);
+
+  return {
+    maxToolCalls: Math.max(1, Math.min(toolCeiling, scaledToolCalls)),
+    maxSeconds: Math.max(120, Math.ceil(caps.maxSeconds * generalFactor)),
+    maxUsd: round(caps.maxUsd * generalFactor),
+    maxTurns: Math.max(1, Math.ceil(caps.maxTurns * turnFactor)),
+    maxOutputChars: Math.ceil(caps.maxOutputChars * generalFactor),
+    maxReadBytes: Math.ceil(caps.maxReadBytes * generalFactor),
+    maxFilesTouched: Math.max(1, Math.ceil(caps.maxFilesTouched * fileFactor)),
+    maxRetriesPerTool: caps.maxRetriesPerTool,
+  };
+}
+/**
+ * Classifies a single step from its agent's concern and its task wording.
+ *
+ * Rules are checked in order and the first match wins: an implementation
+ * agent, long-running / migration wording, edit wording, review, research,
+ * planning, then synthesis (context-building agents only). Anything else is
+ * classified as `recon`.
+ *
+ * @param agent - Definition of the agent running the step, if known.
+ * @param task - Free-text task description.
+ * @returns The task kind used to pick budget caps.
+ */
+function taskKindForAgent(agent: AgentDefinition | undefined, task: string): BudgetTaskKind {
+  const concern = agent?.concern;
+  if (concern === "implementation") return "implementation";
+
+  const longOrMigration = /\b(long[- ]running|hours?|days?|checkpoint|resume|migration|migrate)\b/i.test(task);
+  if (longOrMigration) {
+    // Long-running wording outranks migration wording when both appear.
+    const longRunning = /\b(long[- ]running|hours?|days?|checkpoint|resume)\b/i.test(task);
+    return longRunning ? "long-autonomous" : "migration";
+  }
+
+  const mentionsEditing = /\b(implement|fix|edit|modify|write)\b/i.test(task);
+  if (mentionsEditing) return "implementation";
+
+  if (concern === "review" || /\b(review|audit|validate)\b/i.test(task)) return "review";
+
+  switch (concern) {
+    case "research":
+      return "research";
+    case "planning":
+      return "planning";
+    case "context-building":
+      if (/\b(synthesize|summarize|final)\b/i.test(task)) return "synthesis";
+      break;
+  }
+  return "recon";
+}
+/**
+ * Classifies a whole request from the task text plus every step's agent name
+ * and task text.
+ *
+ * The keyword rules are tried in order and the first match wins; text that
+ * matches none of them is classified as `recon`.
+ *
+ * @param task - Top-level task description.
+ * @param steps - Planned steps, if any.
+ * @returns The inferred task kind.
+ */
+function inferTaskKind(task: string, steps: BudgetPreflightInput["steps"]): BudgetTaskKind {
+  const fragments: string[] = [task];
+  for (const step of steps ?? []) fragments.push(step.agent, step.task);
+  const combined = fragments.join(" ");
+
+  // Ordered from most to least specific; earlier entries take precedence.
+  const rules: ReadonlyArray<readonly [RegExp, BudgetTaskKind]> = [
+    [/\b(long[- ]running|hours?|days?|checkpoint|resume|autonomous)\b/i, "long-autonomous"],
+    [/\b(migrate|migration|codemod|vue3|rewrite across|all components)\b/i, "migration"],
+    [/\b(implement|fix|edit|modify|worker)\b/i, "implementation"],
+    [/\b(review|audit|security|validate)\b/i, "review"],
+    [/\b(web|internet|docs?|source)\b/i, "research"],
+    [/\b(plan|roadmap|design)\b/i, "planning"],
+  ];
+  for (const [pattern, kind] of rules) {
+    if (pattern.test(combined)) return kind;
+  }
+  return "recon";
+}
+/**
+ * Estimates route risk from the task wording and the planned steps.
+ *
+ * Destructive or sensitive keywords in the task or any step task give `high`;
+ * otherwise the presence of a `worker` step gives `medium`; otherwise `low`.
+ *
+ * @param task - Top-level task description.
+ * @param steps - Planned steps, if any.
+ * @returns The inferred risk level.
+ */
+function inferRisk(task: string, steps: BudgetPreflightInput["steps"]): RouteRisk {
+  const plan = steps ?? [];
+  const combined = [task, ...plan.map((step) => step.task)].join(" ");
+
+  const sensitive = /\b(delete|security|auth|payment|database|migration|production|write|modify|edit)\b/i.test(combined);
+  if (sensitive) return "high";
+
+  const hasWorker = plan.some((step) => step.agent === "worker");
+  return hasWorker ? "medium" : "low";
+}
+/**
+ * Picks the budget profile for a preflight estimate.
+ *
+ * The last step that declares a budget wins outright. Otherwise
+ * long-autonomous work is `extended`, migrations and multi-agent DAG routes
+ * are `deep`, planning or tasks worded as simple/quick/small are `tight`, and
+ * everything else is `normal`.
+ *
+ * @param task - Top-level task description.
+ * @param taskKind - Already-inferred task kind.
+ * @param steps - Planned steps, if any.
+ * @param routeKind - Kind of route being planned.
+ * @returns The budget profile to apply.
+ */
+function inferPreflightProfile(task: string, taskKind: BudgetTaskKind, steps: BudgetPreflightInput["steps"], routeKind: RouteKind): ToolBudgetProfile {
+  // Keep overwriting so the final value is the last declared budget.
+  let explicit: ToolBudgetProfile | undefined;
+  for (const step of steps ?? []) {
+    if (step.budget) explicit = step.budget;
+  }
+  if (explicit) return explicit;
+
+  if (taskKind === "long-autonomous") return "extended";
+
+  const needsDepth = taskKind === "migration" || routeKind === "multi-agent-dag";
+  if (needsDepth) return "deep";
+
+  const isSmall = taskKind === "planning" || /\b(simple|quick|small)\b/i.test(task);
+  return isSmall ? "tight" : "normal";
+}
+/**
+ * Picks the budget profile for one step at run time.
+ *
+ * An explicit step budget wins. Otherwise long-running wording gives
+ * `extended`; recon, context-building, review and research agents on a
+ * multi-agent DAG route get `deep`; planning agents get `tight`; everything
+ * else is `normal`.
+ *
+ * @param agent - Definition of the agent running the step, if known.
+ * @param step - The step's agent name, task text and optional budget.
+ * @param routeKind - Kind of route the step belongs to.
+ * @returns The budget profile to apply.
+ */
+function inferredBudgetProfile(agent: AgentDefinition | undefined, step: Pick<RunStepState, "agent" | "task" | "budget">, routeKind: RouteKind): ToolBudgetProfile {
+  if (step.budget) return step.budget;
+
+  const longRunning = /\b(long[- ]running|hours?|days?|checkpoint|resume|autonomous)\b/i.test(step.task);
+  if (longRunning) return "extended";
+
+  // An unknown agent is treated as having no concern at all.
+  const concern: string = agent?.concern ?? "";
+  if (routeKind === "multi-agent-dag") {
+    switch (concern) {
+      case "recon":
+      case "context-building":
+      case "review":
+      case "research":
+        return "deep";
+    }
+  }
+  return concern === "planning" ? "tight" : "normal";
+}
+/**
+ * Chooses how a budget-capped step should be resumed.
+ *
+ * The `extended` profile and long-autonomous tasks use the full staged
+ * strategy, write-heavy tasks checkpoint and continue, read-only
+ * investigations only hand off, and any other kind has no resume strategy.
+ *
+ * @param taskKind - Classified kind of work.
+ * @param profile - Budget profile in effect.
+ * @returns The resume strategy identifier.
+ */
+function resumeStrategyFor(taskKind: BudgetTaskKind, profile: ToolBudgetProfile): BudgetResumeStrategy {
+  if (profile === "extended") return "stage-checkpoint-validate-memory-next";
+
+  switch (taskKind) {
+    case "long-autonomous":
+      return "stage-checkpoint-validate-memory-next";
+    case "implementation":
+    case "migration":
+      return "checkpoint-and-continue";
+    case "recon":
+    case "review":
+    case "research":
+      return "handoff-only";
+    default:
+      return "none";
+  }
+}
+/**
+ * Returns the one-line workflow recommendation for a budget estimate.
+ *
+ * Precedence: long-autonomous tasks, then artifact-backed or `extended`
+ * budgets, then migrations, then the generic advice.
+ *
+ * @param taskKind - Classified kind of work.
+ * @param profile - Budget profile in effect.
+ * @param artifacts - Whether checkpoint artifacts are in use.
+ * @returns The recommendation sentence.
+ */
+function recommendationFor(taskKind: BudgetTaskKind, profile: ToolBudgetProfile, artifacts: boolean): string {
+  if (taskKind === "long-autonomous") {
+    return "Use staged DAG execution with checkpoint → validate → memory → next-stage continuation.";
+  }
+
+  const checkpointHeavy = artifacts || profile === "extended";
+  if (checkpointHeavy) {
+    return "Write checkpoint artifacts at every handoff and split work before budget caps are hit.";
+  }
+
+  return taskKind === "migration"
+    ? "Prefer DAG fan-out by module with reviewer synthesis and validation contracts."
+    : "Use the smallest bounded agent workflow and stop after high-signal evidence.";
+}
+/**
+ * Scores how suitable a memory candidate is for long-term storage.
+ *
+ * Empty content, or content containing process-output keywords such as
+ * `stdout` or `traceback`, scores 0. Otherwise the score is the rounded sum of
+ * four parts: a sentence part (0.35 when some fragment is longer than 15
+ * characters, else 0.15), a length part (0.35 for 80 to 600 characters, else
+ * 0.15), a category part (0.2 for a listed durable category, else 0.05) and a
+ * confidence part (one tenth of the confidence, limited to the range 0 to 0.1).
+ *
+ * @param candidate - Candidate content with optional category and confidence.
+ * @returns The quality score.
+ */
+function memoryQualityScore(candidate: { content: string; category?: string; confidence?: number }): number {
+  const text = candidate.content.trim();
+  if (text.length === 0) return 0;
+  if (/\b(stdout|stderr|traceback|cmd =|subprocess|returncode)\b/i.test(text)) return 0;
+
+  const hasSentence = text.split(/[.!?]+/).some((fragment) => fragment.trim().length > 15);
+  const sentenceScore = hasSentence ? 0.35 : 0.15;
+
+  const inLengthBand = text.length >= 80 && text.length <= 600;
+  const lengthScore = inLengthBand ? 0.35 : 0.15;
+
+  const isDurable = /^(project-fact|pattern|tooling|testing|workflow|bugfix|validation|artifact|decision|preference|architecture|safety|security|failure)$/i.test(candidate.category ?? "");
+  const categoryScore = isDurable ? 0.2 : 0.05;
+
+  // Missing confidence counts as 0; negatives are floored before the division.
+  const nonNegativeConfidence = Math.max(0, candidate.confidence ?? 0);
+  const confidenceScore = Math.min(0.1, nonNegativeConfidence / 10);
+
+  return round(sentenceScore + lengthScore + categoryScore + confidenceScore);
+}
+/**
+ * Collapses whitespace runs to single spaces, trims, and truncates the result
+ * to at most `max` UTF-16 code units, ending a truncated string with `…`.
+ *
+ * Two edge cases are handled explicitly:
+ * - `max` below 1 leaves no room even for the ellipsis, so non-fitting text
+ *   yields an empty string. A negative slice end would otherwise count from
+ *   the end of the string and return text longer than `max`.
+ * - The cut never falls inside a surrogate pair; a trailing lone high
+ *   surrogate is dropped so the output stays well-formed.
+ *
+ * @param text - Text to normalise and shorten.
+ * @param max - Maximum length of the result in UTF-16 code units.
+ * @returns The normalised text, truncated with a trailing `…` when too long.
+ */
+function compact(text: string, max: number): string {
+  const normalized = text.replace(/\s+/g, " ").trim();
+  if (normalized.length <= max) return normalized;
+  if (max < 1) return "";
+
+  // Reserve one code unit for the ellipsis.
+  let cut = max - 1;
+  if (cut > 0) {
+    const lastKept = normalized.charCodeAt(cut - 1);
+    // 0xD800-0xDBFF is the high-surrogate range; its low half would be cut off.
+    if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
+  }
+  return `${normalized.slice(0, cut)}…`;
+}
+/**
+ * Rounds a number to three decimal places.
+ *
+ * @param value - Number to round.
+ * @returns `value` rounded to the nearest thousandth.
+ */
+function round(value: number): number {
+  const scale = 1000;
+  return Math.round(value * scale) / scale;
+}
+/**
+ * Formats a number for display: integers are printed as-is, everything else
+ * with exactly three decimals.
+ *
+ * @param value - Number to format.
+ * @returns The formatted string.
+ */
+function formatNumber(value: number): string {
+  if (Number.isInteger(value)) return String(value);
+  return value.toFixed(3);
+}