npm - @circuitwall/jarela - Versions diffs - 0.14.0 → 1.0.0 - Mend

@circuitwall/jarela 0.14.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164) hide show

package/app/api/v1/agents/[id]/route.ts CHANGED Viewed

@@ -1,3 +1,10 @@
+/**
+ * @public — `GET /api/v1/agents/[id]`, `PATCH /api/v1/agents/[id]`,
+ *           `DELETE /api/v1/agents/[id]`
+ *
+ * Per-agent CRUD on a single config. See `docs/api.md`.
+ */
 import { NextRequest, NextResponse } from "next/server";
 import {
   getAgentConfig,

package/app/api/v1/agents/route.ts CHANGED Viewed

@@ -1,3 +1,10 @@
+/**
+ * @public — `GET /api/v1/agents` (list), `POST /api/v1/agents` (upsert)
+ *
+ * Agent-config CRUD: identity, instructions, tool policy, model config.
+ * See `docs/api.md`.
+ */
 import { NextRequest } from "next/server";
 import {
   listAgentConfigs,

package/app/api/v1/events/route.ts CHANGED Viewed

@@ -1,3 +1,11 @@
+/**
+ * @public — `GET /api/v1/events` (Server-Sent Events)
+ *
+ * Live notification stream — run completion, watcher fires, queue
+ * progress, etc. UI subscribes here for real-time updates; external
+ * scripts can subscribe over the same SSE wire format. See `docs/api.md`.
+ */
 import { NextRequest } from "next/server";
 import { recentSince, subscribe } from "@/lib/notifications/bus";
 import { startScheduler } from "@/lib/scheduler";

package/app/api/v1/extensions/route.ts CHANGED Viewed

@@ -4,13 +4,13 @@ import {
 } from "@/lib/providers/external";
 import { BUILTIN_PROVIDER_NAMES } from "@/lib/providers";
 import { loadExternalTools, getToolsDir } from "@/lib/tools/external";
-import { BUILTIN_TOOL_NAMES } from "@/lib/tools";
+import { getBuiltinToolNames } from "@/lib/tools";
 import { describeToolSecrets } from "@/lib/stores/tool-secrets";
 import { cachedJson } from "@/lib/api/responses";
 export function GET() {
   const provs = loadExternalProvidersDetailed(BUILTIN_PROVIDER_NAMES);
-  const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
+  const tools = loadExternalTools(getBuiltinToolNames());
   return cachedJson({
     directories: {

package/app/api/v1/extensions/tools/[name]/secrets/route.ts CHANGED Viewed

@@ -13,7 +13,7 @@
 import { NextRequest, NextResponse } from "next/server";
 import { z } from "zod";
 import { loadExternalTools } from "@/lib/tools/external";
-import { BUILTIN_TOOL_NAMES } from "@/lib/tools";
+import { getBuiltinToolNames } from "@/lib/tools";
 import {
   describeToolSecrets,
   setToolSecret,
@@ -31,7 +31,7 @@ export async function GET(
   { params }: { params: Promise<{ name: string }> },
 ) {
   const { name } = await params;
-  const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
+  const tools = loadExternalTools(getBuiltinToolNames());
   const slots = tools.secrets.get(name);
   if (!slots) {
     return NextResponse.json({ error: "tool not found" }, { status: 404 });
@@ -58,7 +58,7 @@ export async function PUT(
     );
   }
-  const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
+  const tools = loadExternalTools(getBuiltinToolNames());
   const slots = tools.secrets.get(name);
   if (!slots) {
     return NextResponse.json({ error: "tool not found" }, { status: 404 });

package/app/api/v1/health/route.ts CHANGED Viewed

@@ -1,3 +1,11 @@
+/**
+ * @public — `GET /api/v1/health`
+ *
+ * Liveness/readiness probe. Returns DB-path, agent count, and basic
+ * runtime metadata. The browser extension and external uptime monitors
+ * poll this. See `docs/api.md`.
+ */
 import { NextResponse } from "next/server";
 import { listAgentConfigs } from "@/lib/stores/agent-configs";
 import { DB_PATH, getDb } from "@/lib/db";

package/app/api/v1/models/route.ts CHANGED Viewed

@@ -1,3 +1,10 @@
+/**
+ * @public — `GET /api/v1/models` (list), `POST /api/v1/models` (upsert)
+ *
+ * Model-config catalog: per-model parameter presets that agents bind to
+ * by name (`model_config_name`). See `docs/api.md`.
+ */
 import { NextRequest } from "next/server";
 import { listModelConfigs, upsertModelConfig } from "@/lib/stores/model-config";
 import { errorResponse, createdResponse, cachedJson } from "@/lib/api/responses";

package/app/api/v1/page-capture/route.ts CHANGED Viewed

@@ -1,3 +1,11 @@
+/**
+ * @public — `POST /api/v1/page-capture` (with CORS `OPTIONS` preflight)
+ *
+ * Browser-extension upload endpoint: receives the active page's URL,
+ * title, and selected/full text and routes it into the active thread.
+ * See `docs/api.md`.
+ */
 import { handlePageCapture, handlePageCaptureOptions } from "@/lib/api/page-capture";
 export const POST = handlePageCapture;

package/app/api/v1/providers/route.ts CHANGED Viewed

@@ -1,3 +1,11 @@
+/**
+ * @public — `GET /api/v1/providers`
+ *
+ * Lists every registered LLM provider name (built-in + external `.cjs`
+ * plugins). The agent-callable equivalent is the `list_providers` tool.
+ * See `docs/api.md`.
+ */
 import { cachedJson } from "@/lib/api/responses";
 import { listProviderNames } from "@/lib/providers";

package/app/api/v1/threads/[thread_id]/route.ts CHANGED Viewed

@@ -1,3 +1,11 @@
+/**
+ * @public — `GET /api/v1/threads/[thread_id]` (read messages),
+ *           `PATCH /api/v1/threads/[thread_id]` (rename / set agent),
+ *           `DELETE /api/v1/threads/[thread_id]` (drop thread)
+ *
+ * Thread-level CRUD. See `docs/api.md`.
+ */
 import { NextRequest, NextResponse } from "next/server";
 import {
   deleteThread,

package/app/api/v1/threads/[thread_id]/run/route.ts CHANGED Viewed

@@ -1,3 +1,12 @@
+/**
+ * @public — `POST /api/v1/threads/[thread_id]/run` (submit run),
+ *           `GET /api/v1/threads/[thread_id]/run` (subscribe via SSE)
+ *
+ * Agent execution endpoint. Submit a run, then stream tokens, tool
+ * calls, and final state. The split-and-subscribe shape lets reconnects
+ * pick up an in-flight stream. See `docs/api.md`.
+ */
 import { NextRequest } from "next/server";
 import type { StreamOptions, StreamChunk } from "@/lib/agents/base";
 import type { ContentPart } from "@/lib/tools/types";

package/app/api/v1/threads/route.ts CHANGED Viewed

@@ -1,3 +1,10 @@
+/**
+ * @public — `GET /api/v1/threads` (list), `POST /api/v1/threads` (create)
+ *
+ * Thread lifecycle. Threads are the unit of conversation history; every
+ * agent run lives inside one. See `docs/api.md`.
+ */
 import { NextRequest, NextResponse } from "next/server";
 import { createThread, listThreads } from "@/lib/stores/threads";
 import { getAgentConfig } from "@/lib/stores/agent-configs";

package/app/api/v1/tools/route.ts CHANGED Viewed

@@ -1,3 +1,12 @@
+/**
+ * @public — `GET /api/v1/tools`
+ *
+ * Lists every tool in the agent's pool — built-in, external (loaded from
+ * `~/.jarela/tools/*.cjs`), and MCP — with category, capability, source,
+ * and per-tool stats. The agent-callable equivalent is the `list_tools`
+ * tool. See `docs/api.md`.
+ */
 import { NextResponse } from "next/server";
 import { getAllToolsAsync, getToolCategory, getToolCapability, getToolGroup, getToolSource } from "@/lib/tools";
 import { cachedJson } from "@/lib/api/responses";

package/components/chat/ContextUsageBar.tsx CHANGED Viewed

@@ -69,6 +69,14 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
   const warmUsed = usage.warm_tokens!;
   const factsUsed = usage.facts_tokens!;
   const overheadUsed = usage.overhead_tokens!;
+  // Anthropic prompt-cache breakdown (ADR-0062). Disjoint from
+  // hot/warm/facts/overhead: those tiers count fresh input, while these
+  // count tokens served from / written to the prompt cache. Surface them
+  // in the tooltip and expanded panel so the user can see when caching
+  // is firing for this turn.
+  const cacheRead = usage.cache_read_input_tokens ?? 0;
+  const cacheCreation = usage.cache_creation_input_tokens ?? 0;
+  const cacheActive = cacheRead > 0 || cacheCreation > 0;
   // Overhead's "budget" is whatever it actually consumed — there's no slider
   // for it. Shown as a fixed-size segment so it doesn't visually compete
@@ -90,6 +98,13 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
           `Context window: ${cap.toLocaleString()} tokens (the model's full capacity)`,
           `This turn's prompt used ${(hotUsed + warmUsed + factsUsed + overheadUsed).toLocaleString()} tokens`,
           `Reply generated: ${usage.output_tokens.toLocaleString()} tokens`,
+          ...(cacheActive
+            ? [
+                "",
+                `Prompt cache: ${cacheRead.toLocaleString()} read · ${cacheCreation.toLocaleString()} written`,
+                "(cache reads bill at 0.1× input, writes at 1.25×)",
+              ]
+            : []),
           "",
           "Each coloured slot's width = budget for that tier; filled portion = actually used.",
           "Red = tier overflowed its budget. Grey tail = headroom reserved for the reply.",
@@ -104,12 +119,41 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
           {trailing > 0 && <div className="h-full bg-surface-3" style={{ width: `${toPct(trailing)}%` }} aria-hidden title={`Reserved for reply: ${trailing.toLocaleString()} tokens (${Math.round((trailing/cap)*100)}% of window)`} />}
         </div>
       </button>
+      {cacheActive && !showDetails && (
+        <div
+          className="mt-0.5 px-2 text-[10px] text-violet-500/80"
+          title={[
+            "Prompt cache (ADR-0062). Reads bill at 0.1× input, writes at 1.25×.",
+            cacheRead > 0 ? `${cacheRead.toLocaleString()} tokens served from cache.` : "",
+            cacheCreation > 0 ? `${cacheCreation.toLocaleString()} tokens written to cache.` : "",
+          ].filter(Boolean).join("\n")}
+        >
+          {cacheRead > 0 && <>cache hit · {fmtTokens(cacheRead)} read</>}
+          {cacheRead > 0 && cacheCreation > 0 && " · "}
+          {cacheCreation > 0 && <>cache write · {fmtTokens(cacheCreation)}</>}
+        </div>
+      )}
       {showDetails && (
         <div className="mt-1 px-2 pb-1 grid grid-cols-2 gap-x-3 gap-y-0.5 text-[10px] text-fg-faint">
           <Row label="Hot"      color="text-accent"    used={hotUsed}      budget={hotBudget}      hint="Recent messages kept verbatim" />
           <Row label="Warm"     color="text-amber-500" used={warmUsed}     budget={warmBudget}     hint="Older history compressed into rolling summary" />
           <Row label="Facts"    color="text-teal-500"  used={factsUsed}    budget={factsBudget}    hint="Retrieved long-term memory + recall snippets" />
           <Row label="Overhead" color="text-fg-muted"  used={overheadUsed} budget={overheadUsed}   hint="System prompt + per-message scaffolding" />
+          {cacheActive && (
+            <span
+              className="col-span-2 text-violet-500"
+              title={[
+                "Prompt cache (ADR-0062). Disjoint from the tiers above.",
+                `Read ${cacheRead.toLocaleString()} tokens — billed at 0.1× input rate.`,
+                `Wrote ${cacheCreation.toLocaleString()} tokens — billed at 1.25× input rate.`,
+                "Reads pay off on subsequent turns; writes are an investment.",
+              ].join("\n")}
+            >
+              <span className="text-violet-500">Cache</span>{" "}
+              read {fmtTokens(cacheRead)}
+              {cacheCreation > 0 ? ` · created ${fmtTokens(cacheCreation)}` : ""}
+            </span>
+          )}
           <span
             className="col-span-2 mt-0.5 border-t border-border pt-0.5"
             title={`Output: tokens the model generated in its reply.\nWindow: total context capacity of this model.`}

package/lib/agents/llm.ts CHANGED Viewed

@@ -134,6 +134,12 @@ export async function* streamWithConfig(
   // JarelaChatModel; we sum them so the final figure covers the whole turn.
   let usageInputTokens = 0;
   let usageOutputTokens = 0;
+  // PR #181 + cache-fidelity follow-up: Anthropic prompt-cache reads/writes
+  // arrive as a separate breakdown via `input_token_details`. Sum them
+  // independently so the dashboard can report cost correctly (cache reads
+  // bill at 0.1× the fresh-input rate, cache writes at 1.25×).
+  let usageCacheCreationTokens = 0;
+  let usageCacheReadTokens = 0;
   let sawUsage = false;
   // Tracks whether the model hit max_tokens mid-stream. JarelaChatModel tags
   // the final chunk with additional_kwargs.stop_reason="length" when this
@@ -190,6 +196,11 @@ export async function* streamWithConfig(
           if (usage && (usage.input_tokens > 0 || usage.output_tokens > 0)) {
             usageInputTokens += usage.input_tokens ?? 0;
             usageOutputTokens += usage.output_tokens ?? 0;
+            const details = usage.input_token_details;
+            if (details) {
+              usageCacheCreationTokens += details.cache_creation ?? 0;
+              usageCacheReadTokens += details.cache_read ?? 0;
+            }
             sawUsage = true;
           }
           if (typeof chunk.content === "string" && chunk.content) {
@@ -269,7 +280,13 @@ export async function* streamWithConfig(
         data: {
           message_id: `llm-${threadId}-${Date.now()}`,
           usage: sawUsage
-            ? { input_tokens: usageInputTokens, output_tokens: usageOutputTokens, source: "provider" }
+            ? {
+                input_tokens: usageInputTokens,
+                output_tokens: usageOutputTokens,
+                cache_creation_input_tokens: usageCacheCreationTokens,
+                cache_read_input_tokens: usageCacheReadTokens,
+                source: "provider",
+              }
             : { input_tokens: 0, output_tokens: totalOutputTokens, source: "estimate" },
           provider: cfg.provider,
           model_id: cfg.model_id,
@@ -375,7 +392,13 @@ export async function* streamWithConfig(
     data: {
       message_id: `llm-${threadId}-${Date.now()}`,
       usage: sawUsage
-        ? { input_tokens: usageInputTokens, output_tokens: usageOutputTokens, source: "provider" }
+        ? {
+            input_tokens: usageInputTokens,
+            output_tokens: usageOutputTokens,
+            cache_creation_input_tokens: usageCacheCreationTokens,
+            cache_read_input_tokens: usageCacheReadTokens,
+            source: "provider",
+          }
         : { input_tokens: 0, output_tokens: totalOutputTokens, source: "estimate" },
       provider: cfg.provider,
       model_id: cfg.model_id,

package/lib/agents/run-thread.ts CHANGED Viewed

@@ -464,6 +464,11 @@ async function* stallRetryStream(
 export interface AssistantUsageSnapshot {
   input_tokens: number;
   output_tokens: number;
+  // Anthropic prompt-cache breakdown (PR #181). Disjoint from input_tokens:
+  // total billable input = input_tokens + cache_creation + cache_read,
+  // priced at 1×, 1.25×, and 0.1× the input rate respectively.
+  cache_creation_input_tokens?: number;
+  cache_read_input_tokens?: number;
   provider: string;
   model_id: string;
   model_config_name: string | null;
@@ -566,8 +571,13 @@ export function persistAssistantMessage(
         const rates = hasProviderUsage
           ? modelRatesFor(tables, usage!.provider, usage!.model_id)
           : { inputPer1M: null, outputPer1M: null };
+        const cacheCreation = hasProviderUsage ? (usage!.cache_creation_input_tokens ?? 0) : 0;
+        const cacheRead = hasProviderUsage ? (usage!.cache_read_input_tokens ?? 0) : 0;
         const cost = hasProviderUsage
-          ? estimateCostUsd(usage!.input_tokens, usage!.output_tokens, rates)
+          ? estimateCostUsd(usage!.input_tokens, usage!.output_tokens, rates, {
+              cache_creation_input_tokens: cacheCreation,
+              cache_read_input_tokens: cacheRead,
+            })
           : 0;
         recordMessageUsage({
           message_id: row.msg_id,
@@ -582,6 +592,8 @@ export function persistAssistantMessage(
           input_rate_usd_per_mtok: rates.inputPer1M,
           output_rate_usd_per_mtok: rates.outputPer1M,
           cost_usd: cost,
+          cache_creation_input_tokens: cacheCreation > 0 ? cacheCreation : null,
+          cache_read_input_tokens: cacheRead > 0 ? cacheRead : null,
           tier_usage: contextSnapshot
             ? {
                 hot_tokens: contextSnapshot.hot_tokens,

package/lib/agents/stream-collector.ts CHANGED Viewed

@@ -81,7 +81,13 @@ export async function collectStream(
         }
         case "done": {
           const d = chunk.data as {
-            usage?: { input_tokens?: number; output_tokens?: number; source?: string };
+            usage?: {
+              input_tokens?: number;
+              output_tokens?: number;
+              cache_creation_input_tokens?: number;
+              cache_read_input_tokens?: number;
+              source?: string;
+            };
             provider?: string;
             model_id?: string;
             model_config_name?: string | null;
@@ -90,6 +96,8 @@ export async function collectStream(
             result.usage = {
               input_tokens: d.usage.input_tokens ?? 0,
               output_tokens: d.usage.output_tokens ?? 0,
+              cache_creation_input_tokens: d.usage.cache_creation_input_tokens ?? 0,
+              cache_read_input_tokens: d.usage.cache_read_input_tokens ?? 0,
               provider: d.provider,
               model_id: d.model_id,
               model_config_name: d.model_config_name ?? null,

package/lib/api/serializers.test.ts CHANGED Viewed

@@ -161,6 +161,8 @@ function makeUsageRow(overrides: Partial<MessageUsageRow> = {}): MessageUsageRow
     warm_budget_tokens: 20_000,
     facts_budget_tokens: 10_000,
     context_window_tokens: 100_000,
+    cache_creation_input_tokens: null,
+    cache_read_input_tokens: null,
     ...overrides,
   };
 }
@@ -198,6 +200,10 @@ describe("messageUsageToResponse", () => {
       warm_budget_tokens: 20_000,
       facts_budget_tokens: 10_000,
       context_window_tokens: 100_000,
+      // Anthropic prompt-cache breakdown carries through. NULL by default
+      // (legacy rows + non-Anthropic providers) — see PR #181 follow-up.
+      cache_creation_input_tokens: null,
+      cache_read_input_tokens: null,
     });
     // Cost + provenance fields stay server-side; the bar doesn't need them.
     expect(out).not.toHaveProperty("cost_usd");
@@ -205,6 +211,15 @@ describe("messageUsageToResponse", () => {
     expect(out).not.toHaveProperty("agent_id");
   });
+  it("surfaces Anthropic cache token breakdown when populated", () => {
+    const out = messageUsageToResponse(makeUsageRow({
+      cache_creation_input_tokens: 4_000,
+      cache_read_input_tokens: 80_000,
+    }))!;
+    expect(out.cache_creation_input_tokens).toBe(4_000);
+    expect(out.cache_read_input_tokens).toBe(80_000);
+  });
   it("preserves NULL tier columns for legacy snapshots", () => {
     const out = messageUsageToResponse(makeUsageRow({
       hot_tokens: null, warm_tokens: null, facts_tokens: null, overhead_tokens: null,

package/lib/api/serializers.ts CHANGED Viewed

@@ -78,6 +78,12 @@ export function mcpServerToResponse(r: McpServerRow) {
  * chat panel's `ContextUsageBar` consumes. Returns `null` for messages
  * that have no snapshot (user turns and legacy assistant rows recorded
  * before the per-turn snapshot landed in ADR-0041).
+ *
+ * Anthropic prompt-cache tokens (PR #181 + the cache-fidelity follow-up)
+ * are surfaced as additive fields; `ContextUsageBar` reads them to show
+ * prompt-cache read/write counts. Each is `null` for rows that predate
+ * cache plumbing, for providers that don't expose a cache breakdown, or
+ * when that count was zero for the turn (zero is persisted as NULL).
  */
 export function messageUsageToResponse(u: MessageUsageRow | undefined | null) {
   if (!u) return null;
@@ -92,6 +98,8 @@ export function messageUsageToResponse(u: MessageUsageRow | undefined | null) {
     warm_budget_tokens: u.warm_budget_tokens,
     facts_budget_tokens: u.facts_budget_tokens,
     context_window_tokens: u.context_window_tokens,
+    cache_creation_input_tokens: u.cache_creation_input_tokens,
+    cache_read_input_tokens: u.cache_read_input_tokens,
   };
 }

package/lib/db/migrations.ts CHANGED Viewed

@@ -283,6 +283,7 @@ export function runMigrations(db: DatabaseSync): void {
   ensureScheduledTasksReactionKindColumns(db);
   ensureMessageUsageTable(db);
   ensureMessageUsageTierColumns(db);
+  ensureMessageUsageCacheColumns(db);
   ensureThreadContextPinColumns(db);
   ensureThreadChannelSummariesTable(db);
   seedModelConfigs(db);
@@ -728,6 +729,20 @@ function ensureMessageUsageTierColumns(db: DatabaseSync): void {
   if (!names.has("context_window_tokens")) db.exec("ALTER TABLE message_usage ADD COLUMN context_window_tokens INTEGER");
 }
+// PR #181 enabled Anthropic prompt caching, but the per-turn usage snapshot
+// only captured `input_tokens` / `output_tokens`. Anthropic returns cache
+// reads and writes as separate counts (priced at 0.1× and 1.25× the input
+// rate respectively), so without these columns the dashboard underreports
+// cost on cache-creating turns and *over*reports on cache-hitting turns.
+// Both columns are nullable: legacy rows, non-Anthropic providers, and
+// zero counts (persisted as NULL, not 0) leave them NULL.
+function ensureMessageUsageCacheColumns(db: DatabaseSync): void {
+  const cols = db.prepare("PRAGMA table_info(message_usage)").all() as Array<{ name: string }>;
+  const names = new Set(cols.map((c) => c.name));
+  if (!names.has("cache_creation_input_tokens")) db.exec("ALTER TABLE message_usage ADD COLUMN cache_creation_input_tokens INTEGER");
+  if (!names.has("cache_read_input_tokens"))     db.exec("ALTER TABLE message_usage ADD COLUMN cache_read_input_tokens INTEGER");
+}
 function seedAgentConfigs(db: DatabaseSync): void {
   // Only seed on first run — once the user has any agents we must not
   // resurrect ones they've deleted (e.g. the legacy "echo" / "llm" defaults).

package/lib/health/runner.test.ts CHANGED Viewed

@@ -46,8 +46,30 @@ describe("health runner", () => {
     for (const k of ["atlassian", "github", "google", "gmail", "outlook", "anthropic", "jira_align"]) {
       deleteMemory("integrations", k);
     }
-    delete process.env.OPENAI_API_KEY;
-    delete process.env.DEEPSEEK_API_KEY;
+    // Several integrations have env-var fallbacks that bypass the
+    // `integrations` namespace (`_resolveGithubAuth` reads GITHUB_TOKEN /
+    // GH_TOKEN before falling back to the store; Atlassian and Jira Align
+    // do the same with their respective vars). The test runs in whatever
+    // shell the developer happens to have, which often has GITHUB_TOKEN
+    // set for the gh CLI — leaving an "unconfigured" assertion silently
+    // false. Clear them all here so each case opts in by setting only what
+    // it needs.
+    for (const v of [
+      "OPENAI_API_KEY",
+      "DEEPSEEK_API_KEY",
+      "GITHUB_TOKEN",
+      "GH_TOKEN",
+      "ATLASSIAN_URL",
+      "ATLASSIAN_EMAIL",
+      "ATLASSIAN_API_TOKEN",
+      "JIRA_ALIGN_URL",
+      "JIRA_ALIGN_TOKEN",
+      "GMAIL_CLIENT_ID",
+      "GMAIL_CLIENT_SECRET",
+      "GMAIL_REFRESH_TOKEN",
+    ]) {
+      delete process.env[v];
+    }
   });
   it("publishes a single alert on first failure (not one per cycle)", async () => {

package/lib/mcp/registry.ts CHANGED Viewed

@@ -1,9 +1,17 @@
-// MCP server registry types + variable substitution.
-//
-// Discovery is online via the official MCP Registry (see
-// `lib/mcp/upstream-registry.ts` and ADR-0013). Entries flow through this
-// module's types so the picker UI, install action, and `applyVariables`
-// substitution path stay decoupled from the upstream wire format.
+/**
+ * @public
+ *
+ * MCP server registry types + variable substitution.
+ *
+ * Discovery is online via the official MCP Registry (see
+ * `lib/mcp/upstream-registry.ts` and ADR-0013). Entries flow through this
+ * module's types so the picker UI, install action, and `applyVariables`
+ * substitution path stay decoupled from the upstream wire format.
+ *
+ * All exports in this file are part of the package's stable public
+ * surface (per `package.json#exports`). Removing or breaking them counts
+ * as a breaking change under the deprecation policy in CONTRIBUTING.md.
+ */
 export interface RegistryVariable {
   /** Unique key used as `${key}` in spec strings or env values. */

package/lib/providers/anthropic.test.ts ADDED Viewed

@@ -0,0 +1,95 @@
+import { describe, it, expect } from "vitest";
+import type Anthropic from "@anthropic-ai/sdk";
+import {
+  withSystemCacheControl,
+  withToolsCacheControl,
+  withLastToolResultCacheControl,
+} from "./anthropic";
+describe("withSystemCacheControl", () => {
+  it("wraps non-empty text in a TextBlockParam with ephemeral cache_control", () => {
+    expect(withSystemCacheControl("you are helpful")).toEqual([
+      { type: "text", text: "you are helpful", cache_control: { type: "ephemeral" } },
+    ]);
+  });
+  it("returns undefined for empty string so the system field is omitted", () => {
+    expect(withSystemCacheControl("")).toBeUndefined();
+  });
+});
+describe("withToolsCacheControl", () => {
+  const tool = (name: string): Anthropic.Tool => ({
+    name,
+    description: "",
+    input_schema: { type: "object", properties: {} } as Anthropic.Tool.InputSchema,
+  });
+  it("returns the input unchanged when no tools are provided", () => {
+    expect(withToolsCacheControl([])).toEqual([]);
+  });
+  it("marks only the last tool with cache_control", () => {
+    const out = withToolsCacheControl([tool("a"), tool("b"), tool("c")]);
+    expect(out).toHaveLength(3);
+    expect((out[0] as Anthropic.Tool & { cache_control?: unknown }).cache_control).toBeUndefined();
+    expect((out[1] as Anthropic.Tool & { cache_control?: unknown }).cache_control).toBeUndefined();
+    expect((out[2] as Anthropic.Tool & { cache_control?: unknown }).cache_control).toEqual({ type: "ephemeral" });
+  });
+  it("does not mutate the input array", () => {
+    const tools = [tool("a"), tool("b")];
+    const snapshot = JSON.stringify(tools);
+    withToolsCacheControl(tools);
+    expect(JSON.stringify(tools)).toBe(snapshot);
+  });
+});
+describe("withLastToolResultCacheControl", () => {
+  it("returns messages unchanged when none contain a tool_result", () => {
+    const msgs: Anthropic.MessageParam[] = [
+      { role: "user", content: "hi" },
+      { role: "assistant", content: [{ type: "text", text: "hello" }] },
+    ];
+    expect(withLastToolResultCacheControl(msgs)).toEqual(msgs);
+  });
+  it("marks the last tool_result block in the last message that has one", () => {
+    const msgs: Anthropic.MessageParam[] = [
+      {
+        role: "user",
+        content: [
+          { type: "tool_result", tool_use_id: "t1", content: "old" },
+        ],
+      },
+      { role: "assistant", content: [{ type: "text", text: "thinking" }] },
+      {
+        role: "user",
+        content: [
+          { type: "tool_result", tool_use_id: "t2", content: "fresh-A" },
+          { type: "tool_result", tool_use_id: "t3", content: "fresh-B" },
+        ],
+      },
+    ];
+    const out = withLastToolResultCacheControl(msgs);
+    const lastMsgContent = out[out.length - 1].content as Anthropic.ContentBlockParam[];
+    expect((lastMsgContent[0] as { cache_control?: unknown }).cache_control).toBeUndefined();
+    expect((lastMsgContent[1] as { cache_control?: unknown }).cache_control).toEqual({ type: "ephemeral" });
+    // Older tool_result on prior message must remain unmarked — only the
+    // most recent breakpoint is needed for incremental within-turn caching.
+    const firstMsgContent = out[0].content as Anthropic.ContentBlockParam[];
+    expect((firstMsgContent[0] as { cache_control?: unknown }).cache_control).toBeUndefined();
+  });
+  it("does not mutate the input messages array", () => {
+    const msgs: Anthropic.MessageParam[] = [
+      {
+        role: "user",
+        content: [{ type: "tool_result", tool_use_id: "t1", content: "x" }],
+      },
+    ];
+    const snapshot = JSON.stringify(msgs);
+    withLastToolResultCacheControl(msgs);
+    expect(JSON.stringify(msgs)).toBe(snapshot);
+  });
+});