npm - @circuitwall/jarela - Versions diffs - 0.14.0 → 1.0.0 - Mend

@circuitwall/jarela 0.14.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164) hide show

package/lib/providers/anthropic.ts CHANGED Viewed

@@ -53,6 +53,80 @@ function appendServerTools(
   return merged;
 }
+// Anthropic prompt-caching breakpoints. Within a multi-tool ReAct turn the
+// system prompt + tools are stable across every LLM call, and tool_results
+// only grow at the tail — exactly the prefix Anthropic's ephemeral cache is
+// built for. We mark three breakpoints (system, last tool, last tool_result)
+// so calls 2..N read the prefix at ~10% of the input rate. A prefix below the
+// minimum cacheable size is silently left uncached by the API at no extra
+// cost, so it is safe to mark unconditionally.
+// Shared `cache_control` value attached at each breakpoint by the helpers
+// below; they only reference it and never modify it.
+const EPHEMERAL: Anthropic.CacheControlEphemeral = { type: "ephemeral" };
+/**
+ * Wraps the system prompt in a single text block tagged with the ephemeral
+ * cache marker.
+ *
+ * @param text - System prompt text; an empty string means "no system prompt".
+ * @returns A one-element block array, or `undefined` when `text` is empty so
+ *   the caller can leave `system` unset.
+ */
+export function withSystemCacheControl(text: string): Anthropic.TextBlockParam[] | undefined {
+  if (text) {
+    const block: Anthropic.TextBlockParam = { type: "text", text, cache_control: EPHEMERAL };
+    return [block];
+  }
+  return undefined;
+}
+/**
+ * Tags the final tool definition with the ephemeral cache marker.
+ *
+ * @param tools - Tool definitions in the order they will be sent.
+ * @returns `tools` itself when it is empty; otherwise a new array whose last
+ *   entry is a copy carrying `cache_control`. The input array and its
+ *   elements are not modified.
+ */
+export function withToolsCacheControl(tools: Anthropic.Tool[]): Anthropic.Tool[] {
+  const lastIndex = tools.length - 1;
+  if (lastIndex < 0) return tools;
+  const marked = tools.slice();
+  marked[lastIndex] = { ...tools[lastIndex], cache_control: EPHEMERAL };
+  return marked;
+}
+/**
+ * Tags the most recent `tool_result` block in the conversation with the
+ * ephemeral cache marker.
+ *
+ * Messages are scanned from newest to oldest, and the blocks of each message
+ * from last to first; the first `tool_result` found is the one tagged.
+ *
+ * @param messages - Conversation messages in send order.
+ * @returns A new message array with the tagged block, or `messages` itself
+ *   when no `tool_result` block exists.
+ */
+export function withLastToolResultCacheControl(
+  messages: Anthropic.MessageParam[],
+): Anthropic.MessageParam[] {
+  let msgIdx = messages.length;
+  while (msgIdx-- > 0) {
+    const message = messages[msgIdx];
+    const content = message.content;
+    // Plain-string content has no blocks to inspect.
+    if (typeof content === "string") continue;
+    let blockIdx = content.length;
+    while (blockIdx-- > 0) {
+      const block = content[blockIdx];
+      if (block.type !== "tool_result") continue;
+      // Copy only the touched block list and message so the caller's input
+      // is left unmodified.
+      const patchedContent = content.slice();
+      patchedContent[blockIdx] = { ...block, cache_control: EPHEMERAL };
+      const patchedMessages = messages.slice();
+      patchedMessages[msgIdx] = { ...message, content: patchedContent };
+      return patchedMessages;
+    }
+  }
+  return messages;
+}
+// Local view of the stream's `message_start` event: only the
+// `message.usage` payload that the streaming loop reads.
+interface AnthropicMessageStartEvent {
+  type: "message_start";
+  message: { usage?: Anthropic.Usage };
+}
+// Local view of the stream's `message_delta` event: only the top-level
+// `usage` payload that the streaming loop reads.
+interface AnthropicMessageDeltaEvent {
+  type: "message_delta";
+  usage?: Anthropic.MessageDeltaUsage;
+}
+/**
+ * Converts the usage snapshot carried by `message_start` into a `usage`
+ * stream event holding the input-side counts (fresh input, cache creation,
+ * cache read).
+ *
+ * @param usage - Usage payload from the `message_start` event, if any.
+ * @returns The usage event, or `null` when the event carried no usage.
+ */
+function usageEventFromStart(usage: Anthropic.Usage | undefined): ProviderStreamEvent | null {
+  if (!usage) return null;
+  return {
+    type: "usage",
+    input_tokens: usage.input_tokens ?? 0,
+    // Output tokens are deliberately not reported here. The later
+    // `message_delta` usage is cumulative and already includes whatever
+    // `message_start` counted, so forwarding both would make a consumer
+    // that sums usage events count those opening tokens twice.
+    output_tokens: 0,
+    cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
+    cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
+  };
+}
+/**
+ * Converts the usage payload of a `message_delta` event into a `usage`
+ * stream event that carries only the output token count.
+ *
+ * @param usage - Usage payload from the `message_delta` event, if any.
+ * @returns The usage event, or `null` when the event carried no usage.
+ */
+function usageEventFromDelta(usage: Anthropic.MessageDeltaUsage | undefined): ProviderStreamEvent | null {
+  if (usage) {
+    // Input and cache counts were already emitted from `message_start`, so
+    // input is reported as 0 and the cache fields are omitted to avoid
+    // counting them a second time in the agent loop's running total.
+    const outputTokens = usage.output_tokens ?? 0;
+    return { type: "usage", input_tokens: 0, output_tokens: outputTokens };
+  }
+  return null;
+}
 export const anthropicProvider: ModelProvider = {
   name: "anthropic",
@@ -80,7 +154,7 @@ export const anthropicProvider: ModelProvider = {
     const stream = await client.messages.stream({
       model: model_id,
       max_tokens: params.max_tokens ?? 4096,
-      system: systemText || undefined,
+      system: withSystemCacheControl(systemText),
       messages: userMessages,
     });
@@ -103,13 +177,18 @@ export const anthropicProvider: ModelProvider = {
     });
     const systemMsg = messages.find((m) => m.role === "system");
-    const msgList = toAnthropicMessages(messages.filter((m) => m.role !== "system"));
-    const anthropicTools = appendServerTools(toAnthropicTools(tools), params);
+    const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
+    const msgList = withLastToolResultCacheControl(
+      toAnthropicMessages(messages.filter((m) => m.role !== "system")),
+    );
+    const anthropicTools = withToolsCacheControl(
+      appendServerTools(toAnthropicTools(tools), params),
+    );
     const resp = await client.messages.create({
       model: model_id,
       max_tokens: params.max_tokens ?? 4096,
-      system: typeof systemMsg?.content === "string" ? systemMsg.content : undefined,
+      system: withSystemCacheControl(systemText),
       messages: msgList,
       tools: anthropicTools,
       ...(params.thinking ? { thinking: params.thinking } : {}),
@@ -141,8 +220,13 @@ export const anthropicProvider: ModelProvider = {
       });
       const systemMsg = messages.find((m) => m.role === "system");
-      const msgList = toAnthropicMessages(messages.filter((m) => m.role !== "system"));
-      const anthropicTools = appendServerTools(toAnthropicTools(tools), params);
+      const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
+      const msgList = withLastToolResultCacheControl(
+        toAnthropicMessages(messages.filter((m) => m.role !== "system")),
+      );
+      const anthropicTools = withToolsCacheControl(
+        appendServerTools(toAnthropicTools(tools), params),
+      );
       const body: Anthropic.Messages.MessageStreamParams = {
         model: model_id,
@@ -150,7 +234,8 @@ export const anthropicProvider: ModelProvider = {
         messages: msgList,
         ...(pickAnthropicOptions(params) as Record<string, unknown>),
       };
-      if (typeof systemMsg?.content === "string") body.system = systemMsg.content;
+      const systemParam = withSystemCacheControl(systemText);
+      if (systemParam) body.system = systemParam;
       if (anthropicTools.length > 0) body.tools = anthropicTools;
       if (params.thinking) {
         (body as unknown as Record<string, unknown>).thinking = params.thinking;
@@ -161,6 +246,12 @@ export const anthropicProvider: ModelProvider = {
       const blockType = new Map<number, "text" | "thinking" | "tool_use">();
       for await (const event of stream) {
+        if (event.type === "message_start") {
+          const ev = event as unknown as AnthropicMessageStartEvent;
+          const u = usageEventFromStart(ev.message?.usage);
+          if (u) yield u;
+          continue;
+        }
         if (event.type === "content_block_start") {
           const cb = event.content_block;
           if (cb.type === "tool_use") {
@@ -180,9 +271,14 @@ export const anthropicProvider: ModelProvider = {
           } else if (d.type === "input_json_delta" && d.partial_json !== undefined) {
             yield { type: "tool_call_chunk", index: event.index, args_delta: d.partial_json };
           }
-        } else if (event.type === "message_delta" && event.delta?.stop_reason) {
-          const reason = event.delta.stop_reason;
-          yield { type: "stop", reason: reason === "tool_use" ? "tool_use" : reason === "max_tokens" ? "length" : "stop" };
+        } else if (event.type === "message_delta") {
+          const delta = event as unknown as AnthropicMessageDeltaEvent;
+          const u = usageEventFromDelta(delta.usage);
+          if (u) yield u;
+          if (event.delta?.stop_reason) {
+            const reason = event.delta.stop_reason;
+            yield { type: "stop", reason: reason === "tool_use" ? "tool_use" : reason === "max_tokens" ? "length" : "stop" };
+          }
         }
       }
     })();

package/lib/providers/jarela-chat-model.ts CHANGED Viewed

@@ -203,8 +203,13 @@ export class JarelaChatModel extends BaseChatModel {
       } else if (event.type === "usage") {
         // ADR-0041: surface real provider token counts on the final
         // AIMessageChunk via LangChain's standard `usage_metadata` field so
-        // the agent loop can snapshot them into message_usage.
+        // the agent loop can snapshot them into message_usage. PR #181 added
+        // Anthropic prompt caching; carry the cache breakdown through
+        // `input_token_details` (LangChain's standard channel) so cost
+        // attribution downstream can apply the 1.25× / 0.1× rates.
         emittedAny = true;
+        const cacheCreation = event.cache_creation_input_tokens ?? 0;
+        const cacheRead = event.cache_read_input_tokens ?? 0;
         yield new ChatGenerationChunk({
           message: new AIMessageChunk({
             content: "",
@@ -213,6 +218,9 @@ export class JarelaChatModel extends BaseChatModel {
               output_tokens: event.output_tokens ?? 0,
               total_tokens: event.total_tokens
                 ?? (event.input_tokens ?? 0) + (event.output_tokens ?? 0),
+              ...(cacheCreation > 0 || cacheRead > 0
+                ? { input_token_details: { cache_creation: cacheCreation, cache_read: cacheRead } }
+                : {}),
             },
           }),
           text: "",

package/lib/providers/known-context-windows.ts CHANGED Viewed

@@ -119,3 +119,24 @@ export function getKnownContextLength(provider: string, model_id: string): numbe
 export function getKnownMaxOutputTokens(provider: string, model_id: string): number | null {
+  // Resolved through getKnownModelLimits; a missing entry and a nullish
+  // max_output_tokens both come back as null.
   return getKnownModelLimits(provider, model_id)?.max_output_tokens ?? null;
 }
+// Flat catalog snapshot for one provider — used by introspection tools so
+// the agent can enumerate what's known statically. Returns [] for providers
+// without a static table (e.g. `langchain`, `mock`, externals).
+export function listKnownModels(
+  provider: string,
+): Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> {
+  // Providers that have a static limits table; any other name yields [].
+  const staticTables = new Map<string, Record<string, KnownModelLimits>>([
+    ["anthropic", ANTHROPIC],
+    ["gemini", GEMINI],
+    ["openai", OPENAI],
+    ["deepseek", DEEPSEEK],
+  ]);
+  const limitsByModel = staticTables.get(provider);
+  if (limitsByModel === undefined) return [];
+  // Flatten the table into one row per model id, with a missing output cap
+  // normalised to null.
+  const catalog: Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> = [];
+  for (const [model_id, limits] of Object.entries(limitsByModel)) {
+    catalog.push({
+      model_id,
+      context_length: limits.context_length,
+      max_output_tokens: limits.max_output_tokens ?? null,
+    });
+  }
+  return catalog;
+}

package/lib/providers/types.ts CHANGED Viewed

@@ -1,3 +1,16 @@
+/**
+ * @public
+ *
+ * Public LLM-provider extension contract.
+ *
+ * Every type and interface in this file is part of the package's
+ * stable public surface (per `package.json#exports`). External provider
+ * adapters — both in-tree and `~/.jarela/providers/*.cjs` plugins —
+ * conform to {@link ModelProvider}. Removing or breaking any export
+ * here counts as a breaking change under the deprecation policy in
+ * CONTRIBUTING.md.
+ */
 import type { ContentPart, InvokeMessage, InvokeResult, OpenAITool } from "@/lib/tools/types";
 export type { InvokeMessage, InvokeResult, OpenAITool };
@@ -44,7 +57,14 @@ export type ProviderStreamEvent =
   | { type: "thinking"; delta: string }
   | { type: "tool_call_chunk"; index: number; id?: string; name?: string; args_delta?: string }
   | { type: "citation"; source?: string; snippet?: string; url?: string }
-  | { type: "usage"; input_tokens?: number; output_tokens?: number; total_tokens?: number }
+  | {
+      type: "usage";
+      input_tokens?: number;
+      output_tokens?: number;
+      total_tokens?: number;
+      cache_creation_input_tokens?: number;
+      cache_read_input_tokens?: number;
+    }
   | { type: "audio_chunk"; mime_type: string; data_b64: string }
   | { type: "provider_event"; name: string; payload: unknown }
   | { type: "stop"; reason: "stop" | "tool_use" | "length" };

package/lib/stores/message-usage.test.ts CHANGED Viewed

@@ -231,4 +231,38 @@ describe("message_usage snapshot store (ADR-0041)", () => {
     const map = getMessageUsageByIds([]);
     expect(map.size).toBe(0);
   });
+  it("persists Anthropic cache_creation/cache_read token counts (PR #181 follow-up)", () => {
+    // Write one row carrying both cache counters, then read it back by id.
+    const messageId = "m-cache";
+    const cacheCreation = 4000;
+    const cacheRead = 80_000;
+    recordMessageUsage({
+      message_id: messageId,
+      thread_id: "t-cache",
+      agent_id: "a",
+      agent_name: "A",
+      provider: "anthropic",
+      model_id: "claude-sonnet-4",
+      model_config_name: null,
+      input_tokens: 1200,
+      output_tokens: 350,
+      cache_creation_input_tokens: cacheCreation,
+      cache_read_input_tokens: cacheRead,
+      input_rate_usd_per_mtok: 3,
+      output_rate_usd_per_mtok: 15,
+      cost_usd: 0.04,
+    });
+    const stored = getMessageUsage(messageId);
+    expect(stored?.cache_creation_input_tokens).toBe(cacheCreation);
+    expect(stored?.cache_read_input_tokens).toBe(cacheRead);
+  });
+  it("stores NULL cache columns for legacy rows that omit them", () => {
+    // The input deliberately leaves out both cache fields.
+    const messageId = "m-no-cache";
+    recordMessageUsage({
+      message_id: messageId,
+      thread_id: "t-no-cache",
+      agent_id: "a",
+      agent_name: "A",
+      provider: "openai",
+      model_id: "gpt-5",
+      model_config_name: null,
+      input_tokens: 10,
+      output_tokens: 20,
+      input_rate_usd_per_mtok: null,
+      output_rate_usd_per_mtok: null,
+      cost_usd: 0,
+    });
+    const stored = getMessageUsage(messageId);
+    expect(stored?.cache_creation_input_tokens).toBeNull();
+    expect(stored?.cache_read_input_tokens).toBeNull();
+  });
 });

package/lib/stores/message-usage.ts CHANGED Viewed

@@ -24,6 +24,13 @@ export interface MessageUsageInput {
   // assembly. NULL/undefined when unknown (very old assistant turns
   // persisted before the breakdown was wired up, or non-LLM persists).
   tier_usage?: TierUsage | null;
+  // Anthropic prompt caching (PR #181). Both fields are disjoint from
+  // `input_tokens`: total billable input tokens =
+  //   input_tokens + cache_creation_input_tokens + cache_read_input_tokens
+  // priced at 1×, 1.25×, and 0.1× the input rate respectively. NULL/zero
+  // for providers that don't expose cache counts.
+  cache_creation_input_tokens?: number | null;
+  cache_read_input_tokens?: number | null;
 }
 export interface TierUsage {
@@ -37,7 +44,7 @@ export interface TierUsage {
   context_window_tokens: number;
 }
-export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
+export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage" | "cache_creation_input_tokens" | "cache_read_input_tokens"> {
   created_at: string;
   hot_tokens: number | null;
   warm_tokens: number | null;
@@ -47,6 +54,8 @@ export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
   warm_budget_tokens: number | null;
   facts_budget_tokens: number | null;
   context_window_tokens: number | null;
+  cache_creation_input_tokens: number | null;
+  cache_read_input_tokens: number | null;
 }
 export function recordMessageUsage(input: MessageUsageInput): void {
@@ -58,8 +67,9 @@ export function recordMessageUsage(input: MessageUsageInput): void {
        model_config_name, input_tokens, output_tokens,
        input_rate_usd_per_mtok, output_rate_usd_per_mtok, cost_usd, created_at,
        hot_tokens, warm_tokens, facts_tokens, overhead_tokens,
-       hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens
-     ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`,
+       hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens,
+       cache_creation_input_tokens, cache_read_input_tokens
+     ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`,
   ).run(
     input.message_id,
     input.thread_id,
@@ -82,6 +92,8 @@ export function recordMessageUsage(input: MessageUsageInput): void {
     t?.warm_budget_tokens ?? null,
     t?.facts_budget_tokens ?? null,
     t?.context_window_tokens ?? null,
+    input.cache_creation_input_tokens ?? null,
+    input.cache_read_input_tokens ?? null,
   );
 }

package/lib/stores/pricing.test.ts CHANGED Viewed

@@ -80,6 +80,58 @@ describe("estimateCostUsd", () => {
   it("scales sub-million token counts proportionally", () => {
     expect(estimateCostUsd(500_000, 100_000, { inputPer1M: 2, outputPer1M: 10 })).toBeCloseTo(2, 6);
   });
+  describe("anthropic prompt-cache pricing", () => {
+    // $3 / $15 per million tokens (input / output) throughout this group.
+    const rates = { inputPer1M: 3, outputPer1M: 15 };
+    it("ignores cache breakdown when not provided", () => {
+      // The three-argument call still works: 1M input tokens at $3/M.
+      const cost = estimateCostUsd(1_000_000, 0, rates);
+      expect(cost).toBe(3);
+    });
+    it("prices cache writes at 1.25× the input rate", () => {
+      // 1M cache_creation tokens × $3/M × 1.25 = $3.75
+      const cost = estimateCostUsd(0, 0, rates, { cache_creation_input_tokens: 1_000_000 });
+      expect(cost).toBeCloseTo(3.75, 6);
+    });
+    it("prices cache reads at 0.1× the input rate", () => {
+      // 1M cache_read tokens × $3/M × 0.1 = $0.30
+      const cost = estimateCostUsd(0, 0, rates, { cache_read_input_tokens: 1_000_000 });
+      expect(cost).toBeCloseTo(0.3, 6);
+    });
+    it("sums fresh + cache_creation + cache_read + output (Anthropic-style turn)", () => {
+      // Fresh input 100k → $0.30; cache_creation 50k → $0.1875;
+      // cache_read 800k → $0.24; output 20k → $0.30; total $1.0275.
+      const cache = {
+        cache_creation_input_tokens: 50_000,
+        cache_read_input_tokens: 800_000,
+      };
+      const expected = 0.3 + 0.1875 + 0.24 + 0.3;
+      expect(estimateCostUsd(100_000, 20_000, rates, cache)).toBeCloseTo(expected, 6);
+    });
+    it("does not double-bill cache when input rate is null", () => {
+      // With no input rate the cache multipliers have nothing to scale, so
+      // only the 100k output tokens at $5/M are billed.
+      const outputOnlyRates = { inputPer1M: null, outputPer1M: 5 };
+      const cache = {
+        cache_creation_input_tokens: 1_000_000,
+        cache_read_input_tokens: 1_000_000,
+      };
+      expect(estimateCostUsd(0, 100_000, outputOnlyRates, cache)).toBeCloseTo(0.5, 6);
+    });
+    it("treats nullish cache fields as zero", () => {
+      const cache = {
+        cache_creation_input_tokens: null,
+        cache_read_input_tokens: undefined,
+      };
+      expect(estimateCostUsd(0, 0, rates, cache)).toBe(0);
+    });
+  });
 });
 describe("getPricingTables", () => {

package/lib/stores/pricing.ts CHANGED Viewed

@@ -317,15 +317,40 @@ function inferRatesFromSignals(signals: string[]): {
   };
 }
+// Anthropic prompt-cache multipliers, applied against the standard input
+// rate. Cache writes are billed at 1.25× input ("cache create"); cache
+// reads are billed at 0.1× input ("cache hit"). Source:
+// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
+// We apply the same multipliers to any other provider that publishes a
+// cache-token breakdown, even where its published rates differ: OpenAI's
+// prompt caching, for example, quotes a 0.5× read multiplier, but its API
+// surfaces only `cached_tokens` (no separate write count), so a future PR
+// can split that out. For now any provider that emits either field is
+// priced with the multipliers above.
+/** Factor applied to the input rate when pricing cache-creation (write) tokens. */
+export const CACHE_CREATION_INPUT_RATE_MULTIPLIER = 1.25;
+/** Factor applied to the input rate when pricing cache-read (hit) tokens. */
+export const CACHE_READ_INPUT_RATE_MULTIPLIER = 0.1;
+/**
+ * Optional cache-token counts accepted by `estimateCostUsd`. A field that is
+ * missing, `null`, or `undefined` is treated as zero there.
+ */
+export interface CacheTokenBreakdown {
+  cache_creation_input_tokens?: number | null;
+  cache_read_input_tokens?: number | null;
+}
 export function estimateCostUsd(
   inputTokens: number,
   outputTokens: number,
   rates: Pick<ProviderRates, "inputPer1M" | "outputPer1M">,
+  cache?: CacheTokenBreakdown | null,
 ): number {
+  // Estimates USD cost from per-million-token rates. A null rate contributes
+  // nothing for its side; with both rates null the result is 0.
   const inputRate = rates.inputPer1M;
   const outputRate = rates.outputPer1M;
   if (inputRate == null && outputRate == null) return 0;
   const inCost = inputRate == null ? 0 : (inputTokens / 1_000_000) * inputRate;
   const outCost = outputRate == null ? 0 : (outputTokens / 1_000_000) * outputRate;
-  return inCost + outCost;
+  // Cache tokens are priced as a multiple of the input rate, so they are
+  // skipped entirely when no input rate is known.
+  let cacheCost = 0;
+  if (cache && inputRate != null) {
+    const create = cache.cache_creation_input_tokens ?? 0;
+    const read = cache.cache_read_input_tokens ?? 0;
+    if (create > 0) cacheCost += (create / 1_000_000) * inputRate * CACHE_CREATION_INPUT_RATE_MULTIPLIER;
+    if (read > 0)   cacheCost += (read   / 1_000_000) * inputRate * CACHE_READ_INPUT_RATE_MULTIPLIER;
+  }
+  return inCost + outCost + cacheCost;
 }

package/lib/tools/builtins.ts CHANGED Viewed

@@ -29,3 +29,7 @@ import "./outlook";
 import "./outlook-calendar";
 import "./delegate";
 import "./system_config";
+import "./list-tools";
+import "./providers-info";
+import "./mcp-servers-info";
+import "./extension-surfaces";

package/lib/tools/extension-surfaces.test.ts ADDED Viewed

@@ -0,0 +1,79 @@
+import { describe, it, expect, afterAll, beforeAll } from "vitest";
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+// Point HOME / USERPROFILE / JARELA_DB_DIR at a throwaway directory before the
+// module under test is loaded, so the run does not touch the real home dir.
+const tmpRoot = mkdtempSync(join(tmpdir(), "jarela-test-extension-surfaces-"));
+process.env.HOME = tmpRoot;
+process.env.USERPROFILE = tmpRoot;
+process.env.JARELA_DB_DIR = join(tmpRoot, ".jarela-dbdir");
+afterAll(() => {
+  // Best-effort cleanup; a failure to remove the temp dir must not fail the run.
+  try { rmSync(tmpRoot, { recursive: true, force: true }); } catch {}
+});
+// Imported dynamically so the environment overrides above are already set
+// when the module is evaluated.
+const { describeExtensionSurfacesTool } = await import("./extension-surfaces");
+interface Surface {
+  id: string;
+  name: string;
+  summary: string;
+  registration_entrypoint: string;
+  doc_section: string;
+  example_path?: string;
+  introspection_tool?: string;
+  related_adrs: string[];
+}
+interface Result {
+  surfaces: Surface[];
+  count: number;
+  guide_path: string;
+  contract_paths: string[];
+  notes: string[];
+}
+describe("describe_extension_surfaces", () => {
+  let out: Result;
+  // Invoke the tool once before any case runs. Previously `out` was assigned
+  // inside the first test, so running or filtering a later test on its own
+  // dereferenced `undefined`.
+  beforeAll(async () => {
+    out = JSON.parse(await describeExtensionSurfacesTool.invoke({})) as Result;
+  });
+  it("returns the curated catalog with all required fields", () => {
+    expect(out.count).toBe(out.surfaces.length);
+    expect(out.guide_path).toBe("docs/EXTENDING.md");
+    expect(out.contract_paths.length).toBeGreaterThan(0);
+    expect(out.notes.length).toBeGreaterThan(0);
+  });
+  it("includes the core extension points", () => {
+    const ids = out.surfaces.map((s) => s.id).sort();
+    expect(ids).toContain("llm_provider_builtin");
+    expect(ids).toContain("llm_provider_external");
+    expect(ids).toContain("builtin_tool");
+    expect(ids).toContain("mcp_server");
+    expect(ids).toContain("agent_harness");
+    expect(ids).toContain("integration_manifest");
+    expect(ids).toContain("brand_overlay");
+  });
+  it("every surface has a registration entrypoint, doc section, and at least one ADR reference", () => {
+    for (const s of out.surfaces) {
+      expect(s.registration_entrypoint).toBeTruthy();
+      expect(s.doc_section.startsWith("docs/EXTENDING.md#")).toBe(true);
+      expect(Array.isArray(s.related_adrs)).toBe(true);
+      expect(s.related_adrs.length).toBeGreaterThan(0);
+    }
+  });
+  it("introspection_tool references match real tool names", () => {
+    const expectedTools = new Set([
+      "list_providers",
+      "list_tools",
+      "list_mcp_servers",
+      "list_integrations",
+    ]);
+    for (const s of out.surfaces) {
+      // Surfaces without an introspection tool are allowed and skipped.
+      if (s.introspection_tool) {
+        expect(expectedTools.has(s.introspection_tool)).toBe(true);
+      }
+    }
+  });
+});