npm - @deepstrike/wasm - Versions diffs - 0.2.11 → 0.2.12 - Mend

@deepstrike/wasm 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/providers/anthropic.js +86 -12
package/dist/providers/base.d.ts +5 -0
package/dist/providers/base.js +38 -3
package/dist/types.d.ts +23 -0
package/package.json +2 -2

package/dist/providers/anthropic.js CHANGED Viewed

@@ -1,12 +1,61 @@
 import { assistantReplayKey, collectStreamMessage, toAnthropicMessages } from "./base.js";
-function buildAnthropicTools(tools) {
+/** Anthropic accepts at most this many cache_control breakpoints per request. */
+const MAX_CACHE_BREAKPOINTS = 4;
+/** Rolling cache breakpoints reserved for the message history (system uses ≤2). */
+const MESSAGE_CACHE_BREAKPOINTS = 2;
+function buildAnthropicTools(tools, anchorCache) {
     return tools.map((t, i) => ({
         name: t.name,
         description: t.description,
         input_schema: JSON.parse(t.parameters),
-        ...(i === tools.length - 1 ? { cache_control: { type: "ephemeral" } } : {}),
+        // Anchor a tool breakpoint only when the system blocks won't carry one;
+        // otherwise systemStable already caches the tools prefix (tools render
+        // first), and a redundant tool breakpoint would burn a slot the message
+        // history needs to stay within the 4-breakpoint budget.
+        ...(anchorCache && i === tools.length - 1 ? { cache_control: { type: "ephemeral" } } : {}),
     }));
 }
+/**
+ * Roll cache breakpoints across the conversation tail so the message-history
+ * prefix is written once and re-read on later turns (without this the cached
+ * prefix stops at the end of `system` and the whole tool-result history is
+ * re-billed at full input price every turn). Marks the final message plus the
+ * nearest preceding user turn (read anchor); a bare string body is promoted to
+ * a cache-bearing text block.
+ *
+ * The messages are modified in place and nothing is returned; an empty list is
+ * a no-op. The final message is always a target, whatever its role. The
+ * backward scan then adds earlier `user` messages until
+ * MESSAGE_CACHE_BREAKPOINTS targets have been collected, so fewer targets are
+ * marked when the history has no earlier user turn.
+ *
+ * @param msgs Anthropic-format message objects (`role` + `content`).
+ */
+function applyMessageCacheControl(msgs) {
+    if (!msgs.length)
+        return;
+    const targets = new Set([msgs.length - 1]); // index of the final message
+    for (let i = msgs.length - 2; i >= 0 && targets.size < MESSAGE_CACHE_BREAKPOINTS; i--) {
+        if (msgs[i].role === "user")
+            targets.add(i);
+    }
+    for (const idx of targets)
+        markLastBlockCacheable(msgs[idx]);
+}
+function markLastBlockCacheable(msg) {
+    const cache_control = { type: "ephemeral" };
+    if (typeof msg.content === "string") {
+        if (!msg.content)
+            return;
+        msg.content = [{ type: "text", text: msg.content, cache_control }];
+        return;
+    }
+    if (Array.isArray(msg.content) && msg.content.length) {
+        const last = msg.content[msg.content.length - 1];
+        last.cache_control = cache_control;
+    }
+}
+/** Regression guard: fail loudly before the API would reject the request for
+ *  exceeding the cache_control breakpoint limit.
+ *
+ *  The count is a worst-case estimate, not a scan of the request body:
+ *  - system: one breakpoint per element when `system` is an array, else 0
+ *    (NOTE(review): assumes every system block carries cache_control; the
+ *    code that builds the blocks is not visible here, confirm);
+ *  - tools: 1 only when there are tools and `system` is not an array, the same
+ *    condition the caller passes to buildAnthropicTools as `anchorCache`;
+ *  - messages: always the full MESSAGE_CACHE_BREAKPOINTS reservation, even if
+ *    fewer messages were actually marked.
+ *
+ *  @param system    The `system` request value: block array, string, or undefined.
+ *  @param toolCount Number of tools sent with the request.
+ *  @throws Error when the estimated total exceeds MAX_CACHE_BREAKPOINTS. */
+function assertCacheBudget(system, toolCount) {
+    const systemBreakpoints = Array.isArray(system) ? system.length : 0;
+    const toolBreakpoints = toolCount > 0 && !Array.isArray(system) ? 1 : 0;
+    if (systemBreakpoints + toolBreakpoints + MESSAGE_CACHE_BREAKPOINTS > MAX_CACHE_BREAKPOINTS) {
+        throw new Error(`Anthropic cache_control budget exceeded: ${systemBreakpoints} system + ${toolBreakpoints} tool + ${MESSAGE_CACHE_BREAKPOINTS} message > ${MAX_CACHE_BREAKPOINTS}`);
+    }
+}
 export class AnthropicProvider {
     apiKey;
     model;
@@ -61,13 +110,23 @@ export class AnthropicProvider {
         }
         const system = systemBlocks.length ? systemBlocks : (context.systemText || undefined);
         const msgs = toAnthropicMessages(context, message => this.nativeAssistantBlocks.get(assistantReplayKey(message)));
+        applyMessageCacheControl(msgs);
+        // Append the volatile State turn AFTER the cache breakpoints (uncached tail);
+        // absent on un-rebuilt bindings, where the state is already inside `turns`.
+        if (context.stateTurn) {
+            msgs.push({
+                role: context.stateTurn.role === "assistant" ? "assistant" : "user",
+                content: context.stateTurn.content,
+            });
+        }
+        assertCacheBudget(system, tools.length);
         const body = {
             model: this.model,
             max_tokens: this.maxTokens,
             messages: msgs,
             stream: true,
             ...(system ? { system } : {}),
-            ...(tools.length ? { tools: buildAnthropicTools(tools) } : {}),
+            ...(tools.length ? { tools: buildAnthropicTools(tools, !Array.isArray(system)) } : {}),
         };
         if (extensions?.enable_thinking) {
             body.thinking = { type: "enabled", budget_tokens: 8000 };
@@ -91,6 +150,10 @@ export class AnthropicProvider {
         const reader = resp.body.getReader();
         const decoder = new TextDecoder();
         let buf = "";
+        let uncachedInput = 0;
+        let cacheReadTokens = 0;
+        let cacheCreationTokens = 0;
+        let outputTokens = 0;
         while (true) {
             const { done, value } = await reader.read();
             if (done)
@@ -108,15 +171,26 @@ export class AnthropicProvider {
                     const evt = JSON.parse(data);
                     if (evt.type === "message_start" || evt.type === "message_delta") {
                         const usage = (evt.usage ?? evt.message?.usage);
-                        if (usage?.input_tokens != null) {
-                            const inputTokens = usage.input_tokens ?? 0;
-                            const outputTokens = usage.output_tokens ?? 0;
-                            yield {
-                                type: "usage",
-                                totalTokens: inputTokens + outputTokens,
-                                inputTokens,
-                                outputTokens,
-                            };
+                        if (usage) {
+                            // input + cache counts are pinned at message_start; a later
+                            // message_delta may omit them — Math.max prevents zeroing.
+                            uncachedInput = Math.max(uncachedInput, usage.input_tokens ?? 0);
+                            cacheReadTokens = Math.max(cacheReadTokens, usage.cache_read_input_tokens ?? 0);
+                            cacheCreationTokens = Math.max(cacheCreationTokens, usage.cache_creation_input_tokens ?? 0);
+                            outputTokens = Math.max(outputTokens, usage.output_tokens ?? 0);
+                            // inputTokens is the FULL prompt (uncached + cache read + write):
+                            // the kernel reads it as the authoritative context size.
+                            const inputTokens = uncachedInput + cacheReadTokens + cacheCreationTokens;
+                            if (inputTokens > 0 || outputTokens > 0) {
+                                yield {
+                                    type: "usage",
+                                    totalTokens: inputTokens + outputTokens,
+                                    inputTokens,
+                                    outputTokens,
+                                    cacheReadInputTokens: cacheReadTokens,
+                                    cacheCreationInputTokens: cacheCreationTokens,
+                                };
+                            }
                         }
                     }
                     else if (evt.type === "content_block_start") {

package/dist/providers/base.d.ts CHANGED Viewed

@@ -1,5 +1,10 @@
 import type { Message, RenderedContext } from "../types.js";
 import { assistantReplayKey } from "../runtime/provider-replay.js";
+/** Returns the history turns with the volatile State turn (`context.stateTurn`)
+ *  appended as the latest turn. Used for OpenAI, so the history stays a stable
+ *  cacheable prefix; the Anthropic provider instead appends the State turn
+ *  itself, after its cache breakpoints. `stateTurn` is absent on un-rebuilt
+ *  bindings; the state is then already inside `turns`, which is returned as is. */
+export declare function turnsWithStateAppended(context: RenderedContext): Message[];
 /** Build OpenAI-compatible chat messages from a RenderedContext. */
 export declare function toOpenAIMessages(context: RenderedContext): Array<Record<string, unknown>>;
 export declare function toAnthropicMessages(context: RenderedContext, nativeReplay?: (message: Message) => Array<Record<string, unknown>> | undefined): Array<Record<string, unknown>>;

package/dist/providers/base.js CHANGED Viewed

@@ -7,18 +7,50 @@ function parseToolArguments(args) {
         return {};
     }
 }
+/** Multimodal: OpenAI content blocks from contentParts (text + image).
+ *
+ *  An `image` part becomes an `image_url` block. Inline `data` is wrapped in a
+ *  base64 data URL whose media type defaults to "image/png"; without `data`
+ *  the part's `url` is used, falling back to an empty string. `detail` is
+ *  forwarded only when it is set.
+ *
+ *  Every other part type is emitted as a `text` block whose text is the
+ *  part's `text`, else its `output`, else "". No other field is rendered.
+ *
+ *  @param parts The message's `contentParts`.
+ *  @returns One OpenAI content block per part, in the same order. */
+function openAIPartsContent(parts) {
+    return parts.map(p => {
+        if (p.type === "image") {
+            const url = p.data ? `data:${p.mediaType ?? "image/png"};base64,${p.data}` : (p.url ?? "");
+            return { type: "image_url", image_url: { url, ...(p.detail ? { detail: p.detail } : {}) } };
+        }
+        return { type: "text", text: p.text ?? p.output ?? "" };
+    });
+}
+/** Multimodal: Anthropic content blocks from contentParts (text + image).
+ *
+ *  An `image` part becomes an `image` block. Inline `data` yields a `base64`
+ *  source whose media type defaults to "image/png"; without `data` a `url`
+ *  source is built from the part's `url`, falling back to an empty string.
+ *  The part's `detail` field is not used here.
+ *
+ *  Every other part type is emitted as a `text` block whose text is the
+ *  part's `text`, else its `output`, else "". No other field is rendered.
+ *
+ *  @param parts The message's `contentParts`.
+ *  @returns One Anthropic content block per part, in the same order. */
+function anthropicPartsContent(parts) {
+    return parts.map(p => {
+        if (p.type === "image") {
+            const source = p.data
+                ? { type: "base64", media_type: p.mediaType ?? "image/png", data: p.data }
+                : { type: "url", url: p.url ?? "" };
+            return { type: "image", source };
+        }
+        return { type: "text", text: p.text ?? p.output ?? "" };
+    });
+}
+/** Returns the history turns with the volatile State turn (`context.stateTurn`)
+ *  appended as the latest turn. Used for OpenAI, so the history stays a stable
+ *  cacheable prefix; the Anthropic provider instead appends the State turn
+ *  itself, after its cache breakpoints. `stateTurn` is absent on un-rebuilt
+ *  bindings; the state is then already inside `turns`.
+ *
+ *  With a State turn the result is a new array; without one it is
+ *  `context.turns` itself (not a copy), so treat the result as read-only. */
+export function turnsWithStateAppended(context) {
+    return context.stateTurn ? [...context.turns, context.stateTurn] : context.turns;
+}
 /** Build OpenAI-compatible chat messages from a RenderedContext. */
 export function toOpenAIMessages(context) {
     const messages = [];
     if (context.systemText) {
         messages.push({ role: "system", content: context.systemText });
     }
-    for (const msg of context.turns) {
+    for (const msg of turnsWithStateAppended(context)) {
         if (msg.role === "tool") {
             messages.push({ role: "tool", content: msg.content });
             continue;
         }
-        const next = { role: msg.role, content: msg.content };
+        const next = {
+            role: msg.role,
+            content: msg.contentParts?.length ? openAIPartsContent(msg.contentParts) : msg.content,
+        };
         if (msg.role === "assistant" && msg.toolCalls?.length) {
             next.tool_calls = msg.toolCalls.map(tc => ({
                 id: tc.id,
@@ -55,7 +87,10 @@ export function toAnthropicMessages(context, nativeReplay) {
             result.push({ role: "assistant", content: blocks });
             continue;
         }
-        result.push({ role: msg.role, content: msg.content });
+        result.push({
+            role: msg.role,
+            content: msg.contentParts?.length ? anthropicPartsContent(msg.contentParts) : msg.content,
+        });
     }
     if (context.systemVolatile && result.length > 0) {
         const last = result[result.length - 1];

package/dist/types.d.ts CHANGED Viewed

@@ -1,9 +1,27 @@
 import type { WorkflowNodeSpec } from "./runtime/types/agent.js";
+/** One element of a message's `contentParts` (multimodal body). */
+export interface ContentPart {
+    /** Part kind. */
+    type: "text" | "image" | "audio" | "tool_result";
+    /** Text body; provider renderers use it for every non-image part. */
+    text?: string;
+    /** Remote image URL (mutually exclusive with `data`). */
+    url?: string;
+    /** Raw base64-encoded bytes (image/audio). */
+    data?: string;
+    /** MIME type, e.g. `"image/png"`. */
+    mediaType?: string;
+    /** OpenAI vision detail level. */
+    detail?: "auto" | "low" | "high";
+    /** NOTE(review): presumably the tool call this result answers; confirm. */
+    callId?: string;
+    /** Provider renderers fall back to this as the text when `text` is unset. */
+    output?: string;
+    /** NOTE(review): presumably marks a failed tool result; confirm. */
+    isError?: boolean;
+}
 export interface Message {
     role: "system" | "user" | "assistant" | "tool";
     content: string;
     tokenCount?: number;
     toolCalls?: ToolCall[];
+    /** Multimodal parts (text + image/audio). When present, providers render these
+     *  instead of the plain `content` string. */
+    contentParts?: ContentPart[];
 }
 export interface ToolCall {
     id: string;
@@ -31,6 +49,9 @@ export interface RenderedContext {
     systemKnowledge?: string;
     systemVolatile?: string;
     turns: Message[];
+    /** Volatile State turn (task_state + signals), rendered after the cacheable
+     *  history. Absent on un-rebuilt bindings — then it's still inside turns[0]. */
+    stateTurn?: Message;
 }
 export interface StreamEvent {
     type: string;
@@ -44,6 +65,8 @@ export interface UsageEvent extends StreamEvent {
     totalTokens: number;
     inputTokens?: number;
     outputTokens?: number;
+    cacheReadInputTokens?: number;
+    cacheCreationInputTokens?: number;
 }
 export interface ThinkingDelta extends StreamEvent {
     type: "thinking_delta";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@deepstrike/wasm",
-  "version": "0.2.11",
+  "version": "0.2.12",
   "description": "DeepStrike WASM SDK — browser, Cloudflare Workers, Deno Deploy",
   "type": "module",
   "main": "dist/index.js",
@@ -15,7 +15,7 @@
     "test": "node --experimental-vm-modules node_modules/.bin/jest"
   },
   "dependencies": {
-    "@deepstrike/wasm-kernel": "0.2.11"
+    "@deepstrike/wasm-kernel": "0.2.12"
   },
   "devDependencies": {
     "@types/jest": "^30.0.0",