@deepstrike/wasm 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,61 @@
1
1
  import { assistantReplayKey, collectStreamMessage, toAnthropicMessages } from "./base.js";
2
- function buildAnthropicTools(tools) {
2
+ /** Anthropic accepts at most this many cache_control breakpoints per request. */
3
+ const MAX_CACHE_BREAKPOINTS = 4;
4
+ /** Rolling cache breakpoints reserved for the message history (system uses ≤2). */
5
+ const MESSAGE_CACHE_BREAKPOINTS = 2;
6
/**
 * Convert tool specs into entries for the Anthropic `tools` request field.
 *
 * @param tools - Tool specs; each `parameters` value is a JSON-encoded schema
 *   string and is parsed into `input_schema`.
 * @param anchorCache - When truthy, the final tool entry carries an ephemeral
 *   `cache_control` breakpoint; otherwise no tool entry is marked.
 * @returns One entry per tool, in input order.
 */
function buildAnthropicTools(tools, anchorCache) {
    const finalIndex = tools.length - 1;
    return tools.map((tool, index) => {
        const entry = {
            name: tool.name,
            description: tool.description,
            input_schema: JSON.parse(tool.parameters),
        };
        // Only the last tool may anchor a breakpoint, and only when the caller
        // asks for it: the caller passes false when the system blocks already
        // carry the cache breakpoints, so no slot is spent on tools.
        if (anchorCache && index === finalIndex) {
            return { ...entry, cache_control: { type: "ephemeral" } };
        }
        return entry;
    });
}
18
/**
 * Mark cache breakpoints on the tail of the conversation, in place.
 *
 * The final message is always selected. Scanning backwards from there, earlier
 * messages whose role is "user" are added until MESSAGE_CACHE_BREAKPOINTS
 * messages are selected in total. Each selected message is handed to
 * markLastBlockCacheable. An empty list is left untouched.
 *
 * The intent is that the message-history prefix gets cached and re-read on
 * later turns instead of ending the cached prefix at `system`.
 *
 * @param msgs - Anthropic-format messages; selected entries are mutated.
 */
function applyMessageCacheControl(msgs) {
    const total = msgs.length;
    if (!total) {
        return;
    }
    const finalIndex = total - 1;
    // Indices are collected in descending order, so they are unique by construction.
    const selected = [finalIndex];
    let cursor = finalIndex - 1;
    while (cursor >= 0 && selected.length < MESSAGE_CACHE_BREAKPOINTS) {
        if (msgs[cursor].role === "user") {
            selected.push(cursor);
        }
        cursor -= 1;
    }
    selected.forEach(position => markLastBlockCacheable(msgs[position]));
}
37
/**
 * Attach an ephemeral `cache_control` marker to the last content block of a
 * message.
 *
 * - Non-empty string content is promoted to a single text block carrying the
 *   marker; an empty string is left as-is.
 * - Array content gets a NEW array whose last element is a shallow copy of the
 *   original last block plus the marker. The original array and block objects
 *   are never mutated.
 * - Anything else (empty array, missing content, non-object last block) is a
 *   no-op.
 *
 * @param msg - Message object; only its `content` property is reassigned.
 */
function markLastBlockCacheable(msg) {
    const cache_control = { type: "ephemeral" };
    const content = msg.content;
    if (typeof content === "string") {
        if (!content) {
            return;
        }
        msg.content = [{ type: "text", text: content, cache_control }];
        return;
    }
    if (!Array.isArray(content) || content.length === 0) {
        return;
    }
    const lastIndex = content.length - 1;
    const last = content[lastIndex];
    if (last === null || typeof last !== "object") {
        return;
    }
    // Copy-on-write: the blocks may be shared with state that outlives this
    // request (e.g. replayed native assistant blocks — NOTE(review): confirm
    // against toAnthropicMessages). Mutating them in place would leave stale
    // markers on older messages in later requests and could push the request
    // past the per-request breakpoint limit.
    msg.content = [...content.slice(0, lastIndex), { ...last, cache_control }];
}
50
/**
 * Regression guard: throw before sending a request that would exceed the
 * cache_control breakpoint limit.
 *
 * The count is an estimate derived from the request shape, not from the
 * blocks themselves: every entry of an array-valued `system` counts as one
 * breakpoint, a tool breakpoint is counted only when `system` is not an array
 * and at least one tool exists, and MESSAGE_CACHE_BREAKPOINTS is always added.
 *
 * @param system - The `system` request value (block array, string, or undefined).
 * @param toolCount - Number of tools in the request.
 * @throws Error when the estimated total exceeds MAX_CACHE_BREAKPOINTS.
 */
function assertCacheBudget(system, toolCount) {
    const systemIsBlockList = Array.isArray(system);
    const systemBreakpoints = systemIsBlockList ? system.length : 0;
    const toolBreakpoints = !systemIsBlockList && toolCount > 0 ? 1 : 0;
    const requested = systemBreakpoints + toolBreakpoints + MESSAGE_CACHE_BREAKPOINTS;
    if (requested <= MAX_CACHE_BREAKPOINTS) {
        return;
    }
    throw new Error(`Anthropic cache_control budget exceeded: ${systemBreakpoints} system + ${toolBreakpoints} tool + ${MESSAGE_CACHE_BREAKPOINTS} message > ${MAX_CACHE_BREAKPOINTS}`);
}
10
59
  export class AnthropicProvider {
11
60
  apiKey;
12
61
  model;
@@ -61,13 +110,23 @@ export class AnthropicProvider {
61
110
  }
62
111
  const system = systemBlocks.length ? systemBlocks : (context.systemText || undefined);
63
112
  const msgs = toAnthropicMessages(context, message => this.nativeAssistantBlocks.get(assistantReplayKey(message)));
113
+ applyMessageCacheControl(msgs);
114
+ // Append the volatile State turn AFTER the cache breakpoints (uncached tail);
115
+ // absent on un-rebuilt bindings, where the state is already inside `turns`.
116
+ if (context.stateTurn) {
117
+ msgs.push({
118
+ role: context.stateTurn.role === "assistant" ? "assistant" : "user",
119
+ content: context.stateTurn.content,
120
+ });
121
+ }
122
+ assertCacheBudget(system, tools.length);
64
123
  const body = {
65
124
  model: this.model,
66
125
  max_tokens: this.maxTokens,
67
126
  messages: msgs,
68
127
  stream: true,
69
128
  ...(system ? { system } : {}),
70
- ...(tools.length ? { tools: buildAnthropicTools(tools) } : {}),
129
+ ...(tools.length ? { tools: buildAnthropicTools(tools, !Array.isArray(system)) } : {}),
71
130
  };
72
131
  if (extensions?.enable_thinking) {
73
132
  body.thinking = { type: "enabled", budget_tokens: 8000 };
@@ -91,6 +150,10 @@ export class AnthropicProvider {
91
150
  const reader = resp.body.getReader();
92
151
  const decoder = new TextDecoder();
93
152
  let buf = "";
153
+ let uncachedInput = 0;
154
+ let cacheReadTokens = 0;
155
+ let cacheCreationTokens = 0;
156
+ let outputTokens = 0;
94
157
  while (true) {
95
158
  const { done, value } = await reader.read();
96
159
  if (done)
@@ -108,15 +171,26 @@ export class AnthropicProvider {
108
171
  const evt = JSON.parse(data);
109
172
  if (evt.type === "message_start" || evt.type === "message_delta") {
110
173
  const usage = (evt.usage ?? evt.message?.usage);
111
- if (usage?.input_tokens != null) {
112
- const inputTokens = usage.input_tokens ?? 0;
113
- const outputTokens = usage.output_tokens ?? 0;
114
- yield {
115
- type: "usage",
116
- totalTokens: inputTokens + outputTokens,
117
- inputTokens,
118
- outputTokens,
119
- };
174
+ if (usage) {
175
+ // input + cache counts are pinned at message_start; a later
176
+ // message_delta may omit them — Math.max prevents zeroing.
177
+ uncachedInput = Math.max(uncachedInput, usage.input_tokens ?? 0);
178
+ cacheReadTokens = Math.max(cacheReadTokens, usage.cache_read_input_tokens ?? 0);
179
+ cacheCreationTokens = Math.max(cacheCreationTokens, usage.cache_creation_input_tokens ?? 0);
180
+ outputTokens = Math.max(outputTokens, usage.output_tokens ?? 0);
181
+ // inputTokens is the FULL prompt (uncached + cache read + write):
182
+ // the kernel reads it as the authoritative context size.
183
+ const inputTokens = uncachedInput + cacheReadTokens + cacheCreationTokens;
184
+ if (inputTokens > 0 || outputTokens > 0) {
185
+ yield {
186
+ type: "usage",
187
+ totalTokens: inputTokens + outputTokens,
188
+ inputTokens,
189
+ outputTokens,
190
+ cacheReadInputTokens: cacheReadTokens,
191
+ cacheCreationInputTokens: cacheCreationTokens,
192
+ };
193
+ }
120
194
  }
121
195
  }
122
196
  else if (evt.type === "content_block_start") {
@@ -1,5 +1,10 @@
1
1
  import type { Message, RenderedContext } from "../types.js";
2
2
  import { assistantReplayKey } from "../runtime/provider-replay.js";
3
+ /** History turns with the volatile State turn appended as the latest turn
4
+ * (OpenAI), keeping the history a stable cacheable prefix. Anthropic appends it
5
+ * after the cache breakpoint. Absent on un-rebuilt bindings — then the state is
6
+ * already inside `turns`. */
7
+ export declare function turnsWithStateAppended(context: RenderedContext): Message[];
3
8
  /** Build OpenAI-compatible chat messages from a RenderedContext. */
4
9
  export declare function toOpenAIMessages(context: RenderedContext): Array<Record<string, unknown>>;
5
10
  export declare function toAnthropicMessages(context: RenderedContext, nativeReplay?: (message: Message) => Array<Record<string, unknown>> | undefined): Array<Record<string, unknown>>;
@@ -7,18 +7,50 @@ function parseToolArguments(args) {
7
7
  return {};
8
8
  }
9
9
  }
10
/**
 * Render multimodal content parts as OpenAI chat content blocks.
 *
 * Parts of type "image" become `image_url` blocks: inline `data` is wrapped in
 * a base64 data URL (media type defaults to "image/png"), otherwise `url` is
 * used (empty string when absent). `detail` is included only when set. Every
 * other part type becomes a text block built from `text`, then `output`, then
 * the empty string.
 *
 * @param parts - Content parts to render.
 * @returns One block per part, in input order.
 */
function openAIPartsContent(parts) {
    const toImageUrl = part => {
        if (part.data) {
            const mime = part.mediaType ?? "image/png";
            return `data:${mime};base64,${part.data}`;
        }
        return part.url ?? "";
    };
    return parts.map(part => {
        if (part.type !== "image") {
            return { type: "text", text: part.text ?? part.output ?? "" };
        }
        const url = toImageUrl(part);
        const image_url = part.detail ? { url, detail: part.detail } : { url };
        return { type: "image_url", image_url };
    });
}
20
/**
 * Render multimodal content parts as Anthropic content blocks.
 *
 * Parts of type "image" become `image` blocks whose source is base64 when
 * inline `data` is present (media type defaults to "image/png") and a URL
 * source otherwise (empty string when `url` is absent). Every other part type
 * becomes a text block built from `text`, then `output`, then the empty string.
 *
 * @param parts - Content parts to render.
 * @returns One block per part, in input order.
 */
function anthropicPartsContent(parts) {
    const imageSource = part => {
        if (part.data) {
            return { type: "base64", media_type: part.mediaType ?? "image/png", data: part.data };
        }
        return { type: "url", url: part.url ?? "" };
    };
    return parts.map(part => {
        if (part.type !== "image") {
            return { type: "text", text: part.text ?? part.output ?? "" };
        }
        return { type: "image", source: imageSource(part) };
    });
}
32
/**
 * Return the history turns followed by the volatile State turn, if any.
 *
 * When `context.stateTurn` is set, a new array is returned with it appended
 * after `context.turns`; when it is absent, `context.turns` itself is returned
 * (same reference, no copy). Per the surrounding code, an absent stateTurn
 * means the state is already inside `turns` (un-rebuilt bindings).
 *
 * @param context - Rendered context providing `turns` and optional `stateTurn`.
 * @returns The turns to render, with the State turn last when present.
 */
export function turnsWithStateAppended(context) {
    const { turns, stateTurn } = context;
    if (!stateTurn) {
        return turns;
    }
    // Wrap in an array so concat appends the turn itself rather than spreading it.
    return turns.concat([stateTurn]);
}
10
39
  /** Build OpenAI-compatible chat messages from a RenderedContext. */
11
40
  export function toOpenAIMessages(context) {
12
41
  const messages = [];
13
42
  if (context.systemText) {
14
43
  messages.push({ role: "system", content: context.systemText });
15
44
  }
16
- for (const msg of context.turns) {
45
+ for (const msg of turnsWithStateAppended(context)) {
17
46
  if (msg.role === "tool") {
18
47
  messages.push({ role: "tool", content: msg.content });
19
48
  continue;
20
49
  }
21
- const next = { role: msg.role, content: msg.content };
50
+ const next = {
51
+ role: msg.role,
52
+ content: msg.contentParts?.length ? openAIPartsContent(msg.contentParts) : msg.content,
53
+ };
22
54
  if (msg.role === "assistant" && msg.toolCalls?.length) {
23
55
  next.tool_calls = msg.toolCalls.map(tc => ({
24
56
  id: tc.id,
@@ -55,7 +87,10 @@ export function toAnthropicMessages(context, nativeReplay) {
55
87
  result.push({ role: "assistant", content: blocks });
56
88
  continue;
57
89
  }
58
- result.push({ role: msg.role, content: msg.content });
90
+ result.push({
91
+ role: msg.role,
92
+ content: msg.contentParts?.length ? anthropicPartsContent(msg.contentParts) : msg.content,
93
+ });
59
94
  }
60
95
  if (context.systemVolatile && result.length > 0) {
61
96
  const last = result[result.length - 1];
package/dist/types.d.ts CHANGED
@@ -1,9 +1,27 @@
1
1
  import type { WorkflowNodeSpec } from "./runtime/types/agent.js";
2
/**
 * One part of a multimodal message body.
 *
 * NOTE(review): `callId`, `output` and `isError` look like they belong to
 * `tool_result` parts — confirm against the producer of these parts.
 */
export interface ContentPart {
    /** Discriminates how the part is rendered by providers. */
    type: "text" | "image" | "audio" | "tool_result";
    /** Text payload; providers use it first when rendering a non-image part. */
    text?: string;
    /** Remote image URL (mutually exclusive with `data`). */
    url?: string;
    /** Raw base64-encoded bytes (image/audio). */
    data?: string;
    /** MIME type, e.g. `"image/png"`. */
    mediaType?: string;
    /** OpenAI vision detail level. */
    detail?: "auto" | "low" | "high";
    callId?: string;
    /** Used as the text of a non-image part when `text` is absent. */
    output?: string;
    isError?: boolean;
}
2
17
/** A single conversation turn handed to provider adapters. */
export interface Message {
    /** Speaker of the turn. */
    role: "system" | "user" | "assistant" | "tool";
    /** Plain-text body; rendered as-is when `contentParts` is absent or empty. */
    content: string;
    tokenCount?: number;
    /** Tool invocations attached to the turn. */
    toolCalls?: ToolCall[];
    /**
     * Multimodal parts (text + image/audio). When present and non-empty,
     * providers render these instead of the plain `content` string.
     */
    contentParts?: ContentPart[];
}
8
26
  export interface ToolCall {
9
27
  id: string;
@@ -31,6 +49,9 @@ export interface RenderedContext {
31
49
  systemKnowledge?: string;
32
50
  systemVolatile?: string;
33
51
  turns: Message[];
52
+ /** Volatile State turn (task_state + signals), rendered after the cacheable
53
+ * history. Absent on un-rebuilt bindings — then it's still inside turns[0]. */
54
+ stateTurn?: Message;
34
55
  }
35
56
  export interface StreamEvent {
36
57
  type: string;
@@ -44,6 +65,8 @@ export interface UsageEvent extends StreamEvent {
44
65
  totalTokens: number;
45
66
  inputTokens?: number;
46
67
  outputTokens?: number;
68
+ cacheReadInputTokens?: number;
69
+ cacheCreationInputTokens?: number;
47
70
  }
48
71
  export interface ThinkingDelta extends StreamEvent {
49
72
  type: "thinking_delta";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deepstrike/wasm",
3
- "version": "0.2.11",
3
+ "version": "0.2.12",
4
4
  "description": "DeepStrike WASM SDK — browser, Cloudflare Workers, Deno Deploy",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -15,7 +15,7 @@
15
15
  "test": "node --experimental-vm-modules node_modules/.bin/jest"
16
16
  },
17
17
  "dependencies": {
18
- "@deepstrike/wasm-kernel": "0.2.11"
18
+ "@deepstrike/wasm-kernel": "0.2.12"
19
19
  },
20
20
  "devDependencies": {
21
21
  "@types/jest": "^30.0.0",