zeitlich 0.2.45 → 0.2.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +137 -11
  2. package/dist/{activities-Coafq5zr.d.cts → activities-CPwKoUlD.d.cts} +22 -2
  3. package/dist/{activities-CrN-ghLo.d.ts → activities-DlaBxNID.d.ts} +22 -2
  4. package/dist/adapters/thread/anthropic/index.cjs +276 -71
  5. package/dist/adapters/thread/anthropic/index.cjs.map +1 -1
  6. package/dist/adapters/thread/anthropic/index.d.cts +62 -8
  7. package/dist/adapters/thread/anthropic/index.d.ts +62 -8
  8. package/dist/adapters/thread/anthropic/index.js +275 -72
  9. package/dist/adapters/thread/anthropic/index.js.map +1 -1
  10. package/dist/adapters/thread/anthropic/workflow.cjs +38 -20
  11. package/dist/adapters/thread/anthropic/workflow.cjs.map +1 -1
  12. package/dist/adapters/thread/anthropic/workflow.d.cts +5 -4
  13. package/dist/adapters/thread/anthropic/workflow.d.ts +5 -4
  14. package/dist/adapters/thread/anthropic/workflow.js +38 -20
  15. package/dist/adapters/thread/anthropic/workflow.js.map +1 -1
  16. package/dist/adapters/thread/google-genai/index.cjs +171 -69
  17. package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
  18. package/dist/adapters/thread/google-genai/index.d.cts +6 -4
  19. package/dist/adapters/thread/google-genai/index.d.ts +6 -4
  20. package/dist/adapters/thread/google-genai/index.js +171 -69
  21. package/dist/adapters/thread/google-genai/index.js.map +1 -1
  22. package/dist/adapters/thread/google-genai/workflow.cjs +38 -20
  23. package/dist/adapters/thread/google-genai/workflow.cjs.map +1 -1
  24. package/dist/adapters/thread/google-genai/workflow.d.cts +7 -4
  25. package/dist/adapters/thread/google-genai/workflow.d.ts +7 -4
  26. package/dist/adapters/thread/google-genai/workflow.js +38 -20
  27. package/dist/adapters/thread/google-genai/workflow.js.map +1 -1
  28. package/dist/adapters/thread/langchain/index.cjs +170 -66
  29. package/dist/adapters/thread/langchain/index.cjs.map +1 -1
  30. package/dist/adapters/thread/langchain/index.d.cts +19 -4
  31. package/dist/adapters/thread/langchain/index.d.ts +19 -4
  32. package/dist/adapters/thread/langchain/index.js +170 -66
  33. package/dist/adapters/thread/langchain/index.js.map +1 -1
  34. package/dist/adapters/thread/langchain/workflow.cjs +38 -20
  35. package/dist/adapters/thread/langchain/workflow.cjs.map +1 -1
  36. package/dist/adapters/thread/langchain/workflow.d.cts +5 -4
  37. package/dist/adapters/thread/langchain/workflow.d.ts +5 -4
  38. package/dist/adapters/thread/langchain/workflow.js +38 -20
  39. package/dist/adapters/thread/langchain/workflow.js.map +1 -1
  40. package/dist/cold-store-BDgJpwLI.d.ts +114 -0
  41. package/dist/cold-store-Z2wvK2cV.d.cts +114 -0
  42. package/dist/index.cjs +440 -67
  43. package/dist/index.cjs.map +1 -1
  44. package/dist/index.d.cts +150 -8
  45. package/dist/index.d.ts +150 -8
  46. package/dist/index.js +432 -68
  47. package/dist/index.js.map +1 -1
  48. package/dist/proxy-CDh3Rsa7.d.cts +40 -0
  49. package/dist/proxy-Du8ggERu.d.ts +40 -0
  50. package/dist/{thread-manager-wRVVBFgj.d.cts → thread-manager-BjoYYXgd.d.cts} +8 -2
  51. package/dist/{thread-manager-BsLO3Fgc.d.cts → thread-manager-D8zKNFZ9.d.cts} +8 -2
  52. package/dist/{thread-manager-Bi1XlbpJ.d.ts → thread-manager-DtHYws2F.d.ts} +8 -2
  53. package/dist/{thread-manager-BhkOyQ1I.d.ts → thread-manager-Dw96FKH1.d.ts} +8 -2
  54. package/dist/{types-C66-BVBr.d.cts → types-BMJrsHo0.d.cts} +17 -1
  55. package/dist/{types-BkX4HLzi.d.ts → types-CtdOquo3.d.ts} +17 -1
  56. package/dist/{types-CdALEF3z.d.cts → types-DNEl5uxQ.d.cts} +38 -0
  57. package/dist/{types-ChAy_jSP.d.ts → types-qQVZfhoT.d.ts} +38 -0
  58. package/dist/{workflow-DMmiaw6w.d.cts → workflow-BH9ImDGq.d.cts} +48 -2
  59. package/dist/{workflow-BwT5EybR.d.ts → workflow-Cdw3-RNB.d.ts} +48 -2
  60. package/dist/workflow.cjs +47 -4
  61. package/dist/workflow.cjs.map +1 -1
  62. package/dist/workflow.d.cts +2 -2
  63. package/dist/workflow.d.ts +2 -2
  64. package/dist/workflow.js +47 -5
  65. package/dist/workflow.js.map +1 -1
  66. package/package.json +14 -3
  67. package/src/adapters/thread/anthropic/activities.ts +82 -39
  68. package/src/adapters/thread/anthropic/index.ts +8 -0
  69. package/src/adapters/thread/anthropic/model-invoker.test.ts +110 -0
  70. package/src/adapters/thread/anthropic/model-invoker.ts +26 -5
  71. package/src/adapters/thread/anthropic/prompt-cache.test.ts +134 -0
  72. package/src/adapters/thread/anthropic/prompt-cache.ts +163 -0
  73. package/src/adapters/thread/anthropic/proxy.ts +1 -0
  74. package/src/adapters/thread/anthropic/thread-manager.ts +9 -1
  75. package/src/adapters/thread/google-genai/activities.ts +64 -40
  76. package/src/adapters/thread/google-genai/proxy.ts +1 -0
  77. package/src/adapters/thread/google-genai/thread-manager.ts +9 -1
  78. package/src/adapters/thread/langchain/activities.ts +63 -36
  79. package/src/adapters/thread/langchain/proxy.ts +1 -0
  80. package/src/adapters/thread/langchain/thread-manager.ts +9 -1
  81. package/src/index.ts +21 -2
  82. package/src/lib/session/session-edge-cases.integration.test.ts +12 -0
  83. package/src/lib/session/session.integration.test.ts +138 -0
  84. package/src/lib/session/session.ts +29 -0
  85. package/src/lib/session/types.ts +22 -0
  86. package/src/lib/subagent/define.ts +1 -0
  87. package/src/lib/subagent/handler.ts +11 -2
  88. package/src/lib/subagent/subagent.integration.test.ts +139 -0
  89. package/src/lib/subagent/types.ts +16 -0
  90. package/src/lib/thread/cold-store.test.ts +221 -0
  91. package/src/lib/thread/cold-store.ts +269 -0
  92. package/src/lib/thread/index.ts +32 -0
  93. package/src/lib/thread/keys.ts +20 -0
  94. package/src/lib/thread/manager.ts +16 -27
  95. package/src/lib/thread/proxy.ts +79 -27
  96. package/src/lib/thread/snapshot.test.ts +443 -0
  97. package/src/lib/thread/snapshot.ts +163 -0
  98. package/src/lib/thread/test-utils.ts +228 -0
  99. package/src/lib/thread/tiered.test.ts +281 -0
  100. package/src/lib/thread/tiered.ts +135 -0
  101. package/src/lib/thread/types.ts +16 -0
  102. package/src/tools/edit/handler.test.ts +177 -0
  103. package/src/tools/edit/handler.ts +249 -47
  104. package/src/tools/edit/tool.ts +40 -0
  105. package/src/tools/task-create/handler.ts +1 -1
  106. package/src/tools/task-update/handler.ts +1 -1
  107. package/src/workflow.ts +2 -2
  108. package/dist/proxy-Bf7uI-Hw.d.cts +0 -24
  109. package/dist/proxy-COqA95FW.d.ts +0 -24
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "zeitlich",
3
- "version": "0.2.45",
3
+ "version": "0.2.47",
4
4
  "description": "[EXPERIMENTAL] An opinionated AI agent implementation for Temporal",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -181,7 +181,8 @@
181
181
  "release:pr:dry": "release-please release-pr --repo-url=bead-ai/zeitlich --token=$GITHUB_TOKEN --dry-run",
182
182
  "release:github": "release-please github-release --repo-url=bead-ai/zeitlich --token=$GITHUB_TOKEN",
183
183
  "release:npm": "npm publish --access public",
184
- "release:publish": "npm run release:github && npm run release:npm"
184
+ "release:publish": "npm run release:github && npm run release:npm",
185
+ "eval:edit": "node scripts/run-edit-tool-evals.mjs"
185
186
  },
186
187
  "keywords": [
187
188
  "ai",
@@ -200,7 +201,9 @@
200
201
  "node": ">=18"
201
202
  },
202
203
  "devDependencies": {
203
- "@anthropic-ai/sdk": "^0.93.0",
204
+ "@anthropic-ai/sdk": "^0.98.0",
205
+ "@aws-sdk/client-s3": "^3.1000.0",
206
+ "@aws-sdk/lib-storage": "^3.1000.0",
204
207
  "@daytonaio/sdk": "^0.171.0",
205
208
  "@e2b/code-interpreter": "^2.3.3",
206
209
  "@eslint/js": "^10.0.1",
@@ -223,6 +226,8 @@
223
226
  },
224
227
  "peerDependencies": {
225
228
  "@anthropic-ai/sdk": ">=0.50.0",
229
+ "@aws-sdk/client-s3": ">=3.700.0",
230
+ "@aws-sdk/lib-storage": ">=3.700.0",
226
231
  "@daytonaio/sdk": ">=0.153.0",
227
232
  "@e2b/code-interpreter": "^2.3.3",
228
233
  "@google/genai": "^1.43.0",
@@ -241,6 +246,12 @@
241
246
  "@anthropic-ai/sdk": {
242
247
  "optional": true
243
248
  },
249
+ "@aws-sdk/client-s3": {
250
+ "optional": true
251
+ },
252
+ "@aws-sdk/lib-storage": {
253
+ "optional": true
254
+ },
244
255
  "@google/genai": {
245
256
  "optional": true
246
257
  },
@@ -13,16 +13,21 @@ import type {
13
13
  ScopedPrefix,
14
14
  } from "../../../lib/session/types";
15
15
  import type { ModelInvoker } from "../../../lib/model";
16
+ import { createTieredThreadManager } from "../../../lib/thread/tiered";
17
+ import type { ColdThreadStore } from "../../../lib/thread/cold-store";
16
18
  import {
17
19
  createAnthropicThreadManager,
20
+ storedMessageId,
18
21
  type AnthropicContent,
19
22
  type AnthropicSystemContent,
20
23
  type AnthropicThreadManagerHooks,
24
+ type StoredMessage,
21
25
  } from "./thread-manager";
22
26
  import {
23
27
  createAnthropicModelInvoker,
24
28
  type AnthropicModelInvokerConfig,
25
29
  } from "./model-invoker";
30
+ import type { AnthropicPromptCacheConfig } from "./prompt-cache";
26
31
  import { ADAPTER_ID } from "./adapter-id";
27
32
 
28
33
  export type AnthropicThreadOps<TScope extends string = ""> = PrefixedThreadOps<
@@ -37,7 +42,26 @@ export interface AnthropicAdapterConfig {
37
42
  model?: string;
38
43
  /** Maximum tokens to generate. Defaults to 16384. */
39
44
  maxTokens?: number;
45
+ /**
46
+ * Controls Anthropic/Bedrock-compatible prompt caching. Defaults to enabled
47
+ * with an explicit 5 minute TTL. Set to `false` to disable.
48
+ */
49
+ promptCache?: AnthropicPromptCacheConfig;
40
50
  hooks?: AnthropicThreadManagerHooks;
51
+ /**
52
+ * Optional durable cold tier (e.g. S3, R2, GCS). When provided,
53
+ * the session will hydrate the thread from cold storage on entry
54
+ * (`continue`/`fork` modes) and flush it back on every exit path.
55
+ * When omitted, the adapter is Redis-only and `hydrateThread`/
56
+ * `flushThread` activities are no-ops.
57
+ */
58
+ coldStore?: ColdThreadStore;
59
+ /**
60
+ * Override the default Redis TTL (90 days) for thread keys. When
61
+ * pairing the adapter with a `coldStore`, a shorter TTL (hours)
62
+ * is typically more appropriate.
63
+ */
64
+ ttlSeconds?: number;
41
65
  }
42
66
 
43
67
  /**
@@ -58,7 +82,8 @@ export interface AnthropicAdapter {
58
82
  /** Create an invoker for a specific model name (for multi-model setups) */
59
83
  createModelInvoker(
60
84
  model: string,
61
- maxTokens?: number
85
+ maxTokens?: number,
86
+ promptCache?: AnthropicPromptCacheConfig
62
87
  ): ModelInvoker<Anthropic.Messages.Message>;
63
88
  /**
64
89
  * Create prefixed thread activities for registration on the worker.
@@ -135,16 +160,41 @@ export function createAnthropicAdapter(
135
160
  ): AnthropicAdapter {
136
161
  const { redis, client } = config;
137
162
 
163
+ /**
164
+ * Common per-call config plumbed into both the provider thread
165
+ * manager (for message I/O) and the tiered base manager (for
166
+ * hot↔cold lifecycle ops). Keeping them in lockstep means a single
167
+ * `coldStore` / `ttlSeconds` configuration controls every Redis
168
+ * write the adapter does.
169
+ */
170
+ const baseExtras = {
171
+ ...(config.ttlSeconds !== undefined && { ttlSeconds: config.ttlSeconds }),
172
+ };
173
+
174
+ const makeProviderThread = (threadId: string, threadKey?: string) =>
175
+ createAnthropicThreadManager({
176
+ redis,
177
+ threadId,
178
+ key: threadKey,
179
+ ...baseExtras,
180
+ });
181
+
182
+ const makeTieredBase = (threadId: string, threadKey?: string) =>
183
+ createTieredThreadManager<StoredMessage>({
184
+ redis,
185
+ threadId,
186
+ key: threadKey,
187
+ idOf: storedMessageId,
188
+ ...baseExtras,
189
+ ...(config.coldStore && { coldStore: config.coldStore }),
190
+ });
191
+
138
192
  const threadOps: ThreadOps<AnthropicContent> = {
139
193
  async initializeThread(
140
194
  threadId: string,
141
195
  threadKey?: string
142
196
  ): Promise<void> {
143
- const thread = createAnthropicThreadManager({
144
- redis,
145
- threadId,
146
- key: threadKey,
147
- });
197
+ const thread = makeProviderThread(threadId, threadKey);
148
198
  await thread.initialize();
149
199
  },
150
200
 
@@ -154,11 +204,7 @@ export function createAnthropicAdapter(
154
204
  content: AnthropicContent,
155
205
  threadKey?: string
156
206
  ): Promise<void> {
157
- const thread = createAnthropicThreadManager({
158
- redis,
159
- threadId,
160
- key: threadKey,
161
- });
207
+ const thread = makeProviderThread(threadId, threadKey);
162
208
  await thread.appendUserMessage(id, content);
163
209
  },
164
210
 
@@ -168,21 +214,13 @@ export function createAnthropicAdapter(
168
214
  content: AnthropicSystemContent,
169
215
  threadKey?: string
170
216
  ): Promise<void> {
171
- const thread = createAnthropicThreadManager({
172
- redis,
173
- threadId,
174
- key: threadKey,
175
- });
217
+ const thread = makeProviderThread(threadId, threadKey);
176
218
  await thread.appendSystemMessage(id, content);
177
219
  },
178
220
 
179
221
  async appendToolResult(id: string, cfg: ToolResultConfig): Promise<void> {
180
222
  const { threadId, threadKey, toolCallId, toolName, content } = cfg;
181
- const thread = createAnthropicThreadManager({
182
- redis,
183
- threadId,
184
- key: threadKey,
185
- });
223
+ const thread = makeProviderThread(threadId, threadKey);
186
224
  await thread.appendToolResult(id, toolCallId, toolName, content);
187
225
  },
188
226
 
@@ -192,11 +230,7 @@ export function createAnthropicAdapter(
192
230
  message: Anthropic.Messages.Message,
193
231
  threadKey?: string
194
232
  ): Promise<void> {
195
- const thread = createAnthropicThreadManager({
196
- redis,
197
- threadId,
198
- key: threadKey,
199
- });
233
+ const thread = makeProviderThread(threadId, threadKey);
200
234
  await thread.appendAssistantMessage(id, message.content);
201
235
  },
202
236
 
@@ -210,6 +244,7 @@ export function createAnthropicAdapter(
210
244
  threadId: sourceThreadId,
211
245
  key: threadKey,
212
246
  hooks: config.hooks,
247
+ ...baseExtras,
213
248
  });
214
249
  await thread.fork(targetThreadId);
215
250
  },
@@ -217,9 +252,9 @@ export function createAnthropicAdapter(
217
252
  async truncateThread(
218
253
  threadId: string,
219
254
  messageId: string,
220
- threadKey?: string,
255
+ threadKey?: string
221
256
  ): Promise<void> {
222
- const thread = createAnthropicThreadManager({ redis, threadId, key: threadKey });
257
+ const thread = makeProviderThread(threadId, threadKey);
223
258
  await thread.truncateFromId(messageId);
224
259
  },
225
260
 
@@ -227,11 +262,7 @@ export function createAnthropicAdapter(
227
262
  threadId: string,
228
263
  threadKey?: string
229
264
  ): Promise<PersistedThreadState | null> {
230
- const thread = createAnthropicThreadManager({
231
- redis,
232
- threadId,
233
- key: threadKey,
234
- });
265
+ const thread = makeProviderThread(threadId, threadKey);
235
266
  return thread.loadState();
236
267
  },
237
268
 
@@ -240,13 +271,19 @@ export function createAnthropicAdapter(
240
271
  state: PersistedThreadState,
241
272
  threadKey?: string
242
273
  ): Promise<void> {
243
- const thread = createAnthropicThreadManager({
244
- redis,
245
- threadId,
246
- key: threadKey,
247
- });
274
+ const thread = makeProviderThread(threadId, threadKey);
248
275
  await thread.saveState(state);
249
276
  },
277
+
278
+ async hydrateThread(threadId: string, threadKey?: string): Promise<void> {
279
+ if (!config.coldStore) return;
280
+ await makeTieredBase(threadId, threadKey).hydrate();
281
+ },
282
+
283
+ async flushThread(threadId: string, threadKey?: string): Promise<void> {
284
+ if (!config.coldStore) return;
285
+ await makeTieredBase(threadId, threadKey).flush();
286
+ },
250
287
  };
251
288
 
252
289
  function createActivities<S extends string = "">(
@@ -263,7 +300,8 @@ export function createAnthropicAdapter(
263
300
 
264
301
  const makeInvoker = (
265
302
  model: string,
266
- maxTokens?: number
303
+ maxTokens?: number,
304
+ promptCache?: AnthropicPromptCacheConfig
267
305
  ): ModelInvoker<Anthropic.Messages.Message> => {
268
306
  const invokerConfig: AnthropicModelInvokerConfig = {
269
307
  redis,
@@ -273,6 +311,11 @@ export function createAnthropicAdapter(
273
311
  ...(config.maxTokens !== undefined && maxTokens === undefined
274
312
  ? { maxTokens: config.maxTokens }
275
313
  : {}),
314
+ ...(promptCache !== undefined
315
+ ? { promptCache }
316
+ : config.promptCache !== undefined
317
+ ? { promptCache: config.promptCache }
318
+ : {}),
276
319
  hooks: config.hooks,
277
320
  };
278
321
  return createAnthropicModelInvoker(invokerConfig);
@@ -45,3 +45,11 @@ export {
45
45
  invokeAnthropicModel,
46
46
  type AnthropicModelInvokerConfig,
47
47
  } from "./model-invoker";
48
+
49
+ // Prompt caching helpers
50
+ export {
51
+ addPromptCacheControl,
52
+ resolvePromptCacheOptions,
53
+ type AnthropicPromptCacheConfig,
54
+ type AnthropicPromptCacheOptions,
55
+ } from "./prompt-cache";
@@ -0,0 +1,110 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import type Anthropic from "@anthropic-ai/sdk";
3
+ import { createAnthropicModelInvoker } from "./model-invoker";
4
+ import type { StoredMessage } from "./thread-manager";
5
+
6
+ function createMockRedis(stored: StoredMessage[]) {
7
+ return {
8
+ exists: vi.fn().mockResolvedValue(1),
9
+ lrange: vi.fn().mockResolvedValue(stored.map((m) => JSON.stringify(m))),
10
+ ltrim: vi.fn().mockResolvedValue("OK"),
11
+ del: vi.fn().mockResolvedValue(1),
12
+ set: vi.fn().mockResolvedValue("OK"),
13
+ rpush: vi.fn().mockResolvedValue(1),
14
+ expire: vi.fn().mockResolvedValue(1),
15
+ eval: vi.fn().mockResolvedValue(1),
16
+ };
17
+ }
18
+
19
+ function createMockClient() {
20
+ const finalMessage: Anthropic.Messages.Message = {
21
+ id: "msg-response",
22
+ type: "message",
23
+ role: "assistant",
24
+ container: null,
25
+ model: "claude-test",
26
+ content: [{ type: "text", text: "ok", citations: null }],
27
+ stop_details: null,
28
+ stop_reason: "end_turn",
29
+ stop_sequence: null,
30
+ usage: {
31
+ cache_creation: null,
32
+ cache_creation_input_tokens: null,
33
+ cache_read_input_tokens: null,
34
+ inference_geo: null,
35
+ input_tokens: 1,
36
+ output_tokens: 1,
37
+ server_tool_use: null,
38
+ service_tier: null,
39
+ },
40
+ };
41
+ const stream = {
42
+ async *[Symbol.asyncIterator]() {},
43
+ finalMessage: vi.fn().mockResolvedValue(finalMessage),
44
+ };
45
+ const client = {
46
+ messages: {
47
+ stream: vi.fn().mockReturnValue(stream),
48
+ },
49
+ };
50
+ return { client, stream };
51
+ }
52
+
53
+ describe("createAnthropicModelInvoker prompt caching", () => {
54
+ it("sends explicit block-level cache_control by default", async () => {
55
+ const redis = createMockRedis([
56
+ { id: "msg-1", message: { role: "user", content: "hello" } },
57
+ ]);
58
+ const { client } = createMockClient();
59
+ const invoker = createAnthropicModelInvoker({
60
+ redis: redis as never,
61
+ client: client as never,
62
+ model: "claude-test",
63
+ });
64
+
65
+ await invoker({
66
+ threadId: "thread-1",
67
+ assistantMessageId: "assistant-1",
68
+ state: { tools: [] } as never,
69
+ agentName: "Agent",
70
+ });
71
+
72
+ const params = client.messages.stream.mock.calls[0]?.[0] as
73
+ | Anthropic.MessageCreateParams
74
+ | undefined;
75
+ expect(params).toBeDefined();
76
+ expect(params).not.toHaveProperty("cache_control");
77
+ expect(params?.messages[0]?.content).toEqual([
78
+ {
79
+ type: "text",
80
+ text: "hello",
81
+ cache_control: { type: "ephemeral", ttl: "5m" },
82
+ },
83
+ ]);
84
+ });
85
+
86
+ it("can disable prompt caching", async () => {
87
+ const redis = createMockRedis([
88
+ { id: "msg-1", message: { role: "user", content: "hello" } },
89
+ ]);
90
+ const { client } = createMockClient();
91
+ const invoker = createAnthropicModelInvoker({
92
+ redis: redis as never,
93
+ client: client as never,
94
+ model: "claude-test",
95
+ promptCache: false,
96
+ });
97
+
98
+ await invoker({
99
+ threadId: "thread-1",
100
+ assistantMessageId: "assistant-1",
101
+ state: { tools: [] } as never,
102
+ agentName: "Agent",
103
+ });
104
+
105
+ const params = client.messages.stream.mock.calls[0]?.[0] as
106
+ | Anthropic.MessageCreateParams
107
+ | undefined;
108
+ expect(params?.messages[0]?.content).toBe("hello");
109
+ });
110
+ });
@@ -6,6 +6,11 @@ import {
6
6
  createAnthropicThreadManager,
7
7
  type AnthropicThreadManagerHooks,
8
8
  } from "./thread-manager";
9
+ import {
10
+ addPromptCacheControl,
11
+ resolvePromptCacheOptions,
12
+ type AnthropicPromptCacheConfig,
13
+ } from "./prompt-cache";
9
14
  import { getActivityContext } from "../../../lib/activity";
10
15
 
11
16
  export interface AnthropicModelInvokerConfig {
@@ -14,6 +19,11 @@ export interface AnthropicModelInvokerConfig {
14
19
  model: string;
15
20
  /** Maximum tokens to generate. Defaults to 16384. */
16
21
  maxTokens?: number;
22
+ /**
23
+ * Controls Anthropic/Bedrock-compatible prompt caching. Defaults to enabled
24
+ * with an explicit 5 minute TTL. Set to `false` to disable.
25
+ */
26
+ promptCache?: AnthropicPromptCacheConfig;
17
27
  hooks?: AnthropicThreadManagerHooks;
18
28
  }
19
29
 
@@ -56,6 +66,7 @@ export function createAnthropicModelInvoker({
56
66
  client,
57
67
  model,
58
68
  maxTokens = 16384,
69
+ promptCache,
59
70
  hooks,
60
71
  }: AnthropicModelInvokerConfig) {
61
72
  return async function invokeAnthropicModel(
@@ -76,17 +87,24 @@ export function createAnthropicModelInvoker({
76
87
  // attempt's assistant + tool results so the LLM sees the same
77
88
  // pre-call state that it saw originally.
78
89
  await thread.truncateFromId(assistantMessageId);
79
- const { messages, system } = await thread.prepareForInvocation();
90
+ const prepared = await thread.prepareForInvocation();
80
91
 
81
92
  const anthropicTools = toAnthropicTools(state.tools);
82
- const tools = anthropicTools.length > 0 ? anthropicTools : undefined;
93
+ const preparedPayload = {
94
+ ...prepared,
95
+ ...(anthropicTools.length > 0 ? { tools: anthropicTools } : {}),
96
+ };
97
+ const cacheOptions = resolvePromptCacheOptions(promptCache);
98
+ const payload = cacheOptions
99
+ ? addPromptCacheControl(preparedPayload, cacheOptions)
100
+ : preparedPayload;
83
101
 
84
102
  const params: Anthropic.MessageCreateParams = {
85
103
  model,
86
104
  max_tokens: maxTokens,
87
- messages,
88
- ...(system ? { system } : {}),
89
- ...(tools ? { tools } : {}),
105
+ messages: payload.messages,
106
+ ...(payload.system ? { system: payload.system } : {}),
107
+ ...(payload.tools ? { tools: payload.tools } : {}),
90
108
  };
91
109
 
92
110
  const stream = client.messages.stream(params, { signal });
@@ -130,6 +148,7 @@ export async function invokeAnthropicModel({
130
148
  client,
131
149
  model,
132
150
  maxTokens,
151
+ promptCache,
133
152
  hooks,
134
153
  config,
135
154
  }: {
@@ -137,6 +156,7 @@ export async function invokeAnthropicModel({
137
156
  client: Anthropic;
138
157
  model: string;
139
158
  maxTokens?: number;
159
+ promptCache?: AnthropicPromptCacheConfig;
140
160
  hooks?: AnthropicThreadManagerHooks;
141
161
  config: ModelInvokerConfig;
142
162
  }): Promise<AgentResponse<Anthropic.Messages.Message>> {
@@ -145,6 +165,7 @@ export async function invokeAnthropicModel({
145
165
  client,
146
166
  model,
147
167
  maxTokens,
168
+ promptCache,
148
169
  hooks,
149
170
  });
150
171
  return invoker(config);
@@ -0,0 +1,134 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import type Anthropic from "@anthropic-ai/sdk";
3
+ import {
4
+ addPromptCacheControl,
5
+ resolvePromptCacheOptions,
6
+ } from "./prompt-cache";
7
+
8
+ function firstContentBlock(
9
+ message: Anthropic.Messages.MessageParam
10
+ ): Record<string, unknown> {
11
+ if (!Array.isArray(message.content)) {
12
+ throw new Error("Expected array content");
13
+ }
14
+ const block = message.content[0];
15
+ if (!block || typeof block !== "object") {
16
+ throw new Error("Expected content block");
17
+ }
18
+ return block as unknown as Record<string, unknown>;
19
+ }
20
+
21
+ function messageAt(
22
+ messages: Anthropic.Messages.MessageParam[],
23
+ index: number
24
+ ): Anthropic.Messages.MessageParam {
25
+ const message = messages[index];
26
+ if (!message) throw new Error(`Expected message at index ${String(index)}`);
27
+ return message;
28
+ }
29
+
30
+ describe("Anthropic prompt cache helpers", () => {
31
+ it("enables prompt caching by default", () => {
32
+ expect(resolvePromptCacheOptions()).toEqual({});
33
+ });
34
+
35
+ it("can be disabled", () => {
36
+ expect(resolvePromptCacheOptions(false)).toBeUndefined();
37
+ });
38
+
39
+ it("adds Bedrock-compatible block-level cache_control to the last message", () => {
40
+ const payload = {
41
+ messages: [{ role: "user" as const, content: "hello" }],
42
+ };
43
+
44
+ const result = addPromptCacheControl(payload);
45
+ const block = firstContentBlock(messageAt(result.messages, 0));
46
+
47
+ expect(block).toEqual({
48
+ type: "text",
49
+ text: "hello",
50
+ cache_control: { type: "ephemeral", ttl: "5m" },
51
+ });
52
+ expect("cache_control" in result).toBe(false);
53
+ });
54
+
55
+ it("supports a 1h TTL", () => {
56
+ const result = addPromptCacheControl(
57
+ {
58
+ messages: [
59
+ {
60
+ role: "user" as const,
61
+ content: [{ type: "text" as const, text: "hello" }],
62
+ },
63
+ ],
64
+ },
65
+ { ttl: "1h" }
66
+ );
67
+
68
+ expect(
69
+ firstContentBlock(messageAt(result.messages, 0)).cache_control
70
+ ).toEqual({
71
+ type: "ephemeral",
72
+ ttl: "1h",
73
+ });
74
+ });
75
+
76
+ it("does not add a fifth cache breakpoint", () => {
77
+ const cacheControl = { type: "ephemeral" as const };
78
+ const result = addPromptCacheControl({
79
+ system: [
80
+ { type: "text" as const, text: "system", cache_control: cacheControl },
81
+ ],
82
+ tools: [
83
+ {
84
+ name: "tool",
85
+ description: "A test tool",
86
+ input_schema: { type: "object", properties: {} },
87
+ cache_control: cacheControl,
88
+ },
89
+ ],
90
+ messages: [
91
+ {
92
+ role: "user" as const,
93
+ content: [
94
+ { type: "text" as const, text: "1", cache_control: cacheControl },
95
+ { type: "text" as const, text: "2", cache_control: cacheControl },
96
+ { type: "text" as const, text: "latest" },
97
+ ],
98
+ },
99
+ ],
100
+ });
101
+
102
+ const latest = (
103
+ messageAt(result.messages, 0).content as unknown as Array<
104
+ Record<string, unknown>
105
+ >
106
+ )[2];
107
+ expect(latest?.cache_control).toBeUndefined();
108
+ });
109
+
110
+ it("preserves an existing cache marker on the last cacheable block", () => {
111
+ const cacheControl = { type: "ephemeral" as const, ttl: "1h" as const };
112
+ const payload = {
113
+ messages: [
114
+ {
115
+ role: "user" as const,
116
+ content: [
117
+ {
118
+ type: "text" as const,
119
+ text: "hello",
120
+ cache_control: cacheControl,
121
+ },
122
+ ],
123
+ },
124
+ ],
125
+ };
126
+
127
+ const result = addPromptCacheControl(payload, { ttl: "5m" });
128
+
129
+ expect(result).toBe(payload);
130
+ expect(
131
+ firstContentBlock(messageAt(result.messages, 0)).cache_control
132
+ ).toEqual(cacheControl);
133
+ });
134
+ });