@codilore/llm 1.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/AGENTS.md +321 -0
  2. package/README.md +131 -0
  3. package/example/call-sites.md +591 -0
  4. package/example/tutorial.ts +255 -0
  5. package/package.json +50 -0
  6. package/script/recording-cost-report.ts +250 -0
  7. package/script/setup-recording-env.ts +542 -0
  8. package/src/cache-policy.ts +111 -0
  9. package/src/index.ts +32 -0
  10. package/src/llm.ts +186 -0
  11. package/src/protocols/anthropic-messages.ts +841 -0
  12. package/src/protocols/bedrock-converse.ts +649 -0
  13. package/src/protocols/bedrock-event-stream.ts +87 -0
  14. package/src/protocols/gemini.ts +465 -0
  15. package/src/protocols/index.ts +6 -0
  16. package/src/protocols/openai-chat.ts +431 -0
  17. package/src/protocols/openai-compatible-chat.ts +24 -0
  18. package/src/protocols/openai-responses.ts +987 -0
  19. package/src/protocols/shared.ts +283 -0
  20. package/src/protocols/utils/bedrock-auth.ts +70 -0
  21. package/src/protocols/utils/bedrock-cache.ts +37 -0
  22. package/src/protocols/utils/bedrock-media.ts +80 -0
  23. package/src/protocols/utils/cache.ts +16 -0
  24. package/src/protocols/utils/gemini-tool-schema.ts +101 -0
  25. package/src/protocols/utils/lifecycle.ts +102 -0
  26. package/src/protocols/utils/openai-options.ts +84 -0
  27. package/src/protocols/utils/tool-stream.ts +218 -0
  28. package/src/provider.ts +37 -0
  29. package/src/providers/amazon-bedrock.ts +43 -0
  30. package/src/providers/anthropic.ts +35 -0
  31. package/src/providers/azure.ts +110 -0
  32. package/src/providers/cloudflare.ts +127 -0
  33. package/src/providers/github-copilot.ts +66 -0
  34. package/src/providers/google.ts +35 -0
  35. package/src/providers/index.ts +11 -0
  36. package/src/providers/openai-compatible-profile.ts +20 -0
  37. package/src/providers/openai-compatible.ts +65 -0
  38. package/src/providers/openai-options.ts +81 -0
  39. package/src/providers/openai.ts +63 -0
  40. package/src/providers/openrouter.ts +98 -0
  41. package/src/providers/xai.ts +56 -0
  42. package/src/route/auth-options.ts +57 -0
  43. package/src/route/auth.ts +156 -0
  44. package/src/route/client.ts +434 -0
  45. package/src/route/endpoint.ts +53 -0
  46. package/src/route/executor.ts +374 -0
  47. package/src/route/framing.ts +27 -0
  48. package/src/route/index.ts +25 -0
  49. package/src/route/protocol.ts +84 -0
  50. package/src/route/transport/http.ts +108 -0
  51. package/src/route/transport/index.ts +33 -0
  52. package/src/route/transport/websocket.ts +280 -0
  53. package/src/schema/errors.ts +203 -0
  54. package/src/schema/events.ts +370 -0
  55. package/src/schema/ids.ts +43 -0
  56. package/src/schema/index.ts +5 -0
  57. package/src/schema/messages.ts +404 -0
  58. package/src/schema/options.ts +221 -0
  59. package/src/tool-runtime.ts +78 -0
  60. package/src/tool.ts +241 -0
  61. package/src/utils/record.ts +3 -0
  62. package/sst-env.d.ts +10 -0
  63. package/test/adapter.test.ts +164 -0
  64. package/test/auth-options.types.ts +168 -0
  65. package/test/auth.test.ts +103 -0
  66. package/test/cache-policy.test.ts +262 -0
  67. package/test/continuation-scenarios.ts +104 -0
  68. package/test/endpoint.test.ts +58 -0
  69. package/test/executor.test.ts +418 -0
  70. package/test/exports.test.ts +62 -0
  71. package/test/fixtures/media/restroom.png +0 -0
  72. package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
  73. package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
  74. package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
  75. package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
  76. package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
  77. package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
  78. package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
  79. package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
  80. package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
  81. package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
  82. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  83. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
  84. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  85. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
  86. package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
  87. package/test/fixtures/recordings/gemini/streams-text.json +28 -0
  88. package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
  89. package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
  90. package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
  91. package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
  92. package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
  93. package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
  94. package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
  95. package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
  96. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
  97. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
  98. package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
  99. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
  100. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
  101. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
  102. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
  103. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
  104. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
  105. package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
  106. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
  107. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
  108. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
  109. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
  110. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
  111. package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
  112. package/test/generate-object.test.ts +184 -0
  113. package/test/lib/effect.ts +50 -0
  114. package/test/lib/http.ts +98 -0
  115. package/test/lib/openai-chunks.ts +27 -0
  116. package/test/lib/sse.ts +17 -0
  117. package/test/lib/tool-runtime.ts +146 -0
  118. package/test/llm.test.ts +167 -0
  119. package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
  120. package/test/provider/anthropic-messages.recorded.test.ts +46 -0
  121. package/test/provider/anthropic-messages.test.ts +829 -0
  122. package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
  123. package/test/provider/bedrock-converse.test.ts +707 -0
  124. package/test/provider/cloudflare.test.ts +230 -0
  125. package/test/provider/gemini-cache.recorded.test.ts +48 -0
  126. package/test/provider/gemini.test.ts +476 -0
  127. package/test/provider/golden.recorded.test.ts +219 -0
  128. package/test/provider/openai-chat.test.ts +446 -0
  129. package/test/provider/openai-compatible-chat.test.ts +238 -0
  130. package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
  131. package/test/provider/openai-responses.test.ts +1322 -0
  132. package/test/provider/openrouter.test.ts +56 -0
  133. package/test/provider.types.ts +41 -0
  134. package/test/recorded-golden.ts +97 -0
  135. package/test/recorded-runner.ts +100 -0
  136. package/test/recorded-scenarios.ts +531 -0
  137. package/test/recorded-test.ts +74 -0
  138. package/test/recorded-utils.ts +56 -0
  139. package/test/recorded-websocket.ts +26 -0
  140. package/test/route.test.ts +43 -0
  141. package/test/schema.test.ts +97 -0
  142. package/test/tool-runtime.test.ts +802 -0
  143. package/test/tool-stream.test.ts +99 -0
  144. package/test/tool.types.ts +40 -0
  145. package/tsconfig.json +15 -0
@@ -0,0 +1,829 @@
1
+ import { describe, expect } from "bun:test"
2
+ import { Effect } from "effect"
3
+ import { HttpClientRequest } from "effect/unstable/http"
4
+ import { CacheHint, LLM, LLMError, Message, ToolCallPart, Usage } from "../../src"
5
+ import { Auth, LLMClient } from "../../src/route"
6
+ import * as AnthropicMessages from "../../src/protocols/anthropic-messages"
7
+ import { continuationRequest, nativeAnthropicMessagesContinuation } from "../continuation-scenarios"
8
+ import { it } from "../lib/effect"
9
+ import { dynamicResponse, fixedResponse } from "../lib/http"
10
+ import { sseEvents } from "../lib/sse"
11
+
12
// Model handle for `claude-sonnet-4-5`, routed to a stand-in base URL with a
// static `x-api-key` header.
const sonnetRoute = AnthropicMessages.route.with({
  endpoint: { baseURL: "https://api.anthropic.test/v1/" },
  auth: Auth.header("x-api-key", "test"),
})
const model = sonnetRoute.model({ id: "claude-sonnet-4-5" })
15
+
16
// Model handle for `claude-opus-4-8`, configured with the same stand-in base
// URL and static `x-api-key` header.
const opusRoute = AnthropicMessages.route.with({
  endpoint: { baseURL: "https://api.anthropic.test/v1/" },
  auth: Auth.header("x-api-key", "test"),
})
const opus48 = opusRoute.model({ id: "claude-opus-4-8" })
19
+
20
// Manual cache hint attached to the system part of the shared fixture.
const systemCacheHint = new CacheHint({ type: "ephemeral" })

// Shared request fixture reused by the prepare and stream tests below.
const request = LLM.request({
  id: "req_1",
  model,
  system: {
    type: "text",
    text: "You are concise.",
    cache: systemCacheHint,
  },
  prompt: "Say hello.",
  // The fixture is older than the `cache: "auto"` default, so the policy is
  // pinned off; wire-shape assertions then only observe the manual hint on the
  // system part.
  cache: "none",
  generation: {
    maxTokens: 20,
    temperature: 0,
  },
})
30
+
31
/** Any content block that can appear inside a message of the wire body. */
type AnthropicContentBlock = AnthropicMessages.AnthropicMessagesBody["messages"][number]["content"][number]

/** The `tool_result` member of the wire body's content-block union. */
type AnthropicToolResult = Extract<AnthropicContentBlock, { readonly type: "tool_result" }>
35
+
36
/**
 * Finds the first `tool_result` block among the content of the `user`-role
 * messages in a prepared body and asserts that it exists.
 *
 * @param body - The prepared Anthropic Messages wire body to search.
 * @returns The first `tool_result` block found in a user message.
 * @throws If no user message contains a `tool_result` block.
 */
const expectToolResult = (body: AnthropicMessages.AnthropicMessagesBody): AnthropicToolResult => {
  const result = body.messages
    .flatMap((message) => (message.role === "user" ? message.content : []))
    .find((block): block is AnthropicToolResult => block.type === "tool_result")
  expect(result).toBeDefined()
  // Narrow explicitly instead of using a non-null assertion, so the return type
  // is backed by a runtime check rather than trusted blindly.
  if (result === undefined) {
    throw new Error("expected a tool_result block in a user message")
  }
  return result
}
43
+
44
+ describe("Anthropic Messages route", () => {
45
// The shared fixture should lower to a streaming body that carries the manual
// cache hint on the system block.
it.effect("prepares Anthropic Messages target", () =>
  Effect.gen(function* () {
    const { body } = yield* LLMClient.prepare(request)

    expect(body).toEqual({
      model: "claude-sonnet-4-5",
      system: [
        {
          type: "text",
          text: "You are concise.",
          cache_control: { type: "ephemeral" },
        },
      ],
      messages: [
        {
          role: "user",
          content: [{ type: "text", text: "Say hello." }],
        },
      ],
      stream: true,
      max_tokens: 20,
      temperature: 0,
    })
  }),
)
59
+
60
// On the Opus 4.8 model a mid-conversation system message is expected to stay
// a `system`-role wire message and keep its cache hint.
it.effect("lowers chronological system updates natively for Claude Opus 4.8 with cache hints", () =>
  Effect.gen(function* () {
    const conversation = LLM.request({
      model: opus48,
      messages: [
        Message.user("Before."),
        Message.system([{ type: "text", text: "Operator update.", cache: new CacheHint({ type: "ephemeral" }) }]),
        Message.assistant("After."),
      ],
      cache: "none",
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(conversation)

    expect(body.messages).toEqual([
      { role: "user", content: [{ type: "text", text: "Before." }] },
      {
        role: "system",
        content: [{ type: "text", text: "Operator update.", cache_control: { type: "ephemeral" } }],
      },
      { role: "assistant", content: [{ type: "text", text: "After." }] },
    ])
  }),
)
84
+
85
// On the Sonnet model the system update is expected to be folded into the
// preceding user message as escaped, `<system-update>`-wrapped text.
it.effect("lowers chronological system updates to wrapped user text for unsupported Anthropic models", () =>
  Effect.gen(function* () {
    const conversation = LLM.request({
      model,
      messages: [
        Message.user("Before."),
        Message.system("Treat </system-update> literally."),
        Message.assistant("After."),
      ],
      cache: "none",
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(conversation)

    expect(body.messages).toEqual([
      {
        role: "user",
        content: [
          { type: "text", text: "Before." },
          { type: "text", text: "<system-update>\nTreat &lt;/system-update&gt; literally.\n</system-update>" },
        ],
      },
      { role: "assistant", content: [{ type: "text", text: "After." }] },
    ])
  }),
)
111
+
112
// A system message carrying media content must fail during prepare.
it.effect("rejects non-text chronological system update content before send", () =>
  Effect.gen(function* () {
    const withMediaSystemMessage = LLM.request({
      model: opus48,
      messages: [
        Message.user("Before."),
        Message.make({ role: "system", content: { type: "media", mediaType: "image/png", data: "AAECAw==" } }),
      ],
    })
    // Flip so the failure becomes the yielded value.
    const failure = yield* Effect.flip(LLMClient.prepare(withMediaSystemMessage))

    expect(failure.message).toContain("Anthropic Messages system messages only support text content for now")
  }),
)
127
+
128
// Each history below places a system message somewhere the native lowering
// rejects; the assertions pin the error text for every case.
it.effect("rejects invalid native chronological system update placement", () =>
  Effect.gen(function* () {
    // Prepares an Opus 4.8 request for the given history and yields the failure.
    const placementError = (messages: Parameters<typeof LLM.request>[0]["messages"]) =>
      Effect.flip(LLMClient.prepare(LLM.request({ model: opus48, messages, cache: "none" })))

    const leading = yield* placementError([Message.system("First.")])
    expect(leading.message).toContain("cannot be the first message")

    const backToBack = yield* placementError([
      Message.user("Before."),
      Message.system("One."),
      Message.system("Two."),
    ])
    expect(backToBack.message).toContain("cannot be consecutive")

    const afterPlainAssistant = yield* placementError([
      Message.assistant("Plain."),
      Message.system("After plain assistant."),
    ])
    expect(afterPlainAssistant.message).toContain(
      "must follow a user message, tool result, or assistant server tool use",
    )

    const insideToolExchange = yield* placementError([
      Message.user("Use the tool."),
      Message.assistant([ToolCallPart.make({ id: "call_1", name: "lookup", input: {} })]),
      Message.system("Too early."),
      Message.tool({ id: "call_1", name: "lookup", result: "Done." }),
    ])
    expect(insideToolExchange.message).toContain("cannot appear between a local tool call and its tool result")
  }),
)
150
+
151
// A tool call followed by its result should lower to a `tool_use` block and a
// user-role `tool_result` block whose content is the JSON-encoded result.
it.effect("prepares tool call and tool result messages", () =>
  Effect.gen(function* () {
    const toolExchange = LLM.request({
      id: "req_tool_result",
      model,
      messages: [
        Message.user("What is the weather?"),
        Message.assistant([ToolCallPart.make({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
        Message.tool({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
      ],
      cache: "none",
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(toolExchange)

    expect(body).toEqual({
      model: "claude-sonnet-4-5",
      messages: [
        { role: "user", content: [{ type: "text", text: "What is the weather?" }] },
        {
          role: "assistant",
          content: [{ type: "tool_use", id: "call_1", name: "lookup", input: { query: "weather" } }],
        },
        { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: '{"forecast":"sunny"}' }] },
      ],
      stream: true,
      max_tokens: 4096,
    })
  }),
)
181
+
182
// Regression guard: results from screenshot/read tools have to remain
// structured, otherwise base64 image data would end up JSON-stringified inside
// `tool_result.content`.
it.effect("lowers image tool-result content as structured image blocks", () =>
  Effect.gen(function* () {
    const imageToolResult = LLM.request({
      id: "req_tool_result_image",
      model,
      messages: [
        Message.user("Show me the screenshot."),
        Message.assistant([ToolCallPart.make({ id: "call_1", name: "read", input: { filePath: "shot.png" } })]),
        Message.tool({
          id: "call_1",
          name: "read",
          resultType: "content",
          result: [
            { type: "text", text: "Image read successfully" },
            { type: "media", mediaType: "image/png", data: "AAECAw==" },
          ],
        }),
      ],
      cache: "none",
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(imageToolResult)
    const lowered = expectToolResult(body)

    expect(lowered.content).toEqual([
      { type: "text", text: "Image read successfully" },
      { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } },
    ])
  }),
)
213
+
214
// A tool result made of a single media part should still lower to an array
// holding one base64 image block.
it.effect("lowers single-image tool-result content as a structured image block", () =>
  Effect.gen(function* () {
    const screenshotOnly = LLM.request({
      id: "req_tool_result_image_only",
      model,
      messages: [
        Message.assistant([ToolCallPart.make({ id: "call_1", name: "screenshot", input: {} })]),
        Message.tool({
          id: "call_1",
          name: "screenshot",
          resultType: "content",
          result: [{ type: "media", mediaType: "image/jpeg", data: "/9j/AA==" }],
        }),
      ],
      cache: "none",
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(screenshotOnly)
    const lowered = expectToolResult(body)

    expect(lowered.content).toEqual([
      { type: "image", source: { type: "base64", media_type: "image/jpeg", data: "/9j/AA==" } },
    ])
  }),
)
238
+
239
// Audio media inside a tool result must fail during prepare, and the error has
// to name both the protocol and the offending media type.
it.effect("rejects non-image media in tool-result content with a clear error", () =>
  Effect.gen(function* () {
    const audioToolResult = LLM.request({
      id: "req_tool_result_unsupported_media",
      model,
      messages: [
        Message.assistant([ToolCallPart.make({ id: "call_1", name: "fetch", input: {} })]),
        Message.tool({
          id: "call_1",
          name: "fetch",
          resultType: "content",
          result: [{ type: "media", mediaType: "audio/mpeg", data: "AAECAw==" }],
        }),
      ],
      cache: "none",
    })
    const failure = yield* Effect.flip(LLMClient.prepare(audioToolResult))

    expect(failure.message).toContain("Anthropic Messages")
    expect(failure.message).toContain("audio/mpeg")
  }),
)
262
+
263
// Lowers the shared continuation scenario and checks the system prompt, the
// full message history, and the declared tool.
it.effect("prepares the composed native continuation request", () =>
  Effect.gen(function* () {
    const continuation = continuationRequest({
      id: "req_native_continuation_anthropic",
      model,
      features: nativeAnthropicMessagesContinuation,
    })
    const { body } = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(continuation)

    expect(body).toMatchObject({
      system: [{ type: "text", text: "You are concise. Continue from the provided history." }],
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: "What is shown here?" },
            { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } },
          ],
        },
        {
          role: "assistant",
          content: [
            { type: "thinking", thinking: "I inspected the previous turn.", signature: "sig_continuation_1" },
            { type: "text", text: "It shows a small test image." },
          ],
        },
        { role: "user", content: [{ type: "text", text: "Check the weather in Paris before continuing." }] },
        {
          role: "assistant",
          content: [{ type: "tool_use", id: "call_weather_1", name: "get_weather", input: { city: "Paris" } }],
        },
        {
          role: "user",
          content: [{ type: "tool_result", tool_use_id: "call_weather_1", content: '{"temperature":22}' }],
        },
        { role: "assistant", content: [{ type: "text", text: "Paris is 22 degrees." }] },
        { role: "user", content: [{ type: "text", text: "Continue from this conversation in one short sentence." }] },
      ],
    })
    expect(body.tools).toEqual([expect.objectContaining({ name: "get_weather" })])
  }),
)
306
+
307
// A reasoning part with an Anthropic signature in its provider metadata should
// lower to a `thinking` block that carries that signature.
it.effect("lowers preserved Anthropic reasoning signature metadata", () =>
  Effect.gen(function* () {
    const signedReasoning = LLM.request({
      model,
      messages: [
        Message.assistant([
          { type: "reasoning", text: "thinking", providerMetadata: { anthropic: { signature: "sig_1" } } },
        ]),
      ],
    })
    const { body } = yield* LLMClient.prepare(signedReasoning)

    expect(body).toMatchObject({
      messages: [
        {
          role: "assistant",
          content: [{ type: "thinking", thinking: "thinking", signature: "sig_1" }],
        },
      ],
    })
  }),
)
325
+
326
// Replays a canned SSE stream with a text block, a signed thinking block and
// usage counters, then checks the aggregated response and key events.
it.effect("parses text, reasoning, and usage stream fixtures", () =>
  Effect.gen(function* () {
    const stream = sseEvents(
      { type: "message_start", message: { usage: { input_tokens: 5, cache_read_input_tokens: 1 } } },
      { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
      { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } },
      { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "!" } },
      { type: "content_block_stop", index: 0 },
      { type: "content_block_start", index: 1, content_block: { type: "thinking", thinking: "" } },
      { type: "content_block_delta", index: 1, delta: { type: "thinking_delta", thinking: "thinking" } },
      { type: "content_block_delta", index: 1, delta: { type: "signature_delta", signature: "sig_1" } },
      { type: "content_block_stop", index: 1 },
      {
        type: "message_delta",
        delta: { stop_reason: "end_turn", stop_sequence: "\n\nHuman:" },
        usage: { output_tokens: 2 },
      },
      { type: "message_stop" },
    )
    const cannedHttp = fixedResponse(stream)
    const response = yield* LLMClient.generate(request).pipe(Effect.provide(cannedHttp))

    expect(response.text).toBe("Hello!")
    expect(response.reasoning).toBe("thinking")
    expect(response.usage).toMatchObject({
      inputTokens: 6,
      outputTokens: 2,
      nonCachedInputTokens: 5,
      cacheReadInputTokens: 1,
      totalTokens: 8,
    })

    const reasoningEnd = response.events.find((event) => event.type === "reasoning-end")
    expect(reasoningEnd).toMatchObject({
      providerMetadata: { anthropic: { signature: "sig_1" } },
    })

    const lastEvent = response.events.at(-1)
    expect(lastEvent).toMatchObject({
      type: "finish",
      reason: "stop",
      providerMetadata: { anthropic: { stopSequence: "\n\nHuman:" } },
    })
  }),
)
366
+
367
// Tool input arrives as two partial JSON deltas; the test pins the assembled
// tool call and the exact event sequence.
it.effect("assembles streamed tool call input", () =>
  Effect.gen(function* () {
    const stream = sseEvents(
      { type: "message_start", message: { usage: { input_tokens: 5 } } },
      { type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "call_1", name: "lookup" } },
      { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query"' } },
      { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: ':"weather"}' } },
      { type: "content_block_stop", index: 0 },
      { type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } },
    )
    const withLookupTool = LLM.updateRequest(request, {
      tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
    })
    const response = yield* LLMClient.generate(withLookupTool).pipe(Effect.provide(fixedResponse(stream)))

    // Usage object expected on both the step-finish and finish events.
    const expectedUsage = new Usage({
      inputTokens: 5,
      outputTokens: 1,
      nonCachedInputTokens: 5,
      cacheReadInputTokens: undefined,
      cacheWriteInputTokens: undefined,
      totalTokens: 6,
      providerMetadata: { anthropic: { input_tokens: 5, output_tokens: 1 } },
    })

    expect(response.toolCalls).toEqual([
      {
        type: "tool-call",
        id: "call_1",
        name: "lookup",
        input: { query: "weather" },
        providerExecuted: undefined,
        providerMetadata: undefined,
      },
    ])
    expect(response.events).toEqual([
      { type: "step-start", index: 0 },
      { type: "tool-input-start", id: "call_1", name: "lookup" },
      { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
      { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
      { type: "tool-input-end", id: "call_1", name: "lookup", providerMetadata: undefined },
      {
        type: "tool-call",
        id: "call_1",
        name: "lookup",
        input: { query: "weather" },
        providerExecuted: undefined,
        providerMetadata: undefined,
      },
      {
        type: "step-finish",
        index: 0,
        reason: "tool-calls",
        usage: expectedUsage,
        providerMetadata: undefined,
      },
      {
        type: "finish",
        reason: "tool-calls",
        providerMetadata: undefined,
        usage: expectedUsage,
      },
    ])
  }),
)
426
+
427
// A stream consisting of a single `error` event should produce exactly one
// provider-error event.
it.effect("emits provider-error events for mid-stream provider errors", () =>
  Effect.gen(function* () {
    const overloadedStream = fixedResponse(
      sseEvents({ type: "error", error: { type: "overloaded_error", message: "Overloaded" } }),
    )
    const response = yield* LLMClient.generate(request).pipe(Effect.provide(overloadedStream))

    // The error type is prefixed to the message so callers can tell overloads,
    // rate limits, and quota errors apart without parsing the message string.
    expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error: Overloaded" }])
  }),
)
440
+
441
// With an empty error message, the provider-error text should be the bare
// error type.
it.effect("falls back to error type when no message is present", () =>
  Effect.gen(function* () {
    const emptyMessageStream = fixedResponse(
      sseEvents({ type: "error", error: { type: "overloaded_error", message: "" } }),
    )
    const response = yield* LLMClient.generate(request).pipe(Effect.provide(emptyMessageStream))

    expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error" }])
  }),
)
450
+
451
// An `error` event with no payload at all should still yield a provider-error
// event with a fixed default message.
it.effect("falls back to a stable default when error payload is absent", () =>
  Effect.gen(function* () {
    const bareErrorStream = fixedResponse(sseEvents({ type: "error" }))
    const response = yield* LLMClient.generate(request).pipe(Effect.provide(bareErrorStream))

    expect(response.events).toEqual([{ type: "provider-error", message: "Anthropic Messages stream error" }])
  }),
)
460
+
461
// An HTTP 400 JSON error response should fail the effect with an `LLMError`
// tagged `InvalidRequest`.
it.effect("fails HTTP provider errors before stream parsing", () =>
  Effect.gen(function* () {
    const badRequest = fixedResponse(
      '{"type":"error","error":{"type":"invalid_request_error","message":"Bad request"}}',
      {
        status: 400,
        headers: { "content-type": "application/json" },
      },
    )
    const failure = yield* LLMClient.generate(request).pipe(Effect.provide(badRequest), Effect.flip)

    expect(failure).toBeInstanceOf(LLMError)
    expect(failure.reason).toMatchObject({ _tag: "InvalidRequest" })
    expect(failure.message).toContain("HTTP 400")
  }),
)
478
+
479
// Replays a server-side web search (tool use, result, then text) and checks
// that both tool events are flagged as provider-executed.
it.effect("decodes server_tool_use + web_search_tool_result as provider-executed events", () =>
  Effect.gen(function* () {
    const stream = sseEvents(
      { type: "message_start", message: { usage: { input_tokens: 5 } } },
      {
        type: "content_block_start",
        index: 0,
        content_block: { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search" },
      },
      {
        type: "content_block_delta",
        index: 0,
        delta: { type: "input_json_delta", partial_json: '{"query":"effect 4"}' },
      },
      { type: "content_block_stop", index: 0 },
      {
        type: "content_block_start",
        index: 1,
        content_block: {
          type: "web_search_tool_result",
          tool_use_id: "srvtoolu_abc",
          content: [{ type: "web_search_result", url: "https://example.com", title: "Example" }],
        },
      },
      { type: "content_block_stop", index: 1 },
      { type: "content_block_start", index: 2, content_block: { type: "text", text: "" } },
      { type: "content_block_delta", index: 2, delta: { type: "text_delta", text: "Found it." } },
      { type: "content_block_stop", index: 2 },
      { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 8 } },
    )
    const withWebSearch = LLM.updateRequest(request, {
      tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
    })
    const response = yield* LLMClient.generate(withWebSearch).pipe(Effect.provide(fixedResponse(stream)))

    const callEvent = response.events.find((event) => event.type === "tool-call")
    expect(callEvent).toEqual({
      type: "tool-call",
      id: "srvtoolu_abc",
      name: "web_search",
      input: { query: "effect 4" },
      providerExecuted: true,
    })

    const resultEvent = response.events.find((event) => event.type === "tool-result")
    expect(resultEvent).toEqual({
      type: "tool-result",
      id: "srvtoolu_abc",
      name: "web_search",
      result: { type: "json", value: [{ type: "web_search_result", url: "https://example.com", title: "Example" }] },
      providerExecuted: true,
      providerMetadata: { anthropic: { blockType: "web_search_tool_result" } },
    })

    expect(response.text).toBe("Found it.")
    expect(response.events.at(-1)).toMatchObject({ type: "finish", reason: "stop" })
  }),
)
536
+
537
// When the web search result block holds an error object, the tool-result
// event should carry an `error`-typed result and stay provider-executed.
it.effect("decodes web_search_tool_result_error as provider-executed error result", () =>
  Effect.gen(function* () {
    const stream = sseEvents(
      { type: "message_start", message: { usage: { input_tokens: 5 } } },
      {
        type: "content_block_start",
        index: 0,
        content_block: { type: "server_tool_use", id: "srvtoolu_x", name: "web_search" },
      },
      { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"q"}' } },
      { type: "content_block_stop", index: 0 },
      {
        type: "content_block_start",
        index: 1,
        content_block: {
          type: "web_search_tool_result",
          tool_use_id: "srvtoolu_x",
          content: { type: "web_search_tool_result_error", error_code: "max_uses_exceeded" },
        },
      },
      { type: "content_block_stop", index: 1 },
      { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 1 } },
    )
    const withWebSearch = LLM.updateRequest(request, {
      tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
    })
    const response = yield* LLMClient.generate(withWebSearch).pipe(Effect.provide(fixedResponse(stream)))

    const resultEvent = response.events.find((event) => event.type === "tool-result")
    expect(resultEvent).toMatchObject({
      type: "tool-result",
      id: "srvtoolu_x",
      name: "web_search",
      result: { type: "error" },
      providerExecuted: true,
    })
  }),
)
576
+
577
// Provider-executed tool-call and tool-result parts in assistant history are
// expected to lower back into `server_tool_use` / `web_search_tool_result`.
it.effect("round-trips provider-executed assistant content into server tool blocks", () =>
  Effect.gen(function* () {
    const history = LLM.request({
      id: "req_round_trip",
      model,
      messages: [
        Message.user("Search for something."),
        Message.assistant([
          {
            type: "tool-call",
            id: "srvtoolu_abc",
            name: "web_search",
            input: { query: "effect 4" },
            providerExecuted: true,
          },
          {
            type: "tool-result",
            id: "srvtoolu_abc",
            name: "web_search",
            result: { type: "json", value: [{ url: "https://example.com" }] },
            providerExecuted: true,
          },
          { type: "text", text: "Found it." },
        ]),
        Message.user("Thanks."),
      ],
    })
    const { body } = yield* LLMClient.prepare(history)

    expect(body).toMatchObject({
      messages: [
        { role: "user", content: [{ type: "text", text: "Search for something." }] },
        {
          role: "assistant",
          content: [
            { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search", input: { query: "effect 4" } },
            {
              type: "web_search_tool_result",
              tool_use_id: "srvtoolu_abc",
              content: [{ url: "https://example.com" }],
            },
            { type: "text", text: "Found it." },
          ],
        },
        { role: "user", content: [{ type: "text", text: "Thanks." }] },
      ],
    })
  }),
)
627
+
628
// A provider-executed tool-result whose tool name is not recognised must make
// `LLMClient.prepare` fail, and the failure message must mention the offending name.
it.effect("rejects round-trip for unknown server tool names", () =>
  Effect.gen(function* () {
    const unsupported = LLM.request({
      id: "req_unknown_server_tool",
      model,
      messages: [
        Message.assistant([
          {
            type: "tool-result",
            id: "srvtoolu_abc",
            name: "future_server_tool",
            result: { type: "json", value: {} },
            providerExecuted: true,
          },
        ]),
      ],
    })

    // Flip the effect so the expected failure becomes the yielded value; an unexpected
    // success would fail the test instead.
    const failure = yield* Effect.flip(LLMClient.prepare(unsupported))

    expect(failure.message).toContain("future_server_tool")
  }),
)
651
+
652
// A user message mixing text and base64 media must be sent as a text block plus an
// `image` block with a base64 source; the streamed reply is then decoded to plain text.
it.effect("continues a conversation with user image content", () =>
  Effect.gen(function* () {
    const generation = LLMClient.generate(
      LLM.request({
        id: "req_media",
        model,
        messages: [
          Message.user([
            { type: "text", text: "What is in this image?" },
            { type: "media", mediaType: "image/png", data: "AAECAw==" },
          ]),
        ],
      }),
    )

    // Fake transport: inspects the outgoing HTTP request, then answers with a scripted SSE stream.
    const transport = dynamicResponse((input) =>
      Effect.gen(function* () {
        // Effect.orDie: a conversion failure is treated as a defect rather than a typed error.
        const outgoing = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie)
        const sentBody = yield* Effect.promise(() => outgoing.json())
        expect(sentBody).toMatchObject({
          messages: [
            {
              role: "user",
              content: [
                { type: "text", text: "What is in this image?" },
                { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } },
              ],
            },
          ],
        })

        const reply = sseEvents(
          { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
          { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "An image." } },
          { type: "content_block_stop", index: 0 },
          { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 3 } },
          { type: "message_stop" },
        )
        return input.respond(reply, { headers: { "content-type": "text/event-stream" } })
      }),
    )

    const response = yield* generation.pipe(Effect.provide(transport))

    expect(response.text).toBe("An image.")
  }),
)
699
+
700
// An ephemeral cache hint with ttlSeconds of 3600 on the system part must be emitted as
// `cache_control: { type: "ephemeral", ttl: "1h" }`. Only the boundary value 3600 is
// exercised here.
it.effect("maps ttlSeconds >= 3600 to cache_control ttl: '1h'", () =>
  Effect.gen(function* () {
    const oneHour = new CacheHint({ type: "ephemeral", ttlSeconds: 3600 })
    const { body } = yield* LLMClient.prepare(
      LLM.request({
        model,
        system: { type: "text", text: "system", cache: oneHour },
        prompt: "hi",
      }),
    )

    const expectedSystem = [{ type: "text", text: "system", cache_control: { type: "ephemeral", ttl: "1h" } }]
    expect(body).toMatchObject({ system: expectedSystem })
  }),
)
715
+
716
// Cache hints attached to a tool definition and to a tool message must both show up as
// `cache_control` markers: on the tool entry and on the `tool_result` content block.
it.effect("emits cache_control on tool definitions and tool-result blocks", () =>
  Effect.gen(function* () {
    // Two separate hint instances, one per hinted item.
    const toolHint = new CacheHint({ type: "ephemeral" })
    const resultHint = new CacheHint({ type: "ephemeral" })

    const conversation = LLM.request({
      model,
      tools: [
        {
          name: "lookup",
          description: "lookup tool",
          inputSchema: { type: "object", properties: {} },
          cache: toolHint,
        },
      ],
      messages: [
        Message.user("What's the weather?"),
        Message.assistant([ToolCallPart.make({ id: "call_1", name: "lookup", input: {} })]),
        Message.tool({
          id: "call_1",
          name: "lookup",
          result: { temp: 72 },
          cache: resultHint,
        }),
      ],
    })
    const { body } = yield* LLMClient.prepare(conversation)

    // The tool message is expected under role "user" as a `tool_result` block.
    const expectedBody = {
      tools: [{ name: "lookup", cache_control: { type: "ephemeral" } }],
      messages: [
        { role: "user", content: [{ type: "text", text: "What's the weather?" }] },
        { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "lookup" }] },
        {
          role: "user",
          content: [{ type: "tool_result", tool_use_id: "call_1", cache_control: { type: "ephemeral" } }],
        },
      ],
    }
    expect(body).toMatchObject(expectedBody)
  }),
)
755
+
756
// Six hinted system parts go in; exactly four may keep a `cache_control` marker, and the
// fifth and sixth parts must be the ones left unmarked.
it.effect("drops cache_control breakpoints past the 4-per-request cap", () =>
  Effect.gen(function* () {
    const hint = new CacheHint({ type: "ephemeral" })
    // Parts "a" through "f", all sharing the same hint instance.
    const hintedParts = ["a", "b", "c", "d", "e", "f"].map((text) => ({ type: "text" as const, text, cache: hint }))

    const prepared = yield* LLMClient.prepare(LLM.request({ model, system: hintedParts, prompt: "hi" }))

    const { system } = prepared.body as { system: Array<{ cache_control?: unknown }> }
    const withMarker = system.filter((entry) => entry.cache_control !== undefined)
    expect(withMarker).toHaveLength(4)
    expect(system[4]?.cache_control).toBeUndefined()
    expect(system[5]?.cache_control).toBeUndefined()
  }),
)
781
+
782
// Four hinted tools plus one hinted system part and one hinted message part: every tool
// must keep its `cache_control` marker while the system and message hints are dropped.
//
// NOTE(review): because the four tools leave both later hints unmarked, this shows that
// tools outrank system and messages but does not by itself distinguish system from
// messages. Consider a three-tool variant if that ordering needs coverage.
it.effect("spends breakpoint budget on tools before system before messages", () =>
  Effect.gen(function* () {
    type Markable = { cache_control?: unknown }
    type PreparedShape = {
      tools: Array<Markable>
      system: Array<Markable>
      messages: Array<{ content: Array<Markable> }>
    }

    const hint = new CacheHint({ type: "ephemeral" })
    // Tools t1 through t4, each described by its own name and sharing the same hint instance.
    const hintedTools = ["t1", "t2", "t3", "t4"].map((name) => ({
      name,
      description: name,
      inputSchema: { type: "object" as const, properties: {} },
      cache: hint,
    }))

    const prepared = yield* LLMClient.prepare(
      LLM.request({
        model,
        tools: hintedTools,
        system: [{ type: "text", text: "system-tail", cache: hint }],
        messages: [Message.user([{ type: "text", text: "message-tail", cache: hint }])],
      }),
    )

    const wire = prepared.body as PreparedShape
    const allToolsMarked = wire.tools.every((tool) => tool.cache_control !== undefined)
    expect(allToolsMarked).toBe(true)
    expect(wire.system[0]?.cache_control).toBeUndefined()
    expect(wire.messages[0]?.content[0]?.cache_control).toBeUndefined()
  }),
)
829
+ })