@codilore/llm 1.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/AGENTS.md +321 -0
  2. package/README.md +131 -0
  3. package/example/call-sites.md +591 -0
  4. package/example/tutorial.ts +255 -0
  5. package/package.json +50 -0
  6. package/script/recording-cost-report.ts +250 -0
  7. package/script/setup-recording-env.ts +542 -0
  8. package/src/cache-policy.ts +111 -0
  9. package/src/index.ts +32 -0
  10. package/src/llm.ts +186 -0
  11. package/src/protocols/anthropic-messages.ts +841 -0
  12. package/src/protocols/bedrock-converse.ts +649 -0
  13. package/src/protocols/bedrock-event-stream.ts +87 -0
  14. package/src/protocols/gemini.ts +465 -0
  15. package/src/protocols/index.ts +6 -0
  16. package/src/protocols/openai-chat.ts +431 -0
  17. package/src/protocols/openai-compatible-chat.ts +24 -0
  18. package/src/protocols/openai-responses.ts +987 -0
  19. package/src/protocols/shared.ts +283 -0
  20. package/src/protocols/utils/bedrock-auth.ts +70 -0
  21. package/src/protocols/utils/bedrock-cache.ts +37 -0
  22. package/src/protocols/utils/bedrock-media.ts +80 -0
  23. package/src/protocols/utils/cache.ts +16 -0
  24. package/src/protocols/utils/gemini-tool-schema.ts +101 -0
  25. package/src/protocols/utils/lifecycle.ts +102 -0
  26. package/src/protocols/utils/openai-options.ts +84 -0
  27. package/src/protocols/utils/tool-stream.ts +218 -0
  28. package/src/provider.ts +37 -0
  29. package/src/providers/amazon-bedrock.ts +43 -0
  30. package/src/providers/anthropic.ts +35 -0
  31. package/src/providers/azure.ts +110 -0
  32. package/src/providers/cloudflare.ts +127 -0
  33. package/src/providers/github-copilot.ts +66 -0
  34. package/src/providers/google.ts +35 -0
  35. package/src/providers/index.ts +11 -0
  36. package/src/providers/openai-compatible-profile.ts +20 -0
  37. package/src/providers/openai-compatible.ts +65 -0
  38. package/src/providers/openai-options.ts +81 -0
  39. package/src/providers/openai.ts +63 -0
  40. package/src/providers/openrouter.ts +98 -0
  41. package/src/providers/xai.ts +56 -0
  42. package/src/route/auth-options.ts +57 -0
  43. package/src/route/auth.ts +156 -0
  44. package/src/route/client.ts +434 -0
  45. package/src/route/endpoint.ts +53 -0
  46. package/src/route/executor.ts +374 -0
  47. package/src/route/framing.ts +27 -0
  48. package/src/route/index.ts +25 -0
  49. package/src/route/protocol.ts +84 -0
  50. package/src/route/transport/http.ts +108 -0
  51. package/src/route/transport/index.ts +33 -0
  52. package/src/route/transport/websocket.ts +280 -0
  53. package/src/schema/errors.ts +203 -0
  54. package/src/schema/events.ts +370 -0
  55. package/src/schema/ids.ts +43 -0
  56. package/src/schema/index.ts +5 -0
  57. package/src/schema/messages.ts +404 -0
  58. package/src/schema/options.ts +221 -0
  59. package/src/tool-runtime.ts +78 -0
  60. package/src/tool.ts +241 -0
  61. package/src/utils/record.ts +3 -0
  62. package/sst-env.d.ts +10 -0
  63. package/test/adapter.test.ts +164 -0
  64. package/test/auth-options.types.ts +168 -0
  65. package/test/auth.test.ts +103 -0
  66. package/test/cache-policy.test.ts +262 -0
  67. package/test/continuation-scenarios.ts +104 -0
  68. package/test/endpoint.test.ts +58 -0
  69. package/test/executor.test.ts +418 -0
  70. package/test/exports.test.ts +62 -0
  71. package/test/fixtures/media/restroom.png +0 -0
  72. package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
  73. package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
  74. package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
  75. package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
  76. package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
  77. package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
  78. package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
  79. package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
  80. package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
  81. package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
  82. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  83. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
  84. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  85. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
  86. package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
  87. package/test/fixtures/recordings/gemini/streams-text.json +28 -0
  88. package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
  89. package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
  90. package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
  91. package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
  92. package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
  93. package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
  94. package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
  95. package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
  96. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
  97. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
  98. package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
  99. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
  100. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
  101. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
  102. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
  103. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
  104. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
  105. package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
  106. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
  107. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
  108. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
  109. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
  110. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
  111. package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
  112. package/test/generate-object.test.ts +184 -0
  113. package/test/lib/effect.ts +50 -0
  114. package/test/lib/http.ts +98 -0
  115. package/test/lib/openai-chunks.ts +27 -0
  116. package/test/lib/sse.ts +17 -0
  117. package/test/lib/tool-runtime.ts +146 -0
  118. package/test/llm.test.ts +167 -0
  119. package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
  120. package/test/provider/anthropic-messages.recorded.test.ts +46 -0
  121. package/test/provider/anthropic-messages.test.ts +829 -0
  122. package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
  123. package/test/provider/bedrock-converse.test.ts +707 -0
  124. package/test/provider/cloudflare.test.ts +230 -0
  125. package/test/provider/gemini-cache.recorded.test.ts +48 -0
  126. package/test/provider/gemini.test.ts +476 -0
  127. package/test/provider/golden.recorded.test.ts +219 -0
  128. package/test/provider/openai-chat.test.ts +446 -0
  129. package/test/provider/openai-compatible-chat.test.ts +238 -0
  130. package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
  131. package/test/provider/openai-responses.test.ts +1322 -0
  132. package/test/provider/openrouter.test.ts +56 -0
  133. package/test/provider.types.ts +41 -0
  134. package/test/recorded-golden.ts +97 -0
  135. package/test/recorded-runner.ts +100 -0
  136. package/test/recorded-scenarios.ts +531 -0
  137. package/test/recorded-test.ts +74 -0
  138. package/test/recorded-utils.ts +56 -0
  139. package/test/recorded-websocket.ts +26 -0
  140. package/test/route.test.ts +43 -0
  141. package/test/schema.test.ts +97 -0
  142. package/test/tool-runtime.test.ts +802 -0
  143. package/test/tool-stream.test.ts +99 -0
  144. package/test/tool.types.ts +40 -0
  145. package/tsconfig.json +15 -0
@@ -0,0 +1,531 @@
1
+ import { expect } from "bun:test"
2
+ import { Effect, Schema, Stream } from "effect"
3
+ import {
4
+ LLM,
5
+ LLMEvent,
6
+ LLMResponse,
7
+ Message,
8
+ ToolRuntime,
9
+ ToolChoice,
10
+ ToolDefinition,
11
+ toDefinitions,
12
+ type ContentPart,
13
+ type FinishReason,
14
+ type LLMRequest,
15
+ type Model,
16
+ } from "../src"
17
+ import { LLMClient } from "../src/route"
18
+ import { Tool } from "../src/tool"
19
+
20
// Name under which the weather tool is registered in the scenarios of this file.
export const weatherToolName = "get_weather"
21
+
22
// Deterministic system prompt, long enough to exceed the minimum
// cacheable-prefix threshold of every supported provider (Anthropic Haiku 3.5:
// 2048 tokens; Anthropic Opus/Haiku 4.5: 4096 tokens; OpenAI/Gemini/Bedrock:
// lower). One fixed sentence of ~100 chars is concatenated 250 times, giving
// ≈ 25,000 chars ≈ 5k+ tokens. Cassettes replay bit-for-bit, so the exact text
// only matters when re-recording with `RECORD=true`.
export const LARGE_CACHEABLE_SYSTEM = Array.from(
  { length: 250 },
  () => "You are a concise, factual assistant. Answer precisely and avoid filler. Cite numbers when known. ",
).join("")
33
+
34
// Provider-facing definition of the weather tool: a single required string
// property `city`, with no additional properties allowed.
export const weatherTool = ToolDefinition.make({
  name: weatherToolName,
  description: "Get current weather for a city.",
  inputSchema: {
    type: "object",
    properties: {
      city: { type: "string" },
    },
    required: ["city"],
    additionalProperties: false,
  },
})
44
+
45
// Executable counterpart of `weatherTool`. It returns a fixed reading for
// "Paris" and a placeholder reading for every other city, so tool loops stay
// deterministic.
export const weatherRuntimeTool = Tool.make({
  description: weatherTool.description,
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
  execute: ({ city }) => {
    const reading =
      city === "Paris" ? { temperature: 22, condition: "sunny" } : { temperature: 0, condition: "unknown" }
    return Effect.succeed(reading)
  },
})
54
+
55
// Builds the uncached "What is the weather in Paris?" request used by the
// tool-loop tests. `maxTokens` defaults to 80. `temperature` defaults to 0;
// passing `false` omits the temperature field from the generation settings.
export const weatherToolLoopRequest = (input: {
  readonly id: string
  readonly model: Model
  readonly system?: string
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const maxTokens = input.maxTokens ?? 80
  const sampling = input.temperature === false ? { maxTokens } : { maxTokens, temperature: input.temperature ?? 0 }
  return LLM.request({
    id: input.id,
    model: input.model,
    system: input.system ?? "Use the get_weather tool, then answer in one short sentence.",
    prompt: "What is the weather in Paris?",
    cache: "none",
    generation: sampling,
  })
}
73
+
74
// Variant of `weatherToolLoopRequest` whose system prompt pins the final reply
// to "Paris is sunny." so golden tests can match it exactly.
export const goldenWeatherToolLoopRequest = (input: {
  readonly id: string
  readonly model: Model
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const system = "Use the get_weather tool exactly once. After the tool result, reply exactly: Paris is sunny."
  return weatherToolLoopRequest({ ...input, system })
}
84
+
85
// Normalized text the image scenarios expect the model to read from the fixture.
const RESTROOM_IMAGE_TEXT = "jiggling restroom prison"

// Reads the PNG fixture next to this module and yields its bytes as base64.
const restroomImage = () =>
  Effect.map(
    Effect.promise(() => {
      const fixtureUrl = new URL("./fixtures/media/restroom.png", import.meta.url)
      return Bun.file(fixtureUrl).bytes()
    }),
    (bytes) => Buffer.from(bytes).toString("base64"),
  )
90
+
91
// Drives a manual tool loop with the weather runtime tool for at most 10 model
// turns and returns every collected event. `finish` events of intermediate
// turns are dropped; only the finish of the last turn (the one without
// client-side tool calls) is kept, so the result holds at most one `finish`.
// Throws when the model is still requesting tools after 10 turns.
export const runWeatherToolLoop = (request: LLMRequest) =>
  Effect.gen(function* () {
    const tools = { [weatherToolName]: weatherRuntimeTool }
    const collected: LLMEvent[] = []
    let current = LLM.updateRequest(request, { tools: toDefinitions(tools) })
    let remainingSteps = 10

    while (remainingSteps-- > 0) {
      const response = yield* LLMClient.generate(current)
      for (const event of response.events) {
        if (event.type !== "finish") collected.push(event)
      }

      // Provider-executed calls need no client-side dispatch.
      const pending = response.events.filter(LLMEvent.is.toolCall).filter((call) => !call.providerExecuted)
      if (pending.length === 0) {
        const finish = response.events.find(LLMEvent.is.finish)
        if (finish) collected.push(finish)
        return collected
      }

      const outcomes = yield* Effect.forEach(pending, (call) =>
        Effect.map(ToolRuntime.dispatch(tools, call), (result) => ({ call, result })),
      )
      for (const { result } of outcomes) collected.push(...result.events)

      // Feed the assistant turn and the tool results back for the next step.
      const assistantTurn = Message.assistant(assistantContent(response.events))
      const toolTurns = outcomes.map(({ call, result }) =>
        Message.tool({ id: call.id, name: call.name, result: result.result }),
      )
      current = LLM.updateRequest(current, {
        messages: [...current.messages, assistantTurn, ...toolTurns],
      })
    }

    throw new Error("Weather tool loop exceeded 10 steps")
  })
122
+
123
// Folds a response's event stream into assistant content parts. Consecutive
// deltas of the same kind are concatenated into one part, an `*-end` event
// attaches its provider metadata to the matching trailing part, and tool
// calls are appended as they appear. Other events are ignored.
const assistantContent = (events: ReadonlyArray<LLMEvent>) => {
  const parts: ContentPart[] = []
  for (const event of events) {
    switch (event.type) {
      case "text-delta":
      case "reasoning-delta": {
        const kind = event.type === "text-delta" ? "text" : "reasoning"
        const tail = parts.at(-1)
        if (tail?.type === kind) {
          parts[parts.length - 1] = { ...tail, text: `${tail.text}${event.text}` }
        } else {
          parts.push({ type: kind, text: event.text })
        }
        break
      }
      case "text-end":
      case "reasoning-end": {
        const kind = event.type === "text-end" ? "text" : "reasoning"
        const tail = parts.at(-1)
        if (tail?.type === kind) parts[parts.length - 1] = { ...tail, providerMetadata: event.providerMetadata }
        break
      }
      case "tool-call":
        parts.push(event)
        break
      default:
        break
    }
  }
  return parts
}
146
+
147
// Asserts that the last event is a `finish` event with the given reason.
export const expectFinish = (
  events: ReadonlyArray<LLMEvent>,
  reason: Extract<LLMEvent, { readonly type: "finish" }>["reason"],
) => {
  const lastEvent = events.at(-1)
  return expect(lastEvent).toMatchObject({ type: "finish", reason })
}
151
+
152
// Asserts that the response holds exactly one weather tool call for Paris,
// with any string id.
export const expectWeatherToolCall = (response: LLMResponse) => {
  const expectedCall = {
    type: "tool-call",
    id: expect.any(String),
    name: weatherToolName,
    input: { city: "Paris" },
  }
  return expect(response.toolCalls).toMatchObject([expectedCall])
}
156
+
157
// Asserts the shape of a complete weather tool loop: one final `finish` with
// reason "stop", two step finishes ("tool-calls" then "stop"), one weather
// tool call for Paris, one sunny tool result, and output text naming Paris.
export const expectWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
  const finishEvents = events.filter(LLMEvent.is.finish)
  expect(finishEvents).toHaveLength(1)
  expect(finishEvents[0]?.reason).toBe("stop")

  const stepReasons = events.filter(LLMEvent.is.stepFinish).map((event) => event.reason)
  expect(stepReasons).toEqual(["tool-calls", "stop"])

  const calls = events.filter(LLMEvent.is.toolCall)
  expect(calls).toHaveLength(1)
  expect(calls[0]).toMatchObject({ type: "tool-call", name: weatherToolName, input: { city: "Paris" } })

  const results = events.filter(LLMEvent.is.toolResult)
  expect(results).toHaveLength(1)
  expect(results[0]).toMatchObject({
    type: "tool-result",
    name: weatherToolName,
    result: { type: "json", value: { temperature: 22, condition: "sunny" } },
  })

  const text = LLMResponse.text({ events })
  expect(text).toContain("Paris")
  expect(text.trim().length).toBeGreaterThan(0)
}
181
+
182
// Same checks as `expectWeatherToolLoop`, plus the trimmed reply must be
// "Paris is sunny" with an optional trailing period.
export const expectGoldenWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
  expectWeatherToolLoop(events)
  const reply = LLMResponse.text({ events }).trim()
  expect(reply).toMatch(/^Paris is sunny\.?$/)
}
186
+
187
// Per-run inputs shared by every golden scenario.
export interface GoldenScenarioContext {
  // Request id, or the prefix of the generated request ids.
  readonly id: string
  // Model the scenario is run against.
  readonly model: Model
  // Overrides the scenario's own default token budget when set.
  readonly maxTokens?: number
  // Sampling temperature; defaults to 0. `false` omits the field entirely.
  readonly temperature?: number | false
}
193
+
194
// Local shorthand for `LLMClient.generate`.
const generate = (request: LLMRequest) => {
  return LLMClient.generate(request)
}
195
+
196
// Generation settings for a scenario request. The temperature defaults to 0
// and is left out entirely when the context sets `temperature: false`.
const generation = (context: GoldenScenarioContext, maxTokens: number) => {
  if (context.temperature === false) return { maxTokens }
  return { maxTokens, temperature: context.temperature ?? 0 }
}
198
+
199
// Canonicalizes model output for comparison: lowercases it, removes everything
// except a-z and whitespace, collapses whitespace runs to one space, and trims.
const normalizeImageText = (value: string) => {
  const lettersOnly = value.toLowerCase().replace(/[^a-z\s]/g, "")
  const singleSpaced = lettersOnly.replace(/\s+/g, " ")
  return singleSpaced.trim()
}
205
+
206
// OpenAI provider options used by the encrypted-reasoning scenarios: storage
// disabled, encrypted reasoning content included, low effort, auto summaries.
const encryptedReasoningOptions = {
  openai: {
    store: false,
    include: ["reasoning.encrypted_content"],
    reasoningEffort: "low",
    reasoningSummary: "auto",
  },
} as const
214
+
215
// Expected assistant text: an exact string (compared after trimming) or a pattern.
type AssistantTextExpectation = string | RegExp

// A scripted user turn appended to the conversation as-is.
type UserStep = {
  readonly type: "user"
  readonly content: Message.ContentInput
}

// A generated assistant turn: request overrides plus the expectations checked
// against the response.
type AssistantStep = {
  readonly type: "assistant"
  readonly text?: AssistantTextExpectation
  readonly toolCall?: { readonly name: string; readonly input: unknown }
  readonly reasoning?: "openai-encrypted"
  readonly id?: string
  readonly system?: string
  readonly maxTokens?: number
  readonly finish?: FinishReason
  readonly tools?: LLM.RequestInput["tools"]
  readonly toolChoice?: LLM.RequestInput["toolChoice"]
  readonly providerOptions?: LLMRequest["providerOptions"]
  readonly assert?: (response: LLMResponse) => void
}

type ConversationStep = UserStep | AssistantStep
233
+
234
// Builds a scripted user step.
const user = (content: Message.ContentInput): ConversationStep => {
  return { type: "user", content }
}
235
+
236
// Builders for assistant steps. Caller-supplied options are spread last, so
// they override the defaults set here for any key they are allowed to carry.
const assistant = {
  // Expects a plain text reply.
  expectText(
    text: AssistantTextExpectation,
    options?: Omit<AssistantStep, "type" | "text" | "reasoning" | "toolCall">,
  ): ConversationStep {
    return { type: "assistant", text, ...options }
  },

  // Expects a single tool call; the finish reason is fixed to "tool-calls".
  expectToolCall(
    name: string,
    input: unknown,
    options?: Omit<AssistantStep, "type" | "text" | "reasoning" | "toolCall" | "finish">,
  ): ConversationStep {
    return { type: "assistant", toolCall: { name, input }, finish: "tool-calls", ...options }
  },

  // Expects text together with OpenAI encrypted reasoning, and sends the
  // provider options that request it.
  expectEncryptedReasoningText(
    text: AssistantTextExpectation,
    options?: Omit<AssistantStep, "type" | "text" | "reasoning" | "toolCall" | "providerOptions">,
  ): ConversationStep {
    return {
      type: "assistant",
      text,
      reasoning: "openai-encrypted",
      providerOptions: encryptedReasoningOptions,
      ...options,
    }
  },
}
257
+
258
// Compares the trimmed response text against the expectation: pattern match
// for a RegExp, strict equality for a string.
const assertAssistantText = (actual: string, expected: AssistantTextExpectation) => {
  const trimmed = actual.trim()
  if (typeof expected !== "string") {
    expect(trimmed).toMatch(expected)
    return
  }
  expect(trimmed).toBe(expected)
}
265
+
266
// Asserts that the response holds exactly one tool call with the expected
// name and input, and any string id.
const assertAssistantToolCall = (response: LLMResponse, expected: NonNullable<AssistantStep["toolCall"]>) => {
  const { name, input } = expected
  expect(response.toolCalls).toMatchObject([{ type: "tool-call", id: expect.any(String), name, input }])
}
271
+
272
// Rebuilds the assistant message to append to the conversation history.
// Generated golden scenarios only model one assistant shape at a time
// (encrypted reasoning + text, text, or tool call); mixed interleavings belong
// in focused protocol tests where event order can be asserted directly.
const assistantMessageFromResponse = (response: LLMResponse, step: AssistantStep) => {
  const parts: ContentPart[] = []

  if (step.reasoning === "openai-encrypted") {
    // The reasoning part needs the metadata of a reasoning-end event that
    // carries an OpenAI item id.
    const reasoningEnd = response.events.find(
      (event): event is Extract<LLMEvent, { readonly type: "reasoning-end" }> =>
        LLMEvent.is.reasoningEnd(event) && typeof event.providerMetadata?.openai?.itemId === "string",
    )
    if (!reasoningEnd) throw new Error("OpenAI Responses did not return reasoning metadata")
    const metadata = reasoningEnd.providerMetadata
    expect(metadata?.openai?.reasoningEncryptedContent).toEqual(expect.any(String))
    parts.push({ type: "reasoning", text: response.reasoning, providerMetadata: metadata })
  }

  if (response.text.length > 0) {
    parts.push({ type: "text", text: response.text })
  }
  for (const call of response.toolCalls) parts.push(call)
  return Message.assistant(parts)
}
291
+
292
// Plays a scripted conversation. User steps are appended to the history; each
// assistant step issues a request with the history so far, checks the step's
// expectations, and appends the rebuilt assistant message. Request ids are
// `<context.id>_<step.id>`, or `<context.id>_<n>` with n the 1-based count of
// assistant steps when the step has no id.
const runGeneratedConversation = (context: GoldenScenarioContext, steps: ReadonlyArray<ConversationStep>) =>
  Effect.gen(function* () {
    const history: Message[] = []
    let assistantTurns = 0

    for (const step of steps) {
      if (step.type !== "assistant") {
        history.push(Message.user(step.content))
        continue
      }

      assistantTurns += 1
      const suffix = step.id || assistantTurns
      const request = LLM.request({
        id: `${context.id}_${suffix}`,
        model: context.model,
        system: step.system,
        cache: "none",
        messages: history,
        tools: step.tools,
        toolChoice: step.toolChoice,
        providerOptions: step.providerOptions,
        generation: generation(context, step.maxTokens ?? context.maxTokens ?? 80),
      })
      const response = yield* generate(request)

      if (step.text !== undefined) assertAssistantText(response.text, step.text)
      if (step.toolCall) assertAssistantToolCall(response, step.toolCall)
      step.assert?.(response)
      expectFinish(response.events, step.finish ?? "stop")

      history.push(assistantMessageFromResponse(response, step))
    }
  })
323
+
324
// Golden "text" scenario: the model must reply "Hello" with an optional "!".
// On the gemini route the thinking budget is set to 0.
const runTextScenario = (context: GoldenScenarioContext) => {
  const steps: ReadonlyArray<ConversationStep> = [
    user("Reply exactly with: Hello!"),
    assistant.expectText(/^Hello!?$/, {
      system: "You are concise.",
      maxTokens: context.maxTokens ?? 40,
      providerOptions:
        context.model.route.id === "gemini" ? { gemini: { thinkingConfig: { thinkingBudget: 0 } } } : undefined,
    }),
  ]
  return runGeneratedConversation(context, steps)
}
334
+
335
// Golden "tool-call" scenario: with the weather tool selected through
// `toolChoice`, the model must call it with city "Paris".
const runToolCallScenario = (context: GoldenScenarioContext) => {
  const steps: ReadonlyArray<ConversationStep> = [
    user("Call get_weather with city exactly Paris."),
    assistant.expectToolCall(
      weatherToolName,
      { city: "Paris" },
      {
        system: "Call tools exactly as requested.",
        tools: [weatherTool],
        toolChoice: ToolChoice.make(weatherTool),
        maxTokens: context.maxTokens ?? 80,
      },
    ),
  ]
  return runGeneratedConversation(context, steps)
}
349
+
350
// Golden "image" scenario: sends the fixture image in a user message and
// expects the normalized reply to equal the three words it contains.
const runImageScenario = (context: GoldenScenarioContext) =>
  Effect.gen(function* () {
    const image = yield* restroomImage()
    const steps: ReadonlyArray<ConversationStep> = [
      user([
        {
          type: "text",
          text: "The image contains exactly three lowercase English words. Read them left to right and reply with only those words.",
        },
        { type: "media", mediaType: "image/png", data: image },
      ]),
      assistant.expectText(/.+/, {
        system: "Read images carefully. Reply only with the visible text.",
        maxTokens: context.maxTokens ?? 20,
        assert: (response) => expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT),
      }),
    ]
    yield* runGeneratedConversation(context, steps)
  })
367
+
368
// Tool-result image round trip: a tool returns image bytes, and the next
// model turn must receive provider-native image content rather than a
// JSON-stringified base64 blob. The tool call and its result are scripted in
// the history; only the final assistant turn is generated.
const screenshotToolName = "read_screenshot"
const runImageToolResultScenario = (context: GoldenScenarioContext) =>
  Effect.gen(function* () {
    const image = yield* restroomImage()

    const transcript = [
      Message.user("Use the read_screenshot tool, then reply with the words shown."),
      Message.assistant([{ type: "tool-call", id: "call_screenshot_1", name: screenshotToolName, input: {} }]),
      Message.tool({
        id: "call_screenshot_1",
        name: screenshotToolName,
        resultType: "content",
        result: [
          { type: "text", text: "Image read successfully" },
          { type: "media", mediaType: "image/png", data: image },
        ],
      }),
    ]
    const screenshotTool = ToolDefinition.make({
      name: screenshotToolName,
      description: "Capture a screenshot of the current screen.",
      inputSchema: { type: "object", properties: {}, additionalProperties: false },
    })

    const response = yield* generate(
      LLM.request({
        id: `${context.id}_image_tool_result`,
        model: context.model,
        system: "Read images carefully. Reply only with the visible text, lowercase, no punctuation.",
        cache: "none",
        generation: generation(context, context.maxTokens ?? 40),
        messages: transcript,
        tools: [screenshotTool],
      }),
    )

    expectFinish(response.events, "stop")
    expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT)
  })
408
+
409
// Golden "reasoning" scenario: expects the "Hello" reply and a usage report
// with a positive reasoning token count.
const runReasoningScenario = (context: GoldenScenarioContext) => {
  const steps: ReadonlyArray<ConversationStep> = [
    user("Think briefly, then reply exactly with: Hello!"),
    assistant.expectText(/^Hello!?$/, {
      system: "Show concise reasoning when the provider supports visible reasoning summaries.",
      providerOptions: { openai: { reasoningEffort: "low", reasoningSummary: "auto" } },
      maxTokens: context.maxTokens ?? 120,
      assert: (response) => expect(response.usage?.reasoningTokens ?? 0).toBeGreaterThan(0),
    }),
  ]
  return runGeneratedConversation(context, steps)
}
419
+
420
// Golden "reasoning-continuation" scenario: the first turn must return
// encrypted reasoning, which is replayed in the history of a second turn that
// must reply "Done" with an optional period.
const runReasoningContinuationScenario = (context: GoldenScenarioContext) => {
  const steps: ReadonlyArray<ConversationStep> = [
    user("Think briefly, then reply exactly with: Hello!"),
    assistant.expectEncryptedReasoningText(/^Hello!?$/, {
      id: "first",
      system: "Show concise reasoning when the provider supports visible reasoning summaries.",
      maxTokens: context.maxTokens ?? 120,
    }),
    user("Now reply exactly with: Done."),
    assistant.expectText(/^Done\.?$/, { id: "second", maxTokens: 40, providerOptions: encryptedReasoningOptions }),
  ]
  return runGeneratedConversation(context, steps)
}
431
+
432
// Golden "tool-loop" scenario: runs the full weather tool loop and checks the
// golden event shape and the final reply.
const runToolLoopScenario = (context: GoldenScenarioContext) =>
  Effect.gen(function* () {
    const request = goldenWeatherToolLoopRequest({
      id: context.id,
      model: context.model,
      maxTokens: context.maxTokens ?? 80,
      temperature: context.temperature,
    })
    const events = yield* runWeatherToolLoop(request)
    expectGoldenWeatherToolLoop(events)
  })
445
+
446
// Registry of golden scenarios keyed by id. Each entry has a display title,
// a list of tags, and the runner to execute.
const goldenScenarios = {
  text: {
    title: "streams text",
    tags: ["text", "golden"],
    run: runTextScenario,
  },
  "tool-call": {
    title: "streams tool call",
    tags: ["tool", "tool-call", "golden"],
    run: runToolCallScenario,
  },
  "tool-loop": {
    title: "drives a tool loop",
    tags: ["tool", "tool-loop", "golden"],
    run: runToolLoopScenario,
  },
  image: {
    title: "reads image text",
    tags: ["media", "image", "vision", "golden"],
    run: runImageScenario,
  },
  "image-tool-result": {
    title: "reads image returned from tool result",
    tags: ["media", "image", "vision", "tool", "tool-result", "golden"],
    run: runImageToolResultScenario,
  },
  reasoning: {
    title: "uses reasoning",
    tags: ["reasoning", "golden"],
    run: runReasoningScenario,
  },
  "reasoning-continuation": {
    title: "continues encrypted reasoning",
    tags: ["reasoning", "continuation", "encrypted-reasoning", "golden"],
    run: runReasoningContinuationScenario,
  },
} as const
463
+
464
// Ids of the registered golden scenarios.
export type GoldenScenarioID = keyof typeof goldenScenarios

// Display title of a scenario.
export const goldenScenarioTitle = (id: GoldenScenarioID) => {
  const scenario = goldenScenarios[id]
  return scenario.title
}

// Fresh mutable copy of a scenario's tags.
export const goldenScenarioTags = (id: GoldenScenarioID) => {
  const scenario = goldenScenarios[id]
  return [...scenario.tags]
}

// Runs the scenario registered under `id` with the given context.
export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) => {
  const scenario = goldenScenarios[id]
  return scenario.run(context)
}
469
+
470
// Projects usage onto its six token counters, keeping only those that are
// defined. Returns undefined when there is no usage.
const usageSummary = (usage: LLMResponse["usage"] | undefined) => {
  if (!usage) return undefined
  const { inputTokens, outputTokens, reasoningTokens, cacheReadInputTokens, cacheWriteInputTokens, totalTokens } =
    usage
  const counters = Object.entries({
    inputTokens,
    outputTokens,
    reasoningTokens,
    cacheReadInputTokens,
    cacheWriteInputTokens,
    totalTokens,
  })
  return Object.fromEntries(counters.filter(([, count]) => count !== undefined))
}
483
+
484
// Appends text to the summary. If the last entry has the same type, its value
// is extended in place (a non-string value counts as empty); otherwise a new
// entry is pushed.
const pushText = (summary: Array<Record<string, unknown>>, type: "text" | "reasoning", value: string) => {
  const tail = summary[summary.length - 1]
  if (tail === undefined || tail.type !== type) {
    summary.push({ type, value })
    return
  }
  const previous = typeof tail.value === "string" ? tail.value : ""
  tail.value = `${previous}${value}`
}
492
+
493
// Condenses an event stream into a compact list. Adjacent text or reasoning
// deltas are merged; tool calls, tool results, tool errors and finish events
// each become one entry; every other event is dropped. Keys whose value is
// undefined are removed from each entry.
export const eventSummary = (events: ReadonlyArray<LLMEvent>) => {
  const rows: Array<Record<string, unknown>> = []
  for (const event of events) {
    switch (event.type) {
      case "text-delta":
        pushText(rows, "text", event.text)
        break
      case "reasoning-delta":
        pushText(rows, "reasoning", event.text)
        break
      case "tool-call":
        rows.push({
          type: "tool-call",
          name: event.name,
          input: event.input,
          providerExecuted: event.providerExecuted,
        })
        break
      case "tool-result":
        rows.push({
          type: "tool-result",
          name: event.name,
          result: event.result,
          providerExecuted: event.providerExecuted,
        })
        break
      case "tool-error":
        rows.push({ type: "tool-error", name: event.name, message: event.message })
        break
      case "finish":
        rows.push({ type: "finish", reason: event.reason, usage: usageSummary(event.usage) })
        break
      default:
        break
    }
  }
  return rows.map((row) => Object.fromEntries(Object.entries(row).filter(([, value]) => value !== undefined)))
}
@@ -0,0 +1,74 @@
1
+ import { NodeFileSystem } from "@effect/platform-node"
2
+ import { HttpRecorder } from "@codilore/http-recorder"
3
+ import { Layer } from "effect"
4
+ import { FetchHttpClient } from "effect/unstable/http"
5
+ import * as path from "node:path"
6
+ import { fileURLToPath } from "node:url"
7
+ import { LLMClient, RequestExecutor } from "../src/route"
8
+ import type { Service as LLMClientService } from "../src/route/client"
9
+ import type { Service as RequestExecutorService } from "../src/route/executor"
10
+ import type { Service as WebSocketExecutorService } from "../src/route/transport/websocket"
11
+ import {
12
+ recordedEffectGroup,
13
+ type RecordedCaseOptions as RunnerCaseOptions,
14
+ type RecordedGroupOptions,
15
+ } from "./recorded-runner"
16
+ import { webSocketCassetteLayer } from "./recorded-websocket"
17
+
18
// File-system path of this module, derived from its `import.meta.url`.
const modulePath = fileURLToPath(import.meta.url)
// Directory that contains this module.
const __dirname = path.dirname(modulePath)
// Absolute path of the `fixtures/recordings` directory located next to this module.
const FIXTURES_DIR = path.resolve(__dirname, "fixtures", "recordings")
20
+
21
/** Union of the service types passed as the environment type argument of `recordedEffectGroup`. */
type RecordedEnv = LLMClientService | RequestExecutorService | WebSocketExecutorService

/** Optional HTTP recorder settings shared by the group-level and case-level option types. */
interface RecorderOverrides {
  readonly options?: HttpRecorder.RecordReplayOptions
}

/** Group-level options: the runner's group options plus optional recorder settings. */
type RecordedTestsOptions = RecordedGroupOptions & RecorderOverrides

/** Case-level options: the runner's case options plus optional recorder settings. */
type RecordedCaseOptions = RunnerCaseOptions & RecorderOverrides
30
+
31
/**
 * Combines two sets of recorder options, with `override` taking precedence.
 *
 * When only one side is present it is returned as-is (same object). When both
 * are present, a new object is built in which `override` wins key by key.
 * `metadata` is merged one level deep (again with `override` winning), or is
 * `undefined` when neither side supplies it.
 *
 * @param base - Lower-priority options, if any.
 * @param override - Higher-priority options, if any.
 * @returns The merged options, or `undefined` when both inputs are absent.
 */
const mergeOptions = (
  base: HttpRecorder.RecordReplayOptions | undefined,
  override: HttpRecorder.RecordReplayOptions | undefined,
) => {
  if (base && override) {
    const hasMetadata = Boolean(base.metadata || override.metadata)
    return {
      ...base,
      ...override,
      metadata: hasMetadata ? { ...base.metadata, ...override.metadata } : undefined,
    }
  }
  // At most one side is present here; return whichever it is.
  return base ? base : override
}
43
+
44
/**
 * Creates a recorded test group via `recordedEffectGroup`, with cassettes
 * stored under `FIXTURES_DIR`.
 *
 * For each case the `layer` callback:
 * - merges group-level and case-level recorder options (case-level wins);
 * - uses the merged `mode` when set, otherwise `"record"` if the runner
 *   reports `recording` and `"replay"` if not;
 * - combines recorder metadata with the runner-supplied `metadata`, the latter
 *   taking precedence;
 * - builds the request executor on top of the HTTP recording layer, a
 *   cassette-backed WebSocket executor, and the LLM client on top of both.
 *
 * @param options - Group options, optionally carrying recorder options.
 */
export const recordedTests = (options: RecordedTestsOptions) =>
  recordedEffectGroup<RecordedEnv, never, RecordedTestsOptions, RecordedCaseOptions>({
    duplicateLabel: "recorded cassette",
    options,
    cassetteExists: (cassette) => HttpRecorder.hasCassetteSync(cassette, { directory: FIXTURES_DIR }),
    layer: ({ cassette, metadata, options, caseOptions, recording }) => {
      const recorderOptions = mergeOptions(options.options, caseOptions.options)
      const mode = recorderOptions?.mode ?? (recording ? "record" : "replay")
      // Runner-supplied metadata is spread last so it overrides recorder metadata.
      const recorderMetadata = { ...recorderOptions?.metadata, ...metadata }

      // HTTP side: recording layer backed by the fetch client, feeding the request executor.
      const httpRecording = HttpRecorder.recordingLayer(cassette, {
        ...recorderOptions,
        mode,
        metadata: recorderMetadata,
      }).pipe(Layer.provide(FetchHttpClient.layer))
      const requestExecutor = RequestExecutor.layer.pipe(Layer.provide(httpRecording))

      // WebSocket side: executor built with the same cassette name, mode and metadata.
      const webSocketExecutor = webSocketCassetteLayer(cassette, { metadata: recorderMetadata, mode })

      const deps = Layer.mergeAll(requestExecutor, webSocketExecutor)
      const client = LLMClient.layer.pipe(Layer.provide(deps))

      // File-system cassette service rooted at the fixtures directory.
      const cassetteService = HttpRecorder.Cassette.fileSystem({ directory: FIXTURES_DIR }).pipe(
        Layer.provide(NodeFileSystem.layer),
      )
      return Layer.mergeAll(deps, client).pipe(Layer.provide(cassetteService))
    },
  })
@@ -0,0 +1,56 @@
1
/**
 * Converts a string to a lowercase, dash-separated identifier.
 *
 * The value is trimmed, single and double quotes are removed, every run of
 * characters outside `a-z`, `A-Z`, `0-9` becomes one `-`, a leading or trailing
 * `-` is stripped, and the result is lowercased.
 *
 * @param value - Text to convert.
 * @returns The dash-separated form; may be empty.
 */
export const kebab = (value: string) => {
  const unquoted = value.trim().replace(/['"]/g, "")
  const dashed = unquoted.replace(/[^a-zA-Z0-9]+/g, "-")
  return dashed.replace(/^-|-$/g, "").toLowerCase()
}
8
+
9
/**
 * Returns the subset of `names` whose environment variable is unset or empty.
 *
 * @param names - Environment variable names to check.
 * @returns The names with no non-empty value in `process.env`, in input order.
 */
export const missingEnv = (names: ReadonlyArray<string>) => {
  const absent: string[] = []
  for (const name of names) {
    // Falsy check: both an unset variable and an empty string count as missing.
    if (!process.env[name]) absent.push(name)
  }
  return absent
}
10
+
11
/**
 * Reads an environment variable as a comma-separated list.
 *
 * Each item is trimmed and lowercased; empty items are dropped. An unset
 * variable yields an empty list.
 *
 * @param name - Environment variable to read.
 * @returns The normalized items, in their original order.
 */
export const envList = (name: string) => {
  const raw = process.env[name] ?? ""
  const items: string[] = []
  for (const part of raw.split(",")) {
    const item = part.trim().toLowerCase()
    if (item !== "") items.push(item)
  }
  return items
}
16
+
17
/**
 * Removes duplicate strings while keeping the first occurrence of each.
 *
 * @param items - Strings that may contain duplicates.
 * @returns A new array of the distinct strings, in first-seen order.
 */
export const unique = (items: ReadonlyArray<string>) => [...new Set(items)]
18
+
19
/**
 * Builds a de-duplicated tag list from classification fields plus extra tags.
 *
 * Non-empty `prefix`, `provider` and `protocol` become `prefix:…`,
 * `provider:…` and `protocol:…` tags, in that order, followed by `tags`.
 * Duplicates are removed with `unique`.
 *
 * @param input - Classification fields and any additional tags.
 * @returns The distinct tags.
 */
export const classifiedTags = (input: {
  readonly prefix?: string
  readonly provider?: string
  readonly protocol?: string
  readonly tags?: ReadonlyArray<string>
}) => {
  const collected: string[] = []
  if (input.prefix) collected.push(`prefix:${input.prefix}`)
  if (input.provider) collected.push(`provider:${input.provider}`)
  if (input.protocol) collected.push(`protocol:${input.protocol}`)
  return unique(collected.concat(input.tags ?? []))
}
31
+
32
/**
 * Decides whether a recorded case passes the selection filters given in the
 * environment variables `RECORDED_PREFIX`, `RECORDED_PROVIDER`,
 * `RECORDED_TAGS` and `RECORDED_TEST` (each read through `envList`).
 *
 * A filter with no values accepts everything. Comparisons are made on
 * lowercased values:
 * - prefix: `input.prefix` must be one of the listed prefixes;
 * - provider: the case must carry a `provider:<name>` tag for at least one listed provider;
 * - tags: the case must carry every listed tag;
 * - test: at least one listed value must be a substring of the case name, its
 *   kebab form, or its cassette name.
 *
 * @param input - Identity and tags of the case being considered.
 * @returns `true` when the case passes every active filter.
 */
export const matchesSelected = (input: {
  readonly prefix: string
  readonly name: string
  readonly cassette: string
  readonly tags: ReadonlyArray<string>
}) => {
  const wantedPrefixes = envList("RECORDED_PREFIX")
  const wantedProviders = envList("RECORDED_PROVIDER")
  const wantedTags = envList("RECORDED_TAGS")
  const wantedTests = envList("RECORDED_TEST")

  const ownTags = input.tags.map((tag) => tag.toLowerCase())
  const ownNames = [input.name, kebab(input.name), input.cassette].map((item) => item.toLowerCase())

  const prefixOk = wantedPrefixes.length === 0 || wantedPrefixes.includes(input.prefix.toLowerCase())
  const providerOk =
    wantedProviders.length === 0 || wantedProviders.some((provider) => ownTags.includes(`provider:${provider}`))
  // `every` is vacuously true for an empty list, so an unset filter accepts all cases.
  const tagsOk = wantedTags.every((tag) => ownTags.includes(tag))
  const testOk =
    wantedTests.length === 0 || wantedTests.some((test) => ownNames.some((name) => name.includes(test)))

  return prefixOk && providerOk && tagsOk && testOk
}
51
+
52
/**
 * Resolves the cassette name for a recorded case.
 *
 * An explicit `options.cassette` is used verbatim. Otherwise the name is
 * `<prefix>/<id>`, where `<id>` is `options.id` or, when that is absent, the
 * kebab form of `name`.
 *
 * @param prefix - Group prefix placed before the slash.
 * @param name - Case name, used to derive the id when none is given.
 * @param options - Optional explicit cassette name or id.
 * @returns The cassette name.
 */
export const cassetteName = (
  prefix: string,
  name: string,
  options: { readonly cassette?: string; readonly id?: string },
) => {
  const { cassette, id } = options
  // Nullish check only: an empty-string cassette is still returned as given.
  if (cassette != null) return cassette
  return `${prefix}/${id ?? kebab(name)}`
}
@@ -0,0 +1,26 @@
1
+ import { Cassette, makeWebSocketExecutor, type RecordReplayMode } from "@codilore/http-recorder"
2
+ import { Effect, Layer } from "effect"
3
+ import { WebSocketExecutor } from "../src/route"
4
+ import type { Service as WebSocketExecutorService } from "../src/route/transport/websocket"
5
+
6
// Opener passed to the recorder's executor as its `live.open`.
const liveWebSocket = WebSocketExecutor.open

/**
 * Builds a layer that provides `WebSocketExecutor.Service` using an executor
 * created by `makeWebSocketExecutor` for the given cassette.
 *
 * The layer requires `Cassette.Service`, which is passed to the executor as
 * its cassette store.
 *
 * @param cassette - Cassette name passed to the executor as `name`.
 * @param input - Record/replay `mode` and optional `metadata` forwarded to the executor.
 * @returns A layer providing the WebSocket executor service.
 */
export const webSocketCassetteLayer = (
  cassette: string,
  input: { readonly metadata?: Record<string, unknown>; readonly mode: RecordReplayMode },
): Layer.Layer<WebSocketExecutorService, never, Cassette.Service> => {
  const buildExecutor = Effect.gen(function* () {
    const cassetteService = yield* Cassette.Service
    const { metadata, mode } = input
    const executor = yield* makeWebSocketExecutor({
      name: cassette,
      mode,
      metadata,
      cassette: cassetteService,
      live: { open: liveWebSocket },
      // NOTE(review): presumably makes client messages compare as parsed JSON rather than raw text — confirm against the recorder.
      compareClientMessagesAsJson: true,
    })
    return WebSocketExecutor.Service.of(executor)
  })
  return Layer.effect(WebSocketExecutor.Service, buildExecutor)
}