@codilore/llm 1.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/AGENTS.md +321 -0
  2. package/README.md +131 -0
  3. package/example/call-sites.md +591 -0
  4. package/example/tutorial.ts +255 -0
  5. package/package.json +50 -0
  6. package/script/recording-cost-report.ts +250 -0
  7. package/script/setup-recording-env.ts +542 -0
  8. package/src/cache-policy.ts +111 -0
  9. package/src/index.ts +32 -0
  10. package/src/llm.ts +186 -0
  11. package/src/protocols/anthropic-messages.ts +841 -0
  12. package/src/protocols/bedrock-converse.ts +649 -0
  13. package/src/protocols/bedrock-event-stream.ts +87 -0
  14. package/src/protocols/gemini.ts +465 -0
  15. package/src/protocols/index.ts +6 -0
  16. package/src/protocols/openai-chat.ts +431 -0
  17. package/src/protocols/openai-compatible-chat.ts +24 -0
  18. package/src/protocols/openai-responses.ts +987 -0
  19. package/src/protocols/shared.ts +283 -0
  20. package/src/protocols/utils/bedrock-auth.ts +70 -0
  21. package/src/protocols/utils/bedrock-cache.ts +37 -0
  22. package/src/protocols/utils/bedrock-media.ts +80 -0
  23. package/src/protocols/utils/cache.ts +16 -0
  24. package/src/protocols/utils/gemini-tool-schema.ts +101 -0
  25. package/src/protocols/utils/lifecycle.ts +102 -0
  26. package/src/protocols/utils/openai-options.ts +84 -0
  27. package/src/protocols/utils/tool-stream.ts +218 -0
  28. package/src/provider.ts +37 -0
  29. package/src/providers/amazon-bedrock.ts +43 -0
  30. package/src/providers/anthropic.ts +35 -0
  31. package/src/providers/azure.ts +110 -0
  32. package/src/providers/cloudflare.ts +127 -0
  33. package/src/providers/github-copilot.ts +66 -0
  34. package/src/providers/google.ts +35 -0
  35. package/src/providers/index.ts +11 -0
  36. package/src/providers/openai-compatible-profile.ts +20 -0
  37. package/src/providers/openai-compatible.ts +65 -0
  38. package/src/providers/openai-options.ts +81 -0
  39. package/src/providers/openai.ts +63 -0
  40. package/src/providers/openrouter.ts +98 -0
  41. package/src/providers/xai.ts +56 -0
  42. package/src/route/auth-options.ts +57 -0
  43. package/src/route/auth.ts +156 -0
  44. package/src/route/client.ts +434 -0
  45. package/src/route/endpoint.ts +53 -0
  46. package/src/route/executor.ts +374 -0
  47. package/src/route/framing.ts +27 -0
  48. package/src/route/index.ts +25 -0
  49. package/src/route/protocol.ts +84 -0
  50. package/src/route/transport/http.ts +108 -0
  51. package/src/route/transport/index.ts +33 -0
  52. package/src/route/transport/websocket.ts +280 -0
  53. package/src/schema/errors.ts +203 -0
  54. package/src/schema/events.ts +370 -0
  55. package/src/schema/ids.ts +43 -0
  56. package/src/schema/index.ts +5 -0
  57. package/src/schema/messages.ts +404 -0
  58. package/src/schema/options.ts +221 -0
  59. package/src/tool-runtime.ts +78 -0
  60. package/src/tool.ts +241 -0
  61. package/src/utils/record.ts +3 -0
  62. package/sst-env.d.ts +10 -0
  63. package/test/adapter.test.ts +164 -0
  64. package/test/auth-options.types.ts +168 -0
  65. package/test/auth.test.ts +103 -0
  66. package/test/cache-policy.test.ts +262 -0
  67. package/test/continuation-scenarios.ts +104 -0
  68. package/test/endpoint.test.ts +58 -0
  69. package/test/executor.test.ts +418 -0
  70. package/test/exports.test.ts +62 -0
  71. package/test/fixtures/media/restroom.png +0 -0
  72. package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
  73. package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
  74. package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
  75. package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
  76. package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
  77. package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
  78. package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
  79. package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
  80. package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
  81. package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
  82. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  83. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
  84. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  85. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
  86. package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
  87. package/test/fixtures/recordings/gemini/streams-text.json +28 -0
  88. package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
  89. package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
  90. package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
  91. package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
  92. package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
  93. package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
  94. package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
  95. package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
  96. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
  97. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
  98. package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
  99. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
  100. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
  101. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
  102. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
  103. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
  104. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
  105. package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
  106. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
  107. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
  108. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
  109. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
  110. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
  111. package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
  112. package/test/generate-object.test.ts +184 -0
  113. package/test/lib/effect.ts +50 -0
  114. package/test/lib/http.ts +98 -0
  115. package/test/lib/openai-chunks.ts +27 -0
  116. package/test/lib/sse.ts +17 -0
  117. package/test/lib/tool-runtime.ts +146 -0
  118. package/test/llm.test.ts +167 -0
  119. package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
  120. package/test/provider/anthropic-messages.recorded.test.ts +46 -0
  121. package/test/provider/anthropic-messages.test.ts +829 -0
  122. package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
  123. package/test/provider/bedrock-converse.test.ts +707 -0
  124. package/test/provider/cloudflare.test.ts +230 -0
  125. package/test/provider/gemini-cache.recorded.test.ts +48 -0
  126. package/test/provider/gemini.test.ts +476 -0
  127. package/test/provider/golden.recorded.test.ts +219 -0
  128. package/test/provider/openai-chat.test.ts +446 -0
  129. package/test/provider/openai-compatible-chat.test.ts +238 -0
  130. package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
  131. package/test/provider/openai-responses.test.ts +1322 -0
  132. package/test/provider/openrouter.test.ts +56 -0
  133. package/test/provider.types.ts +41 -0
  134. package/test/recorded-golden.ts +97 -0
  135. package/test/recorded-runner.ts +100 -0
  136. package/test/recorded-scenarios.ts +531 -0
  137. package/test/recorded-test.ts +74 -0
  138. package/test/recorded-utils.ts +56 -0
  139. package/test/recorded-websocket.ts +26 -0
  140. package/test/route.test.ts +43 -0
  141. package/test/schema.test.ts +97 -0
  142. package/test/tool-runtime.test.ts +802 -0
  143. package/test/tool-stream.test.ts +99 -0
  144. package/test/tool.types.ts +40 -0
  145. package/tsconfig.json +15 -0
@@ -0,0 +1,591 @@
1
+ # LLM Call Site Sketches
2
+
3
+ Scratchpad for examples first, abstractions second. Current direction: routes
4
+ execute, provider facades organize configured route sets, and models carry route
5
+ values directly.
6
+
7
+ ## Conversation Summary
8
+
9
+ Kit and Aidan want provider-specific LLM behavior to move out of Codilore's AI
10
+ SDK transform path and into `packages/llm` where possible. The goal is not a big
11
+ generic transform layer; the goal is small composable route definitions backed by
12
+ recorded golden tests.
13
+
14
+ Things to keep testing against:
15
+
16
+ - Cache placement: `cache: "auto"`, manual cache breakpoints, provider cache usage.
17
+ - Images: golden image tests for providers/protocols that claim image support.
18
+ - Reasoning: canonical reasoning parts/events versus provider-native knobs.
19
+ - Auth: bearer, custom headers, multiple credentials, query auth, SigV4, OAuth, no auth.
20
+ - OpenAI-compatible providers: DeepSeek, Together, Groq, Alibaba/DashScope, custom routers.
21
+ - Provider switching: stale signatures, encrypted reasoning, provider metadata, incompatible parts.
22
+ - Error quality: typed errors instead of generic SDK/server failures.
23
+
24
+ ## Final Guide: Routes Execute, Providers Organize
25
+
26
+ Do not introduce a first-class `Deployment` abstraction unless it gains real
27
+ semantics. Provider facades are ergonomic configured route groups, not execution
28
+ registries. The executable/composable thing is still a route. Do not make route
29
+ construction publish to a global registry; models should carry their route value
30
+ directly.
31
+
32
+ Keep durable identity separate from runtime capability:
33
+
34
+ - Durable identity is small serializable data like `{ providerID, modelID }` for
35
+ config, sessions, logs, and catalogs.
36
+ - Runtime capability is a `Model` with a route value, protocol, transport, auth,
37
+ and defaults. It is allowed to contain functions and schemas.
38
+ - If persisted identity needs to become executable, resolve it through an app
39
+ boundary first. Do not make `LLMRequest` recover behavior from a global route
40
+ side table.
41
+
42
+ Keep unconfigured behavior values as values, not factories. A transport like
43
+ `HttpTransport.sseJson` should be a reusable immutable value. Use a function only
44
+ when the caller supplies options or when construction needs fresh state.
45
+
46
+ Use constants to remove repetition before inventing abstractions. Provider ids
47
+ are branded once per provider facade and reused across routes; a plain exported
48
+ object is enough for the provider-facing API unless a helper earns its keep by
49
+ removing repeated route projection.
50
+
51
+ Expose default configured provider instances, and put provider-specific setup on
52
+ `.configure(...)`. Model selectors stay pure: `model(id)`, `responses(id)`,
53
+ `chat(id)`, etc. Endpoint/auth/resource/api-version configuration happens before
54
+ model selection, not as a second argument to model selection.
55
+
56
+ Use provider/product facades consistently:
57
+
58
+ - One coherent provider/product config surface gets one top-level facade.
59
+ - APIs/model kinds that share that config are methods on the facade.
60
+ - Different products with different required config get separate top-level
61
+ facades, not a shared namespace with unrelated children.
62
+ - Default facades are exposed only when concrete defaults or lazy env/credential
63
+ defaults make the facade valid.
64
+
65
+ Examples:
66
+
67
+ ```ts
68
+ OpenAI.responses("gpt-4o")
69
+ OpenAI.chat("gpt-4o")
70
+ OpenAI.responsesWebSocket("gpt-4o")
71
+
72
+ Azure.configure({ resourceName, apiKey }).responses("my-deployment")
73
+ AmazonBedrock.configure({ region, credentials }).model("anthropic.claude-3-5-sonnet-20241022-v2:0")
74
+
75
+ CloudflareAIGateway.configure({ accountId, gatewayId, gatewayApiKey, apiKey }).model("openai/gpt-4o")
76
+ CloudflareWorkersAI.configure({ accountId, apiKey }).model("@cf/meta/llama-3.1-8b-instruct")
77
+
78
+ OpenAICompatible.configure({
79
+ provider: "custom",
80
+ baseURL: "https://custom.example/v1",
81
+ auth: Auth.bearer(apiKey),
82
+ }).model("custom-model")
83
+ ```
84
+
85
+ Standardize the provider facade contract before abstracting construction. A
86
+ plain object is enough at first; add a helper only if repeated route projection
87
+ starts hiding the real provider-specific config.
88
+
89
+ `Route.with(...)` patch semantics should be boring and explicit:
90
+
91
+ - Omitted fields inherit from the original route.
92
+ - `endpoint` patches merge with the existing endpoint, so overriding `baseURL`
93
+ keeps the existing `path`.
94
+ - `endpoint.query` merges by default; later values win.
95
+ - `auth` replaces.
96
+ - `headers` merge by default; undefined values are omitted.
97
+ - `id` is optional in patches. Route ids are diagnostic/provider API labels, not
98
+ global runtime registry keys.
99
+
100
+ 1. **Route**
101
+ - route id
102
+ - provider id
103
+ - protocol
104
+ - body schema
105
+ - body builder
106
+ - stream event schema
107
+ - parser/state machine
108
+ - transport
109
+ - method / IO shape
110
+ - framing
111
+ - request preparation
112
+ - constants when unconfigured; functions only when configured
113
+ - endpoint
114
+ - base URL
115
+ - static path
116
+ - body/model-derived path
117
+ - query params
118
+ - auth
119
+ - bearer
120
+ - custom header
121
+ - multiple credentials
122
+ - SigV4
123
+ - none
124
+ - defaults
125
+ - headers
126
+ - generation defaults
127
+ - provider options
128
+ - limits
129
+ 2. **Provider Facade**
130
+ - default configured provider instance
131
+ - provider-specific `.configure(...)`
132
+ - plain object/function facade over one or more routes
133
+ - top-level export only when it represents one coherent config surface
134
+ - no passive `Provider.make(...)` wrapper unless it gains runtime behavior
135
+ 3. **Model Selector**
136
+ - route/provider-owned selector
137
+ - accepts model id only
138
+ - returns executable models
139
+ - does not accept endpoint/auth/deployment overrides
140
+ 4. **Model**
141
+ - model id
142
+ - route value
143
+ - provider id
144
+ - configured route value at selection time
145
+ 5. **LLM Request**
146
+ - model
147
+ - messages/tools
148
+ - generation/cache/reasoning/response-format options
149
+ - request-level HTTP overlays for per-request headers/query/body additions,
150
+ not provider endpoint/auth reconfiguration
151
+ 6. **Compile**
152
+ - read route from model
153
+ - merge route defaults and request overrides
154
+ - build final URL from route endpoint
155
+ - apply auth from the configured route
156
+ - build body with protocol
157
+ - execute with transport and parse with protocol
158
+
159
+ ## Provider Facade Shape
160
+
161
+ The provider abstraction is a facade over configured routes, not the runtime
162
+ execution mechanism:
163
+
164
+ ```ts
165
+ type ProviderFacade<APIs, Config> = {
166
+ readonly id: ProviderID
167
+ readonly model: (id: string) => Model
168
+ readonly configure: (input?: Config) => ProviderFacade<APIs, Config>
169
+ } & APIs
170
+ ```
171
+
172
+ Manual construction is fine and should be the default until duplication earns a
173
+ helper:
174
+
175
+ ```ts
176
+ export const OpenAI = {
177
+ id: openAIProvider,
178
+ model: openAIResponses.model,
179
+ responses: openAIResponses.model,
180
+ chat: openAIChat.model,
181
+ configure: configureOpenAI,
182
+ } satisfies ProviderFacade<
183
+ {
184
+ responses: (id: string) => Model
185
+ chat: (id: string) => Model
186
+ },
187
+ OpenAIConfig
188
+ >
189
+ ```
190
+
191
+ If several providers repeat the same projection from route values to model
192
+ methods, a `Provider.define(...)` helper can stay deliberately tiny:
193
+
194
+ ```ts
195
+ const configureOpenAI = (input: OpenAIConfig = {}) =>
196
+ Provider.define({
197
+ id: openAIProvider,
198
+ routes: {
199
+ responses: openAIResponses.with(openAIConfig(input)),
200
+ chat: openAIChat.with(openAIConfig(input)),
201
+ },
202
+ default: "responses",
203
+ configure: configureOpenAI,
204
+ })
205
+
206
+ export const OpenAI = configureOpenAI()
207
+ ```
208
+
209
+ `Provider.define(...)` would only project route methods and preserve types:
210
+
211
+ ```ts
212
+ OpenAI.model("gpt-4o")
213
+ OpenAI.responses("gpt-4o")
214
+ OpenAI.chat("gpt-4o")
215
+ OpenAI.configure({ apiKey }).responses("gpt-4o")
216
+ ```
217
+
218
+ `Provider.define(...)` must not register routes, select routes dynamically, or participate in
219
+ execution. Execution still reads the route value carried by the model.
220
+
221
+ ## Ideal Call Sites
222
+
223
+ Define concrete routes for a native provider, then project them through a
224
+ provider facade:
225
+
226
+ ```ts
227
+ const openAIProvider = ProviderID.make("openai")
228
+
229
+ const openAIResponses = Route.make({
230
+ id: "openai-responses",
231
+ provider: openAIProvider,
232
+ protocol: OpenAIResponses.protocol,
233
+ transport: HttpTransport.sseJson,
234
+ endpoint: {
235
+ baseURL: "https://api.openai.com/v1",
236
+ path: "/responses",
237
+ },
238
+ auth: Auth.envBearer("OPENAI_API_KEY"),
239
+ })
240
+
241
+ const openAIChat = Route.make({
242
+ id: "openai-chat",
243
+ provider: openAIProvider,
244
+ protocol: OpenAIChat.protocol,
245
+ transport: HttpTransport.sseJson,
246
+ endpoint: {
247
+ baseURL: "https://api.openai.com/v1",
248
+ path: "/chat/completions",
249
+ },
250
+ auth: Auth.envBearer("OPENAI_API_KEY"),
251
+ })
252
+
253
+ const openAIResponsesWebSocket = openAIResponses.with({
254
+ id: "openai-responses-websocket",
255
+ transport: WebSocketTransport.json,
256
+ })
257
+
258
+ const openAIConfig = (input: OpenAIConfig) => ({
259
+ endpoint: input.endpoint,
260
+ auth: input.auth ?? (input.apiKey ? Auth.bearer(input.apiKey) : undefined),
261
+ headers: {
262
+ "OpenAI-Organization": input.organization,
263
+ "OpenAI-Project": input.project,
264
+ },
265
+ })
266
+
267
+ const configureOpenAI = (input: OpenAIConfig = {}) => {
268
+ const responses = openAIResponses.with(openAIConfig(input))
269
+ const responsesWebSocket = openAIResponsesWebSocket.with(openAIConfig(input))
270
+ const chat = openAIChat.with(openAIConfig(input))
271
+
272
+ return {
273
+ id: openAIProvider,
274
+ responses: responses.model,
275
+ responsesWebSocket: responsesWebSocket.model,
276
+ chat: chat.model,
277
+ model: responses.model,
278
+ configure: configureOpenAI,
279
+ }
280
+ }
281
+
282
+ export const OpenAI = configureOpenAI()
283
+ ```
284
+
285
+ Specialize the OpenAI chat route functionally for concrete providers:
286
+
287
+ ```ts
288
+ const deepSeekProvider = ProviderID.make("deepseek")
289
+
290
+ const deepseekChat = openAIChat.with({
291
+ id: "deepseek-chat",
292
+ provider: deepSeekProvider,
293
+ endpoint: {
294
+ baseURL: "https://api.deepseek.com/v1",
295
+ },
296
+ auth: Auth.envBearer("DEEPSEEK_API_KEY"),
297
+ })
298
+
299
+ const configureDeepSeek = (input: OpenAICompatibleConfig = {}) => {
300
+ const route = deepseekChat.with({
301
+ endpoint: input.endpoint,
302
+ auth: input.auth ?? (input.apiKey ? Auth.bearer(input.apiKey) : undefined),
303
+ })
304
+
305
+ return {
306
+ id: deepSeekProvider,
307
+ model: route.model,
308
+ configure: configureDeepSeek,
309
+ }
310
+ }
311
+
312
+ export const DeepSeek = {
313
+ id: deepSeekProvider,
314
+ model: deepseekChat.model,
315
+ configure: configureDeepSeek,
316
+ }
317
+ ```
318
+
319
+ Provider-specific configuration happens before model selection:
320
+
321
+ ```ts
322
+ const deepseek = DeepSeek.configure({
323
+ endpoint: {
324
+ baseURL: "https://proxy.example.com/v1",
325
+ },
326
+ auth: Auth.bearer(apiKey),
327
+ })
328
+
329
+ const model = deepseek.model("deepseek-chat")
330
+ ```
331
+
332
+ Final request call site stays boring:
333
+
334
+ ```ts
335
+ const response =
336
+ yield *
337
+ LLM.generate(
338
+ LLM.request({
339
+ model: DeepSeek.model("deepseek-chat"),
340
+ prompt: "Hello.",
341
+ }),
342
+ )
343
+ ```
344
+
345
+ HTTP versus WebSocket is represented as named route selectors, not as model or
346
+ request overrides. Same protocol, different transport, different route:
347
+
348
+ ```ts
349
+ OpenAI.responses("gpt-4o")
350
+ OpenAI.responsesWebSocket("gpt-4o")
351
+ ```
352
+
353
+ The client should not require a different public layer just because a selected
354
+ route uses WebSocket. Use one `LLMClient.layer` with HTTP and WebSocket runtime
355
+ capabilities available; routes that do not need WebSocket simply never touch it.
356
+ If a WebSocket route is selected in an environment without WebSocket support,
357
+ fail with a typed transport configuration error.
358
+
359
+ Azure is a route specialization with auth/path/default changes plus input
360
+ mapping. The public API configures the Azure resource once, then selects
361
+ deployment ids with pure model selectors:
362
+
363
+ ```ts
364
+ const azureProvider = ProviderID.make("azure")
365
+
366
+ const azureResponses = openAIResponses.with({
367
+ id: "azure-openai-responses",
368
+ provider: azureProvider,
369
+ auth: Auth.envHeader("api-key", "AZURE_OPENAI_API_KEY"),
370
+ })
371
+
372
+ const configureAzure = (input: AzureConfig = {}) => {
373
+ const route = azureResponses.with({
374
+ endpoint: {
375
+ baseURL:
376
+ input.baseURL ??
377
+ Endpoint.envBaseURL(
378
+ "AZURE_RESOURCE_NAME",
379
+ (resourceName) => `https://${resourceName}.openai.azure.com/openai/v1`,
380
+ ),
381
+ query: { "api-version": input.apiVersion ?? "v1" },
382
+ },
383
+ auth: input.apiKey ? Auth.header("api-key", input.apiKey) : Auth.envHeader("api-key", "AZURE_OPENAI_API_KEY"),
384
+ })
385
+
386
+ return {
387
+ id: azureProvider,
388
+ model: route.model,
389
+ responses: route.model,
390
+ configure: configureAzure,
391
+ }
392
+ }
393
+
394
+ export const Azure = configureAzure()
395
+
396
+ const azure = Azure.configure({
397
+ resourceName: "my-resource",
398
+ apiVersion: "v1",
399
+ })
400
+
401
+ const model = azure.responses("my-deployment")
402
+ ```
403
+
404
+ Default provider facades are only valid when required configuration has a lazy
405
+ default source. `Azure.responses("my-deployment")` can be valid if endpoint
406
+ resolution reads `AZURE_RESOURCE_NAME` lazily and fails with a typed
407
+ configuration error when missing. If a provider has no sensible lazy default,
408
+ do not expose a default model selector; expose only a configured entrypoint.
409
+
410
+ Cloudflare AI Gateway and Workers AI are separate product facades because their
411
+ configuration surfaces differ. Do not make a root `Cloudflare.configure(...)`
412
+ pretend there is one coherent Cloudflare provider configuration:
413
+
414
+ ```ts
415
+ const cloudflareProvider = ProviderID.make("cloudflare-ai-gateway")
416
+
417
+ const cloudflareOpenAIChat = openAIChat.with({
418
+ id: "cloudflare-ai-gateway-openai-chat",
419
+ provider: cloudflareProvider,
420
+ auth: Auth.bearerHeader("cf-aig-authorization").andThen(Auth.bearer()),
421
+ })
422
+
423
+ const configureCloudflareAIGateway = (input: CloudflareAIGatewayConfig) => {
424
+ const route = cloudflareOpenAIChat.with({
425
+ endpoint: {
426
+ baseURL: `https://gateway.ai.cloudflare.com/v1/${input.accountId}/${input.gatewayId}/openai`,
427
+ },
428
+ auth: Auth.bearerHeader("cf-aig-authorization", input.gatewayApiKey).andThen(Auth.bearer(input.apiKey)),
429
+ })
430
+
431
+ return {
432
+ id: cloudflareProvider,
433
+ model: (modelID: string) => route.model({ id: modelID }),
434
+ configure: configureCloudflareAIGateway,
435
+ }
436
+ }
437
+
438
+ export const CloudflareAIGateway = {
439
+ id: cloudflareProvider,
440
+ configure: configureCloudflareAIGateway,
441
+ }
442
+
443
+ const gateway = CloudflareAIGateway.configure({
444
+ accountId: "account",
445
+ gatewayId: "gateway",
446
+ gatewayApiKey,
447
+ apiKey,
448
+ })
449
+
450
+ const model = gateway.model("openai/gpt-4o")
451
+ ```
452
+
453
+ If a Cloudflare product gains a full lazy env default, it can expose a direct
454
+ selector too. Until then, omitting `CloudflareAIGateway.model(...)` makes missing
455
+ account/gateway configuration unrepresentable.
456
+
457
+ Codilore's dynamic runtime should construct executable models at its app
458
+ boundary instead of exposing a giant unstructured public model constructor or a
459
+ generic dynamic resolver:
460
+
461
+ ```ts
462
+ const model =
463
+ providerID === "azure"
464
+ ? Azure.configure(resolvedAzureConfig).responses(apiModelID)
465
+ : endpoint.websocket
466
+ ? OpenAI.responsesWebSocket(apiModelID)
467
+ : OpenAI.responses(apiModelID)
468
+ ```
469
+
470
+ That boundary can branch on durable config/catalog metadata and call typed
471
+ provider APIs directly. Transport selection belongs there too: map metadata like
472
+ `endpoint.websocket` to `OpenAI.responsesWebSocket(apiModelID)`; otherwise use
473
+ the normal `OpenAI.responses(apiModelID)` route. The client runtime only executes
474
+ the route carried by the model.
475
+
476
+ ## Competitive Shape
477
+
478
+ This follows the strongest parts of adjacent libraries:
479
+
480
+ - AI SDK: configured provider instances expose provider-specific model methods.
481
+ - Effect AI: executable models carry provider requirements and can be resolved by
482
+ an app boundary.
483
+ - LiteLLM/Codilore config: dynamic `providerID/modelID` branching belongs at the
484
+ app boundary, not in the typed public provider API or a global runtime
485
+ resolver.
486
+ - LangChain/LlamaIndex: constructor-style config plus model id is convenient,
487
+ but we avoid making model selection also configure endpoint/auth.
488
+
489
+ The chosen split is:
490
+
491
+ ```txt
492
+ Route = execution mechanics
493
+ Provider facade = configured route group
494
+ Model = selected executable model carrying route value
495
+ App boundary = explicit durable-config -> typed-provider call
496
+ ```
497
+
498
+ ## What This Removes
499
+
500
+ - No `Provider.make(...)` as a core abstraction.
501
+ - No `Provider.make(...)` wrapper just to bind an id to model functions. Use a
502
+ branded provider id constant and a plain exported provider facade.
503
+ - No `Deployment.define(...)` unless future examples force it.
504
+ - No global route registry as the normal execution path.
505
+ - No import side effects required before a model can execute.
506
+ - No duplicate `provider.id` object when selected models already carry provider
507
+ id.
508
+ - No `model(id, overrides)` escape hatch. Model selection takes the model id;
509
+ endpoint/auth/deployment customization happens by configuring the route first.
510
+ - No transport override on model/request. HTTP SSE versus WebSocket is a named
511
+ route selector such as `responses` versus `responsesWebSocket`.
512
+ - No separate public `LLMClient.layerWithWebSocket`. The runtime should expose one
513
+ client layer with the available transport capabilities.
514
+ - No executable `ModelRef`. The executable handle is `Model`; durable model
515
+ identity stays separate and cannot execute on its own.
516
+
517
+ ## Implementation Todo
518
+
519
+ - [x] Replace the current executable `ModelRef` with `Model`.
520
+ - [x] Change `Model.route` to carry a route value, not a `RouteID` string.
521
+ - [ ] Keep a separate durable model identity type for persisted/session/catalog
522
+ data, likely `{ providerID, modelID }`, and make it clear that it cannot
523
+ execute without resolver context.
524
+ - [x] Change route model selectors so `route.model(id)` returns an executable
525
+ model with the route value attached, not a globally registered route id.
526
+ - [x] Remove the standalone `Route.model(route, defaults, mapInput)` helper;
527
+ configured route instances own model selection.
528
+ - [x] Remove endpoint/auth escape hatches from route model selection; callers must
529
+ configure endpoint/auth through `route.with(...)` or provider facades before
530
+ calling `.model(...)`.
531
+ - [x] Remove request-shaping defaults from `Model`; selected models now carry only
532
+ id, provider, and configured route while defaults live on routes or requests.
533
+ - [x] Rework `LLMClient.prepare` / `stream` / `generate` to read
534
+ `request.model.route` directly instead of calling `registeredRoute(...)`.
535
+ - [x] Remove `Route.make(...)` global registration from the normal execution
536
+ path; keep route ids only as diagnostics/provider API labels.
537
+ - [x] Represent the endpoint as `{ baseURL, path, query }` on routes, then remove the
538
+ current split where host/query live on the model and path lives in route
539
+ transport setup.
540
+ - [x] Define `Route.with(...)` with explicit patch semantics for endpoint merge,
541
+ query merge, header merge, auth replacement, and optional diagnostic id.
542
+ - [x] Make unconfigured transports reusable constants such as
543
+ `HttpTransport.sseJson`; keep transport functions only for configured/fresh
544
+ state construction.
545
+ - [x] Collapse the public WebSocket runtime split so one `LLMClient.layer`
546
+ exposes available transport capabilities and selected routes fail with typed
547
+ transport config errors when a required capability is missing.
548
+ - [x] Convert OpenAI provider APIs to provider-facade shape:
549
+ `OpenAI.configure(config).responses(id)`, `.chat(id)`, and
550
+ `.responsesWebSocket(id)`.
551
+ - [x] Convert Azure to a configured facade where resource/base URL/api version
552
+ setup happens before selecting deployment ids.
553
+ - [x] Split Cloudflare products into separate facades such as
554
+ `CloudflareAIGateway` and `CloudflareWorkersAI`; do not expose a shared root
555
+ config surface unless one coherent product-wide configuration actually exists.
556
+ - [x] Migrate remaining built-in provider facades one at a time so configuration
557
+ happens before model selection and selectors accept only ids:
558
+ xAI, GitHub Copilot, OpenRouter, OpenAI-compatible families, Anthropic,
559
+ Google/Gemini, and Amazon Bedrock now use configured facades such as
560
+ `Provider.configure(options).model(id)` with named selectors where needed.
561
+ - [ ] Decide whether a tiny `Provider.define(...)` helper is warranted after two
562
+ or three provider conversions; start with plain objects if duplication is not
563
+ yet painful.
564
+ - [x] Update `packages/codilore/src/session/llm/native-request.ts` to construct
565
+ executable models at the session boundary with explicit provider facade
566
+ calls, mapping catalog metadata such as `endpoint.websocket` to the correct
567
+ named route selector.
568
+ - [ ] Update tests so direct route/provider tests assert route values are carried
569
+ by executable models, and Codilore/native tests assert boundary-based route
570
+ selection.
571
+ - [ ] Remove compatibility exports or stale docs only after internal call sites
572
+ are migrated; do not keep duplicate constructor paths without an external
573
+ compatibility need.
574
+
575
+ ## Open Questions
576
+
577
+ - Default facades with required setup: should providers like Azure and Bedrock
578
+ expose default model selectors only when all required setup has lazy env or
579
+ credential-chain defaults? When those defaults are absent, omit the default selector so missing config
580
+ is impossible at the type/API level.
581
+ - Lazy endpoint/auth values: should `Endpoint.envBaseURL(...)` and env-backed
582
+ auth produce typed configuration/authentication errors at compile/prepare time
583
+ or only when executing the transport?
584
+ - `Route.with(...)` clearing semantics: endpoint/query/header patches merge by
585
+ default, but what is the explicit way to remove an inherited value?
586
+ - Provider facade helper: keep plain objects until duplication hurts, or add a
587
+ tiny `Provider.define(...)` immediately to enforce shape and method projection?
588
+ - Auth shape: should auth stay as today's composable `Auth`, or split into an
589
+ auth placement/strategy and credential sources?
590
+ - Naming: is `baseURL` still the right endpoint field name, or should it be
591
+ `origin` / `urlPrefix` to clarify that route `path` is appended?