@codilore/llm 1.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145):
  1. package/AGENTS.md +321 -0
  2. package/README.md +131 -0
  3. package/example/call-sites.md +591 -0
  4. package/example/tutorial.ts +255 -0
  5. package/package.json +50 -0
  6. package/script/recording-cost-report.ts +250 -0
  7. package/script/setup-recording-env.ts +542 -0
  8. package/src/cache-policy.ts +111 -0
  9. package/src/index.ts +32 -0
  10. package/src/llm.ts +186 -0
  11. package/src/protocols/anthropic-messages.ts +841 -0
  12. package/src/protocols/bedrock-converse.ts +649 -0
  13. package/src/protocols/bedrock-event-stream.ts +87 -0
  14. package/src/protocols/gemini.ts +465 -0
  15. package/src/protocols/index.ts +6 -0
  16. package/src/protocols/openai-chat.ts +431 -0
  17. package/src/protocols/openai-compatible-chat.ts +24 -0
  18. package/src/protocols/openai-responses.ts +987 -0
  19. package/src/protocols/shared.ts +283 -0
  20. package/src/protocols/utils/bedrock-auth.ts +70 -0
  21. package/src/protocols/utils/bedrock-cache.ts +37 -0
  22. package/src/protocols/utils/bedrock-media.ts +80 -0
  23. package/src/protocols/utils/cache.ts +16 -0
  24. package/src/protocols/utils/gemini-tool-schema.ts +101 -0
  25. package/src/protocols/utils/lifecycle.ts +102 -0
  26. package/src/protocols/utils/openai-options.ts +84 -0
  27. package/src/protocols/utils/tool-stream.ts +218 -0
  28. package/src/provider.ts +37 -0
  29. package/src/providers/amazon-bedrock.ts +43 -0
  30. package/src/providers/anthropic.ts +35 -0
  31. package/src/providers/azure.ts +110 -0
  32. package/src/providers/cloudflare.ts +127 -0
  33. package/src/providers/github-copilot.ts +66 -0
  34. package/src/providers/google.ts +35 -0
  35. package/src/providers/index.ts +11 -0
  36. package/src/providers/openai-compatible-profile.ts +20 -0
  37. package/src/providers/openai-compatible.ts +65 -0
  38. package/src/providers/openai-options.ts +81 -0
  39. package/src/providers/openai.ts +63 -0
  40. package/src/providers/openrouter.ts +98 -0
  41. package/src/providers/xai.ts +56 -0
  42. package/src/route/auth-options.ts +57 -0
  43. package/src/route/auth.ts +156 -0
  44. package/src/route/client.ts +434 -0
  45. package/src/route/endpoint.ts +53 -0
  46. package/src/route/executor.ts +374 -0
  47. package/src/route/framing.ts +27 -0
  48. package/src/route/index.ts +25 -0
  49. package/src/route/protocol.ts +84 -0
  50. package/src/route/transport/http.ts +108 -0
  51. package/src/route/transport/index.ts +33 -0
  52. package/src/route/transport/websocket.ts +280 -0
  53. package/src/schema/errors.ts +203 -0
  54. package/src/schema/events.ts +370 -0
  55. package/src/schema/ids.ts +43 -0
  56. package/src/schema/index.ts +5 -0
  57. package/src/schema/messages.ts +404 -0
  58. package/src/schema/options.ts +221 -0
  59. package/src/tool-runtime.ts +78 -0
  60. package/src/tool.ts +241 -0
  61. package/src/utils/record.ts +3 -0
  62. package/sst-env.d.ts +10 -0
  63. package/test/adapter.test.ts +164 -0
  64. package/test/auth-options.types.ts +168 -0
  65. package/test/auth.test.ts +103 -0
  66. package/test/cache-policy.test.ts +262 -0
  67. package/test/continuation-scenarios.ts +104 -0
  68. package/test/endpoint.test.ts +58 -0
  69. package/test/executor.test.ts +418 -0
  70. package/test/exports.test.ts +62 -0
  71. package/test/fixtures/media/restroom.png +0 -0
  72. package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
  73. package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
  74. package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
  75. package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
  76. package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
  77. package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
  78. package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
  79. package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
  80. package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
  81. package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
  82. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  83. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
  84. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  85. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
  86. package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
  87. package/test/fixtures/recordings/gemini/streams-text.json +28 -0
  88. package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
  89. package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
  90. package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
  91. package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
  92. package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
  93. package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
  94. package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
  95. package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
  96. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
  97. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
  98. package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
  99. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
  100. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
  101. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
  102. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
  103. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
  104. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
  105. package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
  106. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
  107. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
  108. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
  109. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
  110. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
  111. package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
  112. package/test/generate-object.test.ts +184 -0
  113. package/test/lib/effect.ts +50 -0
  114. package/test/lib/http.ts +98 -0
  115. package/test/lib/openai-chunks.ts +27 -0
  116. package/test/lib/sse.ts +17 -0
  117. package/test/lib/tool-runtime.ts +146 -0
  118. package/test/llm.test.ts +167 -0
  119. package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
  120. package/test/provider/anthropic-messages.recorded.test.ts +46 -0
  121. package/test/provider/anthropic-messages.test.ts +829 -0
  122. package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
  123. package/test/provider/bedrock-converse.test.ts +707 -0
  124. package/test/provider/cloudflare.test.ts +230 -0
  125. package/test/provider/gemini-cache.recorded.test.ts +48 -0
  126. package/test/provider/gemini.test.ts +476 -0
  127. package/test/provider/golden.recorded.test.ts +219 -0
  128. package/test/provider/openai-chat.test.ts +446 -0
  129. package/test/provider/openai-compatible-chat.test.ts +238 -0
  130. package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
  131. package/test/provider/openai-responses.test.ts +1322 -0
  132. package/test/provider/openrouter.test.ts +56 -0
  133. package/test/provider.types.ts +41 -0
  134. package/test/recorded-golden.ts +97 -0
  135. package/test/recorded-runner.ts +100 -0
  136. package/test/recorded-scenarios.ts +531 -0
  137. package/test/recorded-test.ts +74 -0
  138. package/test/recorded-utils.ts +56 -0
  139. package/test/recorded-websocket.ts +26 -0
  140. package/test/route.test.ts +43 -0
  141. package/test/schema.test.ts +97 -0
  142. package/test/tool-runtime.test.ts +802 -0
  143. package/test/tool-stream.test.ts +99 -0
  144. package/test/tool.types.ts +40 -0
  145. package/tsconfig.json +15 -0
@@ -0,0 +1,374 @@
1
+ import { Cause, Context, Effect, Layer, Random } from "effect"
2
+ import {
3
+ FetchHttpClient,
4
+ Headers,
5
+ HttpClient,
6
+ HttpClientError,
7
+ HttpClientRequest,
8
+ HttpClientResponse,
9
+ } from "effect/unstable/http"
10
+ import {
11
+ AuthenticationReason,
12
+ ContentPolicyReason,
13
+ HttpContext,
14
+ HttpRateLimitDetails,
15
+ HttpRequestDetails,
16
+ HttpResponseDetails,
17
+ InvalidRequestReason,
18
+ LLMError,
19
+ ProviderInternalReason,
20
+ QuotaExceededReason,
21
+ RateLimitReason,
22
+ TransportReason,
23
+ UnknownProviderReason,
24
+ } from "../schema"
25
+
26
/** Shape of the request executor service. */
export interface Interface {
  /**
   * Run one HTTP request.
   *
   * Succeeds with the HTTP response, or fails with an `LLMError`.
   */
  readonly execute: (
    request: HttpClientRequest.HttpClientRequest,
  ) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}

/** Context tag for the request executor, keyed by "@Codilore/LLM/RequestExecutor". */
export class Service extends Context.Service<Service, Interface>()("@Codilore/LLM/RequestExecutor") {}
33
+
34
/** Maximum number of characters of a (redacted) response body kept on an error. */
const BODY_LIMIT = 16_384
/** Default number of retries performed after a retryable failure. */
const MAX_RETRIES = 2
/** Base backoff delay in milliseconds; doubled per attempt before jitter is applied. */
const BASE_DELAY_MS = 500
/** Upper bound, in milliseconds, for any single retry delay. */
const MAX_DELAY_MS = 10_000
/** Placeholder written wherever a sensitive value is removed. */
const REDACTED = "<redacted>"

// One source of truth for what counts as a sensitive name across headers,
// URL query keys, and field names embedded inside request/response bodies.
//
// `SENSITIVE_NAME` is used as both a substring matcher (for free-form header
// names like `Authorization` / `X-API-Key`) and as the body-field alternation
// list. `SHORT_QUERY_NAME` covers anchored short keys like `?key=…` / `?sig=…`
// that are too generic to redact substring-style without false positives.
const SENSITIVE_NAME_SOURCE =
  "authorization|api[-_]?key|access[-_]?token|refresh[-_]?token|id[-_]?token|token|secret|credential|signature|x-amz-signature"
const SENSITIVE_NAME = new RegExp(SENSITIVE_NAME_SOURCE, "i")
const SHORT_QUERY_NAME = /^(key|sig)$/i
const SENSITIVE_BODY_FIELD = new RegExp(`(?:${SENSITIVE_NAME_SOURCE}|key)`, "i")
// Matches `"<sensitive name>": "<value>"`. The value pattern is escape-aware:
// a JSON string may contain `\"`, and stopping at the first quote character
// would redact only a prefix of the secret and leak the remainder.
const REDACT_JSON_FIELD = new RegExp(
  `("(?:${SENSITIVE_BODY_FIELD.source})"\\s*:\\s*)"(?:[^"\\\\]|\\\\[\\s\\S])*"`,
  "gi",
)
// Matches `<sensitive name>=<value>` up to the next `&`, whitespace, or quote.
const REDACT_QUERY_FIELD = new RegExp(`((?:${SENSITIVE_BODY_FIELD.source})=)[^&\\s"]+`, "gi")
54
+
55
/** True when `name` contains any sensitive-name fragment (case-insensitive substring match). */
const isSensitiveHeaderName = (name: string): boolean => {
  return SENSITIVE_NAME.test(name)
}
56
+
57
/** True when a query key looks sensitive: either header-style sensitive or exactly `key` / `sig`. */
const isSensitiveQueryName = (name: string): boolean => {
  if (isSensitiveHeaderName(name)) return true
  return SHORT_QUERY_NAME.test(name)
}
58
+
59
/**
 * Redact headers using the caller-supplied names plus `SENSITIVE_NAME`, then
 * stringify every value so the result is a plain string-to-string record.
 */
const redactHeaders = (headers: Headers.Headers, redactedNames: ReadonlyArray<string | RegExp>) => {
  const redacted = Headers.redact(headers, [...redactedNames, SENSITIVE_NAME])
  const pairs = Object.entries(redacted).map(([name, value]): [string, string] => [name, String(value)])
  return Object.fromEntries(pairs)
}
66
+
67
/**
 * Return `value` with secrets removed so it is safe to attach to an error.
 *
 * - Unparseable input is replaced wholesale with the redaction placeholder.
 * - A userinfo password (`https://user:pass@host`) is replaced.
 * - Every query parameter whose key looks sensitive is replaced.
 */
const redactUrl = (value: string) => {
  if (!URL.canParse(value)) return REDACTED
  const url = new URL(value)
  // Credentials embedded in the authority are secrets too.
  if (url.password !== "") url.password = REDACTED
  // Snapshot the keys first: `set` collapses duplicates, which would otherwise
  // mutate the parameter list while it is being iterated.
  for (const key of new Set(url.searchParams.keys())) {
    if (isSensitiveQueryName(key)) url.searchParams.set(key, REDACTED)
  }
  return url.toString()
}
75
+
76
/** Copy `headers` into a plain object whose keys are lower-cased. */
const normalizedHeaders = (headers: Headers.Headers) => {
  const lowered = Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value] as const)
  return Object.fromEntries(lowered)
}
78
+
79
/**
 * Pick the first request-id style header that is present, in priority order.
 * Expects lower-cased header names; the final fallback is `cf-ray`.
 */
const requestId = (headers: Record<string, string>) => {
  const preferred = ["x-request-id", "request-id", "x-amzn-requestid", "x-amz-request-id", "x-goog-request-id"]
  for (const name of preferred) {
    const candidate = headers[name]
    // Mirror `??`: only null/undefined fall through; an empty string is kept.
    if (candidate != null) return candidate
  }
  return headers["cf-ray"]
}
89
+
90
/** True for the HTTP statuses treated as retryable here: 429, 503, 504 and 529. */
const retryableStatus = (status: number) => {
  switch (status) {
    case 429:
    case 503:
    case 504:
    case 529:
      return true
    default:
      return false
  }
}
91
+
92
/**
 * Derive a retry delay in milliseconds from lower-cased response headers.
 *
 * Precedence: `retry-after-ms` (milliseconds), then `retry-after` as a number
 * of seconds, then `retry-after` as an HTTP date. Negative results clamp to 0.
 * Returns `undefined` when no usable hint is present.
 */
const retryAfterMs = (headers: Record<string, string>) => {
  // `Number("")` and `Number("  ")` are 0, so blank values must be rejected
  // explicitly or an empty header would be read as "retry immediately".
  const finiteNumber = (raw: string | undefined) => {
    if (raw === undefined || raw.trim() === "") return undefined
    const parsed = Number(raw)
    return Number.isFinite(parsed) ? parsed : undefined
  }

  const millis = finiteNumber(headers["retry-after-ms"])
  if (millis !== undefined) return Math.max(0, millis)

  const value = headers["retry-after"]
  if (!value) return undefined

  const seconds = finiteNumber(value)
  if (seconds !== undefined) return Math.max(0, seconds * 1000)

  const date = Date.parse(value)
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now())
  return undefined
}
106
+
107
/** Store `value` under `key` in `target`, ignoring empty keys. */
const addRateLimitValue = (target: Record<string, string>, key: string, value: string) => {
  if (key.length === 0) return
  target[key] = value
}
110
+
111
/**
 * Collect rate-limit information from lower-cased response headers.
 *
 * Recognizes `x-ratelimit-{limit,remaining,reset}-<name>` and
 * `anthropic-ratelimit-<name>-{limit,remaining,reset}`. Returns `undefined`
 * when there is neither a retry-after hint nor any recognized header.
 */
const rateLimitDetails = (headers: Record<string, string>, retryAfter: number | undefined) => {
  const limit: Record<string, string> = {}
  const remaining: Record<string, string> = {}
  const reset: Record<string, string> = {}
  const bucketFor = (kind: string) => (kind === "limit" ? limit : kind === "remaining" ? remaining : reset)

  for (const [name, value] of Object.entries(headers)) {
    const openai = /^x-ratelimit-(limit|remaining|reset)-(.+)$/.exec(name)
    if (openai) {
      addRateLimitValue(bucketFor(openai[1]), openai[2], value)
      continue
    }
    const anthropic = /^anthropic-ratelimit-(.+)-(limit|remaining|reset)$/.exec(name)
    if (anthropic) addRateLimitValue(bucketFor(anthropic[2]), anthropic[1], value)
  }

  // Empty buckets are reported as `undefined` rather than `{}`.
  const orUndefined = (bucket: Record<string, string>) => (Object.keys(bucket).length === 0 ? undefined : bucket)
  const limitOut = orUndefined(limit)
  const remainingOut = orUndefined(remaining)
  const resetOut = orUndefined(reset)

  const nothingFound =
    retryAfter === undefined && limitOut === undefined && remainingOut === undefined && resetOut === undefined
  if (nothingFound) return undefined

  return new HttpRateLimitDetails({
    retryAfterMs: retryAfter,
    limit: limitOut,
    remaining: remainingOut,
    reset: resetOut,
  })
}
148
+
149
/** Build the redacted request summary (method, URL, headers) attached to errors. */
const requestDetails = (
  request: HttpClientRequest.HttpClientRequest,
  redactedNames: ReadonlyArray<string | RegExp>,
) => {
  const safeUrl = redactUrl(request.url)
  const safeHeaders = redactHeaders(request.headers, redactedNames)
  return new HttpRequestDetails({ method: request.method, url: safeUrl, headers: safeHeaders })
}
155
+
156
/** Build the redacted response summary (status, headers) attached to errors. */
const responseDetails = (
  response: HttpClientResponse.HttpClientResponse,
  redactedNames: ReadonlyArray<string | RegExp>,
) => {
  const safeHeaders = redactHeaders(response.headers, redactedNames)
  return new HttpResponseDetails({ status: response.status, headers: safeHeaders })
}
164
+
165
/**
 * Gather the literal secret strings carried by `request`: values of sensitive
 * headers (plus the bare token of a `Bearer …` value) and values of sensitive
 * query parameters. Each is recorded raw and URI-component-encoded; values
 * shorter than four characters are skipped.
 */
const secretValues = (request: HttpClientRequest.HttpClientRequest) => {
  const collected = new Set<string>()
  const remember = (secret: string) => {
    if (secret.length >= 4) {
      collected.add(secret)
      collected.add(encodeURIComponent(secret))
    }
  }

  for (const [name, value] of Object.entries(request.headers)) {
    if (!isSensitiveHeaderName(name)) continue
    remember(value)
    const token = /^Bearer\s+(.+)$/i.exec(value)?.[1]
    if (token) remember(token)
  }

  if (URL.canParse(request.url)) {
    new URL(request.url).searchParams.forEach((value, key) => {
      if (isSensitiveQueryName(key)) remember(value)
    })
  }
  return collected
}
186
+
187
// Two passes: structural (redact `"name": "value"` and `name=value` patterns
// for any field name that looks sensitive) plus literal (replace any actual
// secret values we sent in the request, in case the response echoes one back).
//
// Literal secrets are replaced longest-first. Otherwise a short secret that is
// a prefix or substring of a longer one would be replaced first and leave the
// rest of the longer secret visible in the output.
const redactBody = (body: string, request: HttpClientRequest.HttpClientRequest) => {
  const structural = body
    .replace(REDACT_JSON_FIELD, `$1"${REDACTED}"`)
    .replace(REDACT_QUERY_FIELD, `$1${REDACTED}`)
  const secrets = Array.from(secretValues(request)).sort((a, b) => b.length - a.length)
  return secrets.reduce((text, secret) => text.split(secret).join(REDACTED), structural)
}
195
+
196
/**
 * Turn an optional response body into the fields stored on the error context:
 * nothing when the body is absent, otherwise the redacted text, truncated to
 * `BODY_LIMIT` characters with `bodyTruncated: true` when it was cut.
 */
const responseBody = (body: string | void, request: HttpClientRequest.HttpClientRequest) => {
  if (body === undefined) return {}
  const text = redactBody(body, request)
  if (text.length > BODY_LIMIT) return { body: text.slice(0, BODY_LIMIT), bodyTruncated: true }
  return { body: text }
}
202
+
203
/**
 * Human-readable failure message for an HTTP status. The body is appended only
 * when it is non-empty and at most 500 characters long.
 */
const providerMessage = (status: number, body: { readonly body?: string }) => {
  const summary = `Provider request failed with HTTP ${status}`
  const detail = body.body
  if (!detail || detail.length > 500) return summary
  return `${summary}: ${detail}`
}
207
+
208
/** Assemble the `HttpContext` for a failed response from its redacted parts. */
const responseHttp = (input: {
  readonly request: HttpClientRequest.HttpClientRequest
  readonly response: HttpClientResponse.HttpClientResponse
  readonly redactedNames: ReadonlyArray<string | RegExp>
  readonly body: ReturnType<typeof responseBody>
  readonly requestId?: string | undefined
  readonly rateLimit?: HttpRateLimitDetails | undefined
}) => {
  const { request, response, redactedNames, body, requestId: id, rateLimit } = input
  return new HttpContext({
    request: requestDetails(request, redactedNames),
    response: responseDetails(response, redactedNames),
    // Contributes `body` / `bodyTruncated` when a body was captured.
    ...body,
    requestId: id,
    rateLimit,
  })
}
223
+
224
/**
 * Classify a failed HTTP response into an error reason.
 *
 * A body that mentions content policy / content filter / safety wins over the
 * status code. After that the status decides: 401 and 403 are authentication,
 * 429 is quota (when the body says so) or rate limit, 400/404/409/422 are
 * invalid requests, 5xx and retryable statuses are provider-internal, and
 * anything else is unknown.
 */
const statusReason = (input: {
  readonly status: number
  readonly message: string
  readonly retryAfterMs?: number | undefined
  readonly rateLimit?: HttpRateLimitDetails | undefined
  readonly http: HttpContext
}) => {
  const { status, message, http } = input
  const body = http.body ?? ""

  if (/content[-_\s]?policy|content_filter|safety/i.test(body)) {
    return new ContentPolicyReason({ message, http })
  }

  switch (status) {
    case 401:
      return new AuthenticationReason({ message, kind: "invalid", http })
    case 403:
      return new AuthenticationReason({ message, kind: "insufficient-permissions", http })
    case 429:
      return /insufficient[-_\s]?quota|quota[-_\s]?exceeded/i.test(body)
        ? new QuotaExceededReason({ message, http })
        : new RateLimitReason({
            message,
            retryAfterMs: input.retryAfterMs,
            rateLimit: input.rateLimit,
            http,
          })
    case 400:
    case 404:
    case 409:
    case 422:
      return new InvalidRequestReason({ message, http })
  }

  if (status >= 500 || retryableStatus(status)) {
    return new ProviderInternalReason({ message, status, retryAfterMs: input.retryAfterMs, http })
  }
  return new UnknownProviderReason({ message, status, http })
}
265
+
266
/**
 * Curried response check: statuses below 400 pass through unchanged, anything
 * else is read as text (read failures are ignored) and turned into a failing
 * `LLMError` carrying the classified reason and redacted HTTP context.
 */
const statusError =
  (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
  (response: HttpClientResponse.HttpClientResponse) =>
    Effect.gen(function* () {
      if (response.status < 400) return response

      const text = yield* response.text.pipe(Effect.catch(() => Effect.void))
      const lowered = normalizedHeaders(response.headers)
      const retryAfter = retryAfterMs(lowered)
      const rateLimit = rateLimitDetails(lowered, retryAfter)
      const details = responseBody(text, request)

      const message = providerMessage(response.status, details)
      const http = responseHttp({
        request,
        response,
        redactedNames,
        body: details,
        requestId: requestId(lowered),
        rateLimit,
      })
      const reason = statusReason({
        status: response.status,
        message,
        retryAfterMs: retryAfter,
        rateLimit,
        http,
      })

      return yield* new LLMError({ module: "RequestExecutor", method: "execute", reason })
    })
295
+
296
/**
 * Curried mapper from any failure raised while executing a request to an
 * `LLMError` with a `TransportReason`. Timeouts and non-HTTP-client errors
 * carry no request context; HTTP client errors include the redacted request
 * when the error exposes one.
 */
const toHttpError = (redactedNames: ReadonlyArray<string | RegExp>) => (error: unknown) => {
  const wrap = (message: string, kind?: string, request?: HttpClientRequest.HttpClientRequest) =>
    new LLMError({
      module: "RequestExecutor",
      method: "execute",
      reason: new TransportReason({
        message,
        kind,
        url: request ? redactUrl(request.url) : undefined,
        http: request ? new HttpContext({ request: requestDetails(request, redactedNames) }) : undefined,
      }),
    })

  if (Cause.isTimeoutError(error)) return wrap(error.message, "Timeout")
  if (!HttpClientError.isHttpClientError(error)) return wrap("HTTP transport failed")

  const request = "request" in error ? error.request : undefined
  const reason = error.reason
  const message =
    reason._tag === "TransportError"
      ? (reason.description ?? "HTTP transport failed")
      : `HTTP transport failed: ${reason._tag}`
  return wrap(message, reason._tag, request)
}
333
+
334
/**
 * Delay in milliseconds before the next attempt. A server-provided hint on the
 * error is used (capped at `MAX_DELAY_MS`); otherwise the delay is exponential
 * backoff with ±20% random jitter, each bound capped at `MAX_DELAY_MS`.
 */
const retryDelay = (error: LLMError, attempt: number) => {
  const hinted = error.retryAfterMs
  if (hinted !== undefined) return Effect.succeed(Math.min(hinted, MAX_DELAY_MS))

  const backoff = BASE_DELAY_MS * 2 ** attempt
  const lower = Math.min(backoff * 0.8, MAX_DELAY_MS)
  const upper = Math.min(backoff * 1.2, MAX_DELAY_MS)
  return Random.nextBetween(lower, upper).pipe(Effect.map((delay) => Math.round(delay)))
}
341
+
342
/**
 * Re-run `effect` when it fails with a retryable `LLMError`, sleeping for
 * `retryDelay` between attempts. Stops and re-fails with the last error once
 * `retries` reaches zero or the error is not retryable.
 */
const retryStatusFailures = <A, R>(
  effect: Effect.Effect<A, LLMError, R>,
  retries = MAX_RETRIES,
  attempt = 0,
): Effect.Effect<A, LLMError, R> =>
  Effect.catchTag(effect, "LLM.Error", (error): Effect.Effect<A, LLMError, R> => {
    if (!error.retryable || retries <= 0) return Effect.fail(error)
    return Effect.gen(function* () {
      const delay = yield* retryDelay(error, attempt)
      yield* Effect.sleep(delay)
      return yield* retryStatusFailures(effect, retries - 1, attempt + 1)
    })
  })
354
+
355
/**
 * Layer that builds the executor `Service` on top of an `HttpClient`.
 *
 * Each `execute` call runs the request, maps transport failures and error
 * statuses to `LLMError`, and wraps the whole attempt in `retryStatusFailures`.
 */
export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
  Service,
  Effect.gen(function* () {
    const client = yield* HttpClient.HttpClient

    const runOnce = (request: HttpClientRequest.HttpClientRequest) =>
      Effect.gen(function* () {
        const redactedNames = yield* Headers.CurrentRedactedNames
        const response = yield* client.execute(request).pipe(Effect.mapError(toHttpError(redactedNames)))
        return yield* statusError(request, redactedNames)(response)
      })

    return Service.of({
      execute: (request) => retryStatusFailures(runOnce(request)),
    })
  }),
)
371
+
372
/** `layer` with its `HttpClient` requirement satisfied by `FetchHttpClient.layer`. */
export const defaultLayer = Layer.provide(FetchHttpClient.layer)(layer)
373
+
374
+ export * as RequestExecutor from "./executor"
@@ -0,0 +1,27 @@
1
+ import type { Stream } from "effect"
2
+ import * as ProviderShared from "../protocols/shared"
3
+ import type { LLMError } from "../schema"
4
+
5
+ /**
6
+ * Decode a streaming HTTP response body into provider-protocol frames.
7
+ *
8
+ * `Framing` is the byte-stream-shaped seam between transport and protocol:
9
+ *
10
+ * - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
11
+ * drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
12
+ * payload of one event.
13
+ * - AWS event stream — length-prefixed binary frames with CRC checksums.
14
+ * Each emitted frame is one parsed binary event record.
15
+ *
16
+ * The frame type is opaque to this layer; the protocol's `decode` step turns
17
+ * a frame into a typed chunk.
18
+ */
19
export interface Framing<Frame> {
  /** Identifier of the framing strategy (for example "sse"). */
  readonly id: string
  /** Transform a stream of raw byte chunks into a stream of frames. */
  readonly frame: (bytes: Stream.Stream<Uint8Array, LLMError>) => Stream.Stream<Frame, LLMError>
}
23
+
24
/** Server-Sent Events framing, backed by the shared SSE framing helper; frames are strings. */
export const sse: Framing<string> = {
  id: "sse",
  frame: ProviderShared.sseFraming,
}
26
+
27
+ export * as Framing from "./framing"
@@ -0,0 +1,25 @@
1
+ export { Route, LLMClient } from "./client"
2
+ export type {
3
+ Route as RouteShape,
4
+ RouteModelInput,
5
+ RouteRoutedModelInput,
6
+ RouteDefaults,
7
+ RouteDefaultsInput,
8
+ AnyRoute,
9
+ Interface as LLMClientShape,
10
+ Service as LLMClientService,
11
+ } from "./client"
12
+ export * from "./executor"
13
+ export { Auth } from "./auth"
14
+ export { AuthOptions } from "./auth-options"
15
+ export { Endpoint } from "./endpoint"
16
+ export { Framing } from "./framing"
17
+ export { Protocol } from "./protocol"
18
+ export { HttpTransport, WebSocketExecutor, WebSocketTransport } from "./transport"
19
+ export * as Transport from "./transport"
20
+ export type { Auth as AuthShape, AuthInput, Credential, CredentialError } from "./auth"
21
+ export type { ApiKeyMode, AuthOverride, ProviderAuthOption } from "./auth-options"
22
+ export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
23
+ export type { Framing as FramingDef } from "./framing"
24
+ export type { Protocol as ProtocolDef } from "./protocol"
25
+ export type { Transport as TransportDef, TransportRuntime } from "./transport"
@@ -0,0 +1,84 @@
1
+ import { Schema, type Effect } from "effect"
2
+ import type { LLMError, LLMEvent, LLMRequest, ProtocolID } from "../schema"
3
+
4
+ /**
5
+ * The semantic API contract of one model server family.
6
+ *
7
+ * A `Protocol` owns the parts of a route that are intrinsic to "what does
8
+ * this API look like": how a common `LLMRequest` becomes a provider-native
9
+ * body, what schema that body must satisfy before it is JSON-encoded, and
10
+ * how the streaming response decodes back into common `LLMEvent`s.
11
+ *
12
+ * Examples:
13
+ *
14
+ * - `OpenAIChat.protocol` — chat completions style
15
+ * - `OpenAIResponses.protocol` — responses API
16
+ * - `AnthropicMessages.protocol` — messages API with content blocks
17
+ * - `Gemini.protocol` — generateContent
18
+ * - `BedrockConverse.protocol` — Converse with binary event-stream framing
19
+ *
20
+ * A `Protocol` is **not** a deployment. It does not know which URL, which
21
+ * headers, or which auth scheme to use. Those are deployment concerns owned
22
+ * by `Route.make(...)` along with the chosen `Endpoint`, `Auth`,
23
+ * and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
24
+ * etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
25
+ *
26
+ * The four type parameters reflect the pipeline:
27
+ *
28
+ * - `Body` — provider-native request body candidate. `Route.make(...)`
29
+ * validates and JSON-encodes it with `body.schema`.
30
+ * - `Frame` — one unit of the framed response stream. SSE: a JSON data
31
+ * string. AWS event stream: a parsed binary frame.
32
+ * - `Event` — schema-decoded provider event produced from one frame.
33
+ * - `State` — accumulator threaded through `stream.step` to translate event
34
+ * sequences into `LLMEvent` sequences.
35
+ */
36
export interface Protocol<Body, Frame, Event, State> {
  /** Stable identifier of this wire protocol implementation. */
  readonly id: ProtocolID
  /** Request half: the provider-native body schema and its builder. */
  readonly body: ProtocolBody<Body>
  /** Response half: the streaming decoder and its state machine. */
  readonly stream: ProtocolStream<Frame, Event, State>
}
44
+
45
export interface ProtocolBody<Body> {
  /** Codec describing the provider-native body that is sent as the JSON request. */
  readonly schema: Schema.Codec<Body, unknown>
  /** Derive the provider-native body from a common `LLMRequest`; may fail with `LLMError`. */
  readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}
51
+
52
export interface ProtocolStream<Frame, Event, State> {
  /** Codec that decodes one transport frame into one provider event. */
  readonly event: Schema.Codec<Event, Frame>
  /** Produce the starting parser state from the resolved request; called once per response. */
  readonly initial: (request: LLMRequest) => State
  /** Consume one event, returning the next state together with the `LLMEvent`s to emit. */
  readonly step: (state: State, event: Event) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], LLMError>
  /** Optional: report that `event` completes the request, for transports that do not end naturally. */
  readonly terminal?: (event: Event) => boolean
  /** Optional: events to flush from the final state when the framed stream ends. */
  readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
}
64
+
65
+ /**
66
+ * Construct a `Protocol` from its body and stream pieces:
67
+ *
68
+ * - `body.schema` infers the provider-native request body shape.
69
+ * - `body.from` ties the common `LLMRequest` to the provider body.
70
+ * - `stream.event` infers the decoded streaming event and the wire frame.
71
+ * - `stream.initial`, `stream.step`, and `stream.onHalt` infer the parser state.
72
+ *
73
+ * Provider implementations should usually call `Protocol.make({ ... })`
74
+ * without explicit type arguments; the schemas and parser functions are the
75
+ * source of truth. The constructor remains as the public seam for future
76
+ * cross-cutting concerns such as tracing or instrumentation.
77
+ */
78
export const make = <Body, Frame, Event, State>(
  input: Protocol<Body, Frame, Event, State>,
): Protocol<Body, Frame, Event, State> => {
  // Identity at runtime; the call exists so the type parameters are inferred from `input`.
  return input
}
81
+
82
/** Wrap `schema` with `Schema.fromJsonString`, so events are decoded from JSON text. */
export const jsonEvent = <const S extends Schema.Top>(schema: S) => {
  return Schema.fromJsonString(schema)
}
83
+
84
+ export * as Protocol from "./protocol"
@@ -0,0 +1,108 @@
1
+ import { Effect, Stream } from "effect"
2
+ import { Headers, HttpClientRequest } from "effect/unstable/http"
3
+ import { Auth } from "../auth"
4
+ import { render as renderEndpoint } from "../endpoint"
5
+ import { Framing, type Framing as FramingDef } from "../framing"
6
+ import type { Transport, TransportPrepareInput } from "./index"
7
+ import * as ProviderShared from "../../protocols/shared"
8
+ import { mergeJsonRecords, type LLMRequest } from "../../schema"
9
+
10
/** Input accepted by `jsonRequestParts`; an alias of the generic transport prepare input. */
export type JsonRequestInput<Body> = TransportPrepareInput<Body>
11
+
12
/** The resolved pieces of a JSON POST request, before it is turned into an HTTP client request. */
export interface JsonRequestParts<Body = unknown> {
  /** Request URL as a string. */
  readonly url: string
  /** Structured body: either the provider body itself or a plain record. */
  readonly jsonBody: Body | Record<string, unknown>
  /** Serialized form of the body. */
  readonly bodyText: string
  /** Headers to send with the request. */
  readonly headers: Headers.Headers
}
18
+
19
/** Prepared state of an HTTP transport: the request to execute and the framing applied to its response stream. */
export interface HttpPrepared<Frame> {
  /** Fully built HTTP client request. */
  readonly request: HttpClientRequest.HttpClientRequest
  /** Framing definition that produces `Frame`s. */
  readonly framing: FramingDef<Frame>
}
23
+
24
/**
 * Apply query-parameter overrides to an absolute URL.
 *
 * Returns `url` untouched when `query` is absent. Otherwise each entry is
 * written with `searchParams.set`, so an existing parameter of the same name
 * is replaced rather than duplicated, and the re-serialized URL is returned.
 */
const applyQuery = (url: string, query: Record<string, string> | undefined) => {
  if (query) {
    const parsed = new URL(url)
    for (const [name, value] of Object.entries(query)) {
      parsed.searchParams.set(name, value)
    }
    return parsed.toString()
  }
  return url
}
30
+
31
/**
 * Resolve the body to send, applying the optional `request.http.body` overlay.
 *
 * - No overlay: the body is passed through and serialized with `encodeBody`.
 * - Overlay on a record body: the two are merged with `mergeJsonRecords`
 *   (falling back to `{}` when the merge yields nothing) and serialized with
 *   `ProviderShared.encodeJson`.
 * - Overlay on a non-record body: fails with an invalid-request error.
 */
const bodyWithOverlay = <Body>(body: Body, request: LLMRequest, encodeBody: (body: Body) => string) =>
  Effect.gen(function* () {
    const overlay = request.http?.body
    if (overlay === undefined) return { jsonBody: body, bodyText: encodeBody(body) }
    if (!ProviderShared.isRecord(body)) {
      return yield* ProviderShared.invalidRequest("http.body can only overlay JSON object request bodies")
    }
    const merged = mergeJsonRecords(body, overlay) ?? {}
    return { jsonBody: merged, bodyText: ProviderShared.encodeJson(merged) }
  })
40
+
41
/**
 * Build the URL, body, and headers of a JSON POST request.
 *
 * Steps, in order:
 * 1. Render the endpoint and apply `request.http.query` overrides.
 * 2. Resolve the body, applying any `request.http.body` overlay.
 * 3. Merge transport-level headers with `request.http.headers` (the latter
 *    wins on conflicts) and pass them, together with the method, URL, and
 *    serialized body, through the auth step to obtain the final headers.
 */
export const jsonRequestParts = <Body>(input: JsonRequestInput<Body>) =>
  Effect.gen(function* () {
    const { request } = input
    const rendered = renderEndpoint(input.endpoint, { request, body: input.body }).toString()
    const url = applyQuery(rendered, request.http?.query)
    const resolved = yield* bodyWithOverlay(input.body, request, input.encodeBody)
    const authorize = Auth.toEffect(input.auth)
    const headers = yield* authorize({
      request,
      method: "POST",
      url,
      body: resolved.bodyText,
      headers: Headers.fromInput({
        ...input.headers?.({ request }),
        ...request.http?.headers,
      }),
    })
    return { url, jsonBody: resolved.jsonBody, bodyText: resolved.bodyText, headers }
  })
60
+
61
/** Configuration for `httpJson`. The `_Body` parameter is not referenced by any member. */
export interface HttpJsonInput<_Body, Frame> {
  /** Framing applied to the HTTP response stream to produce `Frame`s. */
  readonly framing: FramingDef<Frame>
}
64
+
65
/** Partial override of `HttpJsonInput`, accepted by `HttpJsonTransport.with`. */
export type HttpJsonPatch<Body, Frame> = Partial<HttpJsonInput<Body, Frame>>
66
+
67
/** A `Transport` whose prepared state is `HttpPrepared<Frame>`, extended with a `with` method. */
export interface HttpJsonTransport<Body, Frame> extends Transport<Body, HttpPrepared<Frame>, Frame> {
  /** Returns a transport of the same kind built from the given patch. */
  readonly with: (patch: HttpJsonPatch<Body, Frame>) => HttpJsonTransport<Body, Frame>
}
70
+
71
/**
 * Create the `"http-json"` transport.
 *
 * - `with` rebuilds the transport from the current input spread together with a patch.
 * - `prepare` resolves the JSON request parts and wraps them in a JSON POST
 *   request, carrying the configured framing alongside.
 * - `frames` executes the prepared request and runs the response stream
 *   through the framing. Errors raised while reading the stream are mapped to
 *   an event error labelled with `<provider>/<route id>`.
 */
export const httpJson = <Body, Frame>(input: HttpJsonInput<Body, Frame>): HttpJsonTransport<Body, Frame> => ({
  id: "http-json",
  with: (patch) => httpJson({ ...input, ...patch }),
  prepare: (prepareInput) =>
    Effect.map(jsonRequestParts({ ...prepareInput }), ({ url, bodyText, headers }) => ({
      request: ProviderShared.jsonPost({ url, body: bodyText, headers }),
      framing: input.framing,
    })),
  frames: (prepared, request, runtime) => {
    const framed = Effect.map(runtime.http.execute(prepared.request), (response) => {
      const readable = Stream.mapError(response.stream, (cause) => {
        // The label is built only when a read error actually occurs.
        const route = `${request.model.provider}/${request.model.route.id}`
        return ProviderShared.eventError(route, `Failed to read ${route} stream`, ProviderShared.errorText(cause))
      })
      return prepared.framing.frame(readable)
    })
    return Stream.unwrap(framed)
  },
})
104
+
105
/**
 * Factory for the JSON-over-HTTP transport that uses `Framing.sse` and
 * string frames. `with<Body>()` fixes the body type and returns a new
 * `httpJson` transport on each call.
 */
export const sseJson = {
  id: "http-json/sse",
  with: <Body>() => {
    return httpJson<Body, string>({ framing: Framing.sse })
  },
} as const
@@ -0,0 +1,33 @@
1
+ import type { Effect, Stream } from "effect"
2
+ import type { Endpoint } from "../endpoint"
3
+ import type { Auth } from "../auth"
4
+ import type { Interface as RequestExecutorInterface } from "../executor"
5
+ import type { Interface as WebSocketExecutorInterface } from "./websocket"
6
+ import type { LLMError, LLMRequest } from "../../schema"
7
+
8
/** Executors handed to a transport when it produces frames. */
export interface TransportRuntime {
  /** HTTP request executor; always present. */
  readonly http: RequestExecutorInterface
  /** WebSocket executor; may be absent. */
  readonly webSocket?: WebSocketExecutorInterface
}
12
+
13
/**
 * A transport in two phases: `prepare` turns a body and request into a
 * `Prepared` value, and `frames` turns that value into a stream of `Frame`s.
 */
export interface Transport<Body, Prepared, Frame> {
  /** Identifier of the transport. */
  readonly id: string
  /** Build the prepared value, or fail with an `LLMError`. */
  readonly prepare: (input: TransportPrepareInput<Body>) => Effect.Effect<Prepared, LLMError>
  /** Produce the stream of frames for a prepared value using the supplied runtime. */
  readonly frames: (prepared: Prepared, request: LLMRequest, runtime: TransportRuntime) => Stream.Stream<Frame, LLMError>
}
22
+
23
/** Everything a transport's `prepare` step receives. */
export interface TransportPrepareInput<Body> {
  /** Provider-native request body. */
  readonly body: Body
  /** The common request this body was built for. */
  readonly request: LLMRequest
  /** Endpoint definition for this body type. */
  readonly endpoint: Endpoint<Body>
  /** Auth definition for the request. */
  readonly auth: Auth
  /** Serializes the body to a string. */
  readonly encodeBody: (body: Body) => string
  /** Optional provider of additional headers, computed from the request. */
  readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
31
+
32
+ export * as HttpTransport from "./http"
33
+ export { WebSocketExecutor, WebSocketTransport } from "./websocket"