@codilore/llm 1.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/AGENTS.md +321 -0
  2. package/README.md +131 -0
  3. package/example/call-sites.md +591 -0
  4. package/example/tutorial.ts +255 -0
  5. package/package.json +50 -0
  6. package/script/recording-cost-report.ts +250 -0
  7. package/script/setup-recording-env.ts +542 -0
  8. package/src/cache-policy.ts +111 -0
  9. package/src/index.ts +32 -0
  10. package/src/llm.ts +186 -0
  11. package/src/protocols/anthropic-messages.ts +841 -0
  12. package/src/protocols/bedrock-converse.ts +649 -0
  13. package/src/protocols/bedrock-event-stream.ts +87 -0
  14. package/src/protocols/gemini.ts +465 -0
  15. package/src/protocols/index.ts +6 -0
  16. package/src/protocols/openai-chat.ts +431 -0
  17. package/src/protocols/openai-compatible-chat.ts +24 -0
  18. package/src/protocols/openai-responses.ts +987 -0
  19. package/src/protocols/shared.ts +283 -0
  20. package/src/protocols/utils/bedrock-auth.ts +70 -0
  21. package/src/protocols/utils/bedrock-cache.ts +37 -0
  22. package/src/protocols/utils/bedrock-media.ts +80 -0
  23. package/src/protocols/utils/cache.ts +16 -0
  24. package/src/protocols/utils/gemini-tool-schema.ts +101 -0
  25. package/src/protocols/utils/lifecycle.ts +102 -0
  26. package/src/protocols/utils/openai-options.ts +84 -0
  27. package/src/protocols/utils/tool-stream.ts +218 -0
  28. package/src/provider.ts +37 -0
  29. package/src/providers/amazon-bedrock.ts +43 -0
  30. package/src/providers/anthropic.ts +35 -0
  31. package/src/providers/azure.ts +110 -0
  32. package/src/providers/cloudflare.ts +127 -0
  33. package/src/providers/github-copilot.ts +66 -0
  34. package/src/providers/google.ts +35 -0
  35. package/src/providers/index.ts +11 -0
  36. package/src/providers/openai-compatible-profile.ts +20 -0
  37. package/src/providers/openai-compatible.ts +65 -0
  38. package/src/providers/openai-options.ts +81 -0
  39. package/src/providers/openai.ts +63 -0
  40. package/src/providers/openrouter.ts +98 -0
  41. package/src/providers/xai.ts +56 -0
  42. package/src/route/auth-options.ts +57 -0
  43. package/src/route/auth.ts +156 -0
  44. package/src/route/client.ts +434 -0
  45. package/src/route/endpoint.ts +53 -0
  46. package/src/route/executor.ts +374 -0
  47. package/src/route/framing.ts +27 -0
  48. package/src/route/index.ts +25 -0
  49. package/src/route/protocol.ts +84 -0
  50. package/src/route/transport/http.ts +108 -0
  51. package/src/route/transport/index.ts +33 -0
  52. package/src/route/transport/websocket.ts +280 -0
  53. package/src/schema/errors.ts +203 -0
  54. package/src/schema/events.ts +370 -0
  55. package/src/schema/ids.ts +43 -0
  56. package/src/schema/index.ts +5 -0
  57. package/src/schema/messages.ts +404 -0
  58. package/src/schema/options.ts +221 -0
  59. package/src/tool-runtime.ts +78 -0
  60. package/src/tool.ts +241 -0
  61. package/src/utils/record.ts +3 -0
  62. package/sst-env.d.ts +10 -0
  63. package/test/adapter.test.ts +164 -0
  64. package/test/auth-options.types.ts +168 -0
  65. package/test/auth.test.ts +103 -0
  66. package/test/cache-policy.test.ts +262 -0
  67. package/test/continuation-scenarios.ts +104 -0
  68. package/test/endpoint.test.ts +58 -0
  69. package/test/executor.test.ts +418 -0
  70. package/test/exports.test.ts +62 -0
  71. package/test/fixtures/media/restroom.png +0 -0
  72. package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
  73. package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
  74. package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
  75. package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
  76. package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
  77. package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
  78. package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
  79. package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
  80. package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
  81. package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
  82. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  83. package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
  84. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
  85. package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
  86. package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
  87. package/test/fixtures/recordings/gemini/streams-text.json +28 -0
  88. package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
  89. package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
  90. package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
  91. package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
  92. package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
  93. package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
  94. package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
  95. package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
  96. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
  97. package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
  98. package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
  99. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
  100. package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
  101. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
  102. package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
  103. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
  104. package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
  105. package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
  106. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
  107. package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
  108. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
  109. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
  110. package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
  111. package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
  112. package/test/generate-object.test.ts +184 -0
  113. package/test/lib/effect.ts +50 -0
  114. package/test/lib/http.ts +98 -0
  115. package/test/lib/openai-chunks.ts +27 -0
  116. package/test/lib/sse.ts +17 -0
  117. package/test/lib/tool-runtime.ts +146 -0
  118. package/test/llm.test.ts +167 -0
  119. package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
  120. package/test/provider/anthropic-messages.recorded.test.ts +46 -0
  121. package/test/provider/anthropic-messages.test.ts +829 -0
  122. package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
  123. package/test/provider/bedrock-converse.test.ts +707 -0
  124. package/test/provider/cloudflare.test.ts +230 -0
  125. package/test/provider/gemini-cache.recorded.test.ts +48 -0
  126. package/test/provider/gemini.test.ts +476 -0
  127. package/test/provider/golden.recorded.test.ts +219 -0
  128. package/test/provider/openai-chat.test.ts +446 -0
  129. package/test/provider/openai-compatible-chat.test.ts +238 -0
  130. package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
  131. package/test/provider/openai-responses.test.ts +1322 -0
  132. package/test/provider/openrouter.test.ts +56 -0
  133. package/test/provider.types.ts +41 -0
  134. package/test/recorded-golden.ts +97 -0
  135. package/test/recorded-runner.ts +100 -0
  136. package/test/recorded-scenarios.ts +531 -0
  137. package/test/recorded-test.ts +74 -0
  138. package/test/recorded-utils.ts +56 -0
  139. package/test/recorded-websocket.ts +26 -0
  140. package/test/route.test.ts +43 -0
  141. package/test/schema.test.ts +97 -0
  142. package/test/tool-runtime.test.ts +802 -0
  143. package/test/tool-stream.test.ts +99 -0
  144. package/test/tool.types.ts +40 -0
  145. package/tsconfig.json +15 -0
@@ -0,0 +1,255 @@
1
+ import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect"
2
+ import { LLM, LLMClient, Message, ProviderID, Tool, ToolRuntime } from "@codilore/llm"
3
+ import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor, WebSocketExecutor } from "@codilore/llm/route"
4
+ import { OpenAI } from "@codilore/llm/providers"
5
+
6
+ /**
7
+ * A runnable walkthrough of the LLM package use-site API.
8
+ *
9
+ * Run from `packages/llm` with an OpenAI key in the environment:
10
+ *
11
+ * OPENAI_API_KEY=... bun example/tutorial.ts
12
+ *
13
+ * The file is intentionally written as a normal TypeScript program. You can
14
+ * hover imports and local values to see how the public API is typed.
15
+ */
16
+
17
+ const apiKey = Config.redacted("OPENAI_API_KEY")
18
+
19
+ // 1. Pick a model. The provider helper records provider identity, protocol
20
+ // choice, capabilities, deployment options, authentication, and defaults.
21
+ const model = OpenAI.configure({
22
+ apiKey,
23
+ generation: { maxTokens: 160 },
24
+ providerOptions: {
25
+ openai: { store: false },
26
+ },
27
+ }).model("gpt-4o-mini")
28
+
29
+ // 2. Build a provider-neutral request. This is useful when reusing one request
30
+ // across generate and stream examples.
31
+ //
32
+ // Options can live on both the configured route/provider facade and the request:
33
+ //
34
+ // - `generation`: common controls such as max tokens, temperature, topP/topK,
35
+ // penalties, seed, and stop sequences.
36
+ // - `providerOptions`: namespaced provider-native behavior. For example,
37
+ // OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking
38
+ // config, or OpenRouter routing/reasoning.
39
+ // - `http`: last-resort serializable overlays for final request body, headers,
40
+ // and query params. Prefer typed `providerOptions` when a field is stable.
41
+ //
42
+ // Route/provider options are defaults. Request options override them for this call.
43
+ const request = LLM.request({
44
+ model,
45
+ system: "You are concise and practical.",
46
+ prompt: "Tell me a joke",
47
+ generation: { maxTokens: 80, temperature: 0.7 },
48
+ providerOptions: {
49
+ openai: { promptCacheKey: "tutorial-joke" },
50
+ },
51
+ })
52
+
53
+ // `http` is intentionally not needed for normal calls. This shows the shape for
54
+ // newly released provider fields before they deserve a typed provider option.
55
+ const rawOverlayExample = LLM.request({
56
+ model,
57
+ prompt: "Show the final HTTP overlay shape.",
58
+ http: {
59
+ body: { metadata: { example: "tutorial" } },
60
+ headers: { "x-Codilore-tutorial": "1" },
61
+ query: { debug: "1" },
62
+ },
63
+ })
64
+
65
+ // 3. `generate` sends the request and collects the event stream into one
66
+ // response object. `response.text` is the collected text output.
67
+ const generateOnce = Effect.gen(function* () {
68
+ const response = yield* LLM.generate(request)
69
+
70
+ console.log("\n== generate ==")
71
+ console.log("generated text:", response.text)
72
+ console.log("usage", Formatter.formatJson(response.usage, { space: 2 }))
73
+ })
74
+
75
+ // 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want
76
+ // incremental text, reasoning, tool input, usage, or finish events.
77
+ const streamText = LLM.stream(request).pipe(
78
+ Stream.tap((event) =>
79
+ Effect.sync(() => {
80
+ if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`)
81
+ if (event.type === "finish") process.stdout.write(`\nfinish: ${event.reason}\n`)
82
+ }),
83
+ ),
84
+ Stream.runDrain,
85
+ )
86
+
87
+ // 5. Tools are typed with Effect Schema. Provider turns remain explicit:
88
+ // advertise definitions on the request, stream one turn, dispatch local calls,
89
+ // then persist/build follow-up history in the enclosing product flow.
90
+ const tools = {
91
+ get_weather: Tool.make({
92
+ description: "Get current weather for a city.",
93
+ parameters: Schema.Struct({ city: Schema.String }),
94
+ success: Schema.Struct({ forecast: Schema.String }),
95
+ execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }),
96
+ }),
97
+ }
98
+
99
+ const streamWithTools = Effect.gen(function* () {
100
+ const request = LLM.request({
101
+ model,
102
+ prompt: "Use get_weather for San Francisco, then answer in one sentence.",
103
+ generation: { maxTokens: 80, temperature: 0 },
104
+ tools: Tool.toDefinitions(tools),
105
+ })
106
+ const events = Array.from(yield* LLM.stream(request).pipe(Stream.runCollect))
107
+ for (const event of events) {
108
+ if (event.type === "tool-call") console.log("tool call", event.name, event.input)
109
+ if (event.type === "text-delta") process.stdout.write(event.text)
110
+ if (event.type !== "tool-call" || event.providerExecuted) continue
111
+ const dispatched = yield* ToolRuntime.dispatch(tools, event)
112
+ console.log("tool result", event.name, dispatched.result)
113
+
114
+ // A durable agent would persist these messages before starting another
115
+ // raw model turn. This tutorial keeps the boundary visible instead.
116
+ const followUp = LLM.updateRequest(request, {
117
+ messages: [
118
+ ...request.messages,
119
+ Message.assistant([event]),
120
+ Message.tool({ ...event, result: dispatched.result }),
121
+ ],
122
+ })
123
+ console.log("follow-up history messages:", followUp.messages.length)
124
+ }
125
+ })
126
+
127
+ // 6. `generateObject` is the structured-output helper. It forces a synthetic
128
+ // tool call internally, so the same call site works across providers instead of
129
+ // depending on provider-specific JSON mode flags.
130
+ const WeatherReport = Schema.Struct({
131
+ city: Schema.String,
132
+ forecast: Schema.String,
133
+ highFahrenheit: Schema.Number,
134
+ })
135
+
136
+ const generateStructuredObject = Effect.gen(function* () {
137
+ const response = yield* LLM.generateObject({
138
+ model,
139
+ system: "Return only structured weather data.",
140
+ prompt: "Give me today's weather for San Francisco.",
141
+ schema: WeatherReport,
142
+ generation: { maxTokens: 120, temperature: 0 },
143
+ })
144
+
145
+ console.log("\n== generateObject ==")
146
+ console.log(Formatter.formatJson(response.object, { space: 2 }))
147
+ })
148
+
149
+ // If the shape is only known at runtime, pass raw JSON Schema instead. The
150
+ // `.object` type is `unknown`; callers that need static types should validate it.
151
+ const generateDynamicObject = LLM.generateObject({
152
+ model,
153
+ prompt: "Extract the city and forecast from: San Francisco is sunny.",
154
+ jsonSchema: {
155
+ type: "object",
156
+ properties: {
157
+ city: { type: "string" },
158
+ forecast: { type: "string" },
159
+ },
160
+ required: ["city", "forecast"],
161
+ },
162
+ })
163
+
164
+ // -----------------------------------------------------------------------------
165
+ // Part 2: provider composition with a fake provider
166
+ // -----------------------------------------------------------------------------
167
+
168
+ // A protocol is the provider-native API shape: common request -> body, response
169
+ // frames -> common events. This fake one turns text prompts into a JSON body
170
+ // and treats every SSE frame as output text.
171
+ const FakeBody = Schema.Struct({
172
+ model: Schema.String,
173
+ input: Schema.String,
174
+ })
175
+ type FakeBody = Schema.Schema.Type<typeof FakeBody>
176
+
177
+ const FakeProtocol = Protocol.make<FakeBody, string, string, void>({
178
+ // Protocol ids are open strings, so external packages can define their own
179
+ // protocols without changing this package.
180
+ id: "fake-echo",
181
+ body: {
182
+ schema: FakeBody,
183
+ from: (request) =>
184
+ Effect.succeed({
185
+ model: request.model.id,
186
+ input: request.messages
187
+ .flatMap((message) => message.content)
188
+ .filter((part) => part.type === "text")
189
+ .map((part) => part.text)
190
+ .join("\n"),
191
+ }),
192
+ },
193
+ stream: {
194
+ event: Schema.String,
195
+ initial: () => undefined,
196
+ step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", id: "text-0", text: frame }]] as const),
197
+ onHalt: () => [{ type: "finish", reason: "stop" }],
198
+ },
199
+ })
200
+
201
+ // A route is the runnable binding for that protocol. It adds the deployment
202
+ // axes that the protocol deliberately does not know: URL, auth, and framing.
203
+ const FakeAdapter = Route.make({
204
+ id: "fake-echo",
205
+ provider: "fake-echo",
206
+ protocol: FakeProtocol,
207
+ endpoint: Endpoint.path("/v1/echo", { baseURL: "https://fake.local" }),
208
+ auth: Auth.passthrough,
209
+ framing: Framing.sse,
210
+ })
211
+
212
+ // A provider module exports a configured facade. Configuration happens before
213
+ // model selection; model selectors accept ids only.
214
+ const FakeEcho = {
215
+ id: ProviderID.make("fake-echo"),
216
+ configure: () => ({
217
+ id: ProviderID.make("fake-echo"),
218
+ model: (id: string) => FakeAdapter.model({ id }),
219
+ }),
220
+ }
221
+
222
+ // `LLMClient.prepare` is the lower-level inspection hook: it compiles through
223
+ // body conversion, validation, endpoint, auth, and HTTP construction without
224
+ // sending anything over the network.
225
+ const inspectFakeProvider = Effect.gen(function* () {
226
+ const prepared = yield* LLMClient.prepare(
227
+ LLM.request({
228
+ model: FakeEcho.configure().model("tiny-echo"),
229
+ prompt: "Show me the provider pipeline.",
230
+ }),
231
+ )
232
+
233
+ console.log("\n== fake provider prepare ==")
234
+ console.log("route:", prepared.route)
235
+ console.log("body:", Formatter.formatJson(prepared.body, { space: 2 }))
236
+ })
237
+
238
+ // Provide the LLM runtime and the HTTP request executor once. Keep one path
239
+ // enabled at a time so the tutorial can demonstrate generate, prepare, stream,
240
+ // or tool-loop behavior without spending tokens on every example.
241
+ const requestExecutorLayer = RequestExecutor.defaultLayer
242
+ const llmDeps = Layer.mergeAll(requestExecutorLayer, WebSocketExecutor.layer)
243
+ const llmClientLayer = LLMClient.layer.pipe(Layer.provide(llmDeps))
244
+
245
+ const program = Effect.gen(function* () {
246
+ // yield* generateOnce
247
+ // yield* inspectFakeProvider
248
+ // yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body))))
249
+ // yield* streamText
250
+ // yield* generateStructuredObject
251
+ // yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object))))
252
+ yield* streamWithTools
253
+ }).pipe(Effect.provide(Layer.mergeAll(llmDeps, llmClientLayer)))
254
+
255
+ Effect.runPromise(program)
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/package.json",
3
+ "version": "1.15.13",
4
+ "name": "@codilore/llm",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "scripts": {
8
+ "setup:recording-env": "bun run script/setup-recording-env.ts",
9
+ "test": "bun test --timeout 30000",
10
+ "typecheck": "tsgo --noEmit"
11
+ },
12
+ "exports": {
13
+ ".": "./src/index.ts",
14
+ "./route": "./src/route/index.ts",
15
+ "./provider": "./src/provider.ts",
16
+ "./providers": "./src/providers/index.ts",
17
+ "./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts",
18
+ "./providers/anthropic": "./src/providers/anthropic.ts",
19
+ "./providers/azure": "./src/providers/azure.ts",
20
+ "./providers/cloudflare": "./src/providers/cloudflare.ts",
21
+ "./providers/github-copilot": "./src/providers/github-copilot.ts",
22
+ "./providers/google": "./src/providers/google.ts",
23
+ "./providers/openai": "./src/providers/openai.ts",
24
+ "./providers/openai-compatible": "./src/providers/openai-compatible.ts",
25
+ "./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts",
26
+ "./providers/openrouter": "./src/providers/openrouter.ts",
27
+ "./providers/xai": "./src/providers/xai.ts",
28
+ "./protocols": "./src/protocols/index.ts",
29
+ "./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts",
30
+ "./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts",
31
+ "./protocols/gemini": "./src/protocols/gemini.ts",
32
+ "./protocols/openai-chat": "./src/protocols/openai-chat.ts",
33
+ "./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts",
34
+ "./protocols/openai-responses": "./src/protocols/openai-responses.ts"
35
+ },
36
+ "devDependencies": {
37
+ "@clack/prompts": "1.0.0-alpha.1",
38
+ "@effect/platform-node": "^1.15.13",
39
+ "@codilore/http-recorder": "1.15.13",
40
+ "@tsconfig/bun": "^1.15.13",
41
+ "@types/bun": "^1.15.13",
42
+ "@typescript/native-preview": "^1.15.13"
43
+ },
44
+ "dependencies": {
45
+ "@smithy/eventstream-codec": "4.2.14",
46
+ "@smithy/util-utf8": "4.2.2",
47
+ "aws4fetch": "1.0.20",
48
+ "effect": "^1.15.13"
49
+ }
50
+ }
@@ -0,0 +1,250 @@
1
+ import * as fs from "node:fs/promises"
2
+ import * as path from "node:path"
3
+
4
+ const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings")
5
+ const MODELS_DEV_URL = "https://models.dev/api.json"
6
+
7
+ type JsonRecord = Record<string, unknown>
8
+
9
+ type Pricing = {
10
+ readonly input?: number
11
+ readonly output?: number
12
+ readonly cache_read?: number
13
+ readonly cache_write?: number
14
+ readonly reasoning?: number
15
+ }
16
+
17
+ type Usage = {
18
+ readonly inputTokens: number
19
+ readonly outputTokens: number
20
+ readonly cacheReadTokens: number
21
+ readonly cacheWriteTokens: number
22
+ readonly reasoningTokens: number
23
+ readonly reportedCost: number
24
+ }
25
+
26
+ type Row = Usage & {
27
+ readonly cassette: string
28
+ readonly provider: string
29
+ readonly model: string
30
+ readonly estimatedCost: number
31
+ readonly pricingSource: string
32
+ }
33
+
34
+ const isRecord = (value: unknown): value is JsonRecord =>
35
+ value !== null && typeof value === "object" && !Array.isArray(value)
36
+
37
+ const asNumber = (value: unknown) => (typeof value === "number" && Number.isFinite(value) ? value : 0)
38
+
39
+ const asString = (value: unknown) => (typeof value === "string" ? value : undefined)
40
+
41
+ const readJson = async (file: string) => JSON.parse(await Bun.file(file).text()) as unknown
42
+
43
/**
 * Recursively lists every file beneath `dir`.
 *
 * Ordering: entries directly inside `dir` that are not directories come first
 * (in `readdir` order), followed by the contents of each subdirectory, again in
 * `readdir` order. Every non-directory entry is reported as a file.
 *
 * Each directory is read exactly once, so the file list and the set of
 * subdirectories always come from the same snapshot of the directory.
 *
 * @param dir Directory to walk.
 * @returns Paths of all files found, each built with `path.join` from `dir`.
 */
const walk = async (dir: string): Promise<ReadonlyArray<string>> => {
  const entries = await fs.readdir(dir, { withFileTypes: true })
  const files = entries.filter((entry) => !entry.isDirectory()).map((entry) => path.join(dir, entry.name))
  // Subdirectories are walked concurrently; Promise.all keeps their results in readdir order.
  const nested = await Promise.all(
    entries.filter((entry) => entry.isDirectory()).map((entry) => walk(path.join(dir, entry.name))),
  )
  return files.concat(...nested)
}
56
+
57
+ const providerFromUrl = (url: string) => {
58
+ if (url.includes("api.openai.com")) return "openai"
59
+ if (url.includes("api.anthropic.com")) return "anthropic"
60
+ if (url.includes("generativelanguage.googleapis.com")) return "google"
61
+ if (url.includes("bedrock")) return "amazon-bedrock"
62
+ if (url.includes("openrouter.ai")) return "openrouter"
63
+ if (url.includes("api.x.ai")) return "xai"
64
+ if (url.includes("api.groq.com")) return "groq"
65
+ if (url.includes("api.deepseek.com")) return "deepseek"
66
+ if (url.includes("api.together.xyz")) return "togetherai"
67
+ return "unknown"
68
+ }
69
+
70
+ const providerAliases: Record<string, ReadonlyArray<string>> = {
71
+ openai: ["openai"],
72
+ anthropic: ["anthropic"],
73
+ google: ["google"],
74
+ "amazon-bedrock": ["amazon-bedrock"],
75
+ openrouter: ["openrouter", "openai", "anthropic", "google"],
76
+ xai: ["xai"],
77
+ groq: ["groq"],
78
+ deepseek: ["deepseek"],
79
+ togetherai: ["togetherai"],
80
+ }
81
+
82
/**
 * Candidate pricing-catalogue keys for a recorded model id, in lookup priority order.
 *
 * The raw id is always tried first, followed by variants with a single
 * decoration removed: a `models/` prefix, a trailing `-YYYYMMDD` or
 * `-YYYY-MM-DD` date stamp, or an `openai/` / `anthropic/` / `google/` vendor
 * prefix. The final candidate removes both the prefix and the date stamp so
 * ids such as `openai/gpt-4o-2024-08-06` can still resolve to `gpt-4o`.
 *
 * Duplicates are dropped (first occurrence wins) so callers do not probe the
 * same key more than once.
 *
 * @param model Model id as it appears in the recorded request body.
 * @returns Unique candidate ids, most specific first.
 */
const modelAliases = (model: string) => {
  const stripDate = (id: string) => id.replace(/-\d{4}-\d{2}-\d{2}$/, "").replace(/-\d{8}$/, "")
  const stripPrefix = (id: string) => id.replace(/^(?:models|openai|anthropic|google)\//, "")
  const candidates = [
    model,
    model.replace(/^models\//, ""),
    model.replace(/-\d{8}$/, ""),
    model.replace(/-\d{4}-\d{2}-\d{2}$/, ""),
    stripDate(model),
    model.replace(/^openai\//, ""),
    model.replace(/^anthropic\//, ""),
    model.replace(/^google\//, ""),
    // Lowest priority: prefix and date stamp both removed.
    stripDate(stripPrefix(model)),
  ]
  return [...new Set(candidates)]
}
92
+
93
+ const pricingFor = (models: JsonRecord, provider: string, model: string) => {
94
+ for (const providerID of providerAliases[provider] ?? [provider]) {
95
+ const providerEntry = models[providerID]
96
+ if (!isRecord(providerEntry) || !isRecord(providerEntry.models)) continue
97
+ for (const modelID of modelAliases(model)) {
98
+ const modelEntry = providerEntry.models[modelID]
99
+ if (isRecord(modelEntry) && isRecord(modelEntry.cost))
100
+ return { pricing: modelEntry.cost as Pricing, source: `${providerID}/${modelID}` }
101
+ }
102
+ }
103
+ return { pricing: undefined, source: "missing" }
104
+ }
105
+
106
+ const estimateCost = (usage: Usage, pricing: Pricing | undefined) => {
107
+ if (!pricing) return 0
108
+ return (
109
+ (usage.inputTokens * (pricing.input ?? 0) +
110
+ usage.outputTokens * (pricing.output ?? 0) +
111
+ usage.cacheReadTokens * (pricing.cache_read ?? 0) +
112
+ usage.cacheWriteTokens * (pricing.cache_write ?? 0) +
113
+ usage.reasoningTokens * (pricing.reasoning ?? 0)) /
114
+ 1_000_000
115
+ )
116
+ }
117
+
118
+ const emptyUsage = (): Usage => ({
119
+ inputTokens: 0,
120
+ outputTokens: 0,
121
+ cacheReadTokens: 0,
122
+ cacheWriteTokens: 0,
123
+ reasoningTokens: 0,
124
+ reportedCost: 0,
125
+ })
126
+
127
+ const addUsage = (a: Usage, b: Usage): Usage => ({
128
+ inputTokens: a.inputTokens + b.inputTokens,
129
+ outputTokens: a.outputTokens + b.outputTokens,
130
+ cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
131
+ cacheWriteTokens: a.cacheWriteTokens + b.cacheWriteTokens,
132
+ reasoningTokens: a.reasoningTokens + b.reasoningTokens,
133
+ reportedCost: a.reportedCost + b.reportedCost,
134
+ })
135
+
136
+ const usageFromObject = (usage: unknown): Usage => {
137
+ if (!isRecord(usage)) return emptyUsage()
138
+ const promptDetails = isRecord(usage.prompt_tokens_details) ? usage.prompt_tokens_details : {}
139
+ const completionDetails = isRecord(usage.completion_tokens_details) ? usage.completion_tokens_details : {}
140
+ const inputDetails = isRecord(usage.input_tokens_details) ? usage.input_tokens_details : {}
141
+ const outputDetails = isRecord(usage.output_tokens_details) ? usage.output_tokens_details : {}
142
+ const cacheWriteTokens = asNumber(promptDetails.cache_write_tokens) + asNumber(inputDetails.cache_write_tokens)
143
+ return {
144
+ inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens),
145
+ outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens),
146
+ cacheReadTokens: asNumber(promptDetails.cached_tokens) + asNumber(inputDetails.cached_tokens),
147
+ cacheWriteTokens,
148
+ reasoningTokens: asNumber(completionDetails.reasoning_tokens) + asNumber(outputDetails.reasoning_tokens),
149
+ reportedCost: asNumber(usage.cost),
150
+ }
151
+ }
152
+
153
+ const jsonPayloads = (body: string) =>
154
+ body
155
+ .split("\n")
156
+ .map((line) => line.trim())
157
+ .filter((line) => line.startsWith("data:"))
158
+ .map((line) => line.slice("data:".length).trim())
159
+ .filter((line) => line !== "" && line !== "[DONE]")
160
+ .flatMap((line) => {
161
+ try {
162
+ return [JSON.parse(line) as unknown]
163
+ } catch {
164
+ return []
165
+ }
166
+ })
167
+
168
+ const usageFromResponseBody = (body: string) =>
169
+ jsonPayloads(body).reduce<Usage>((usage, payload) => {
170
+ if (!isRecord(payload)) return usage
171
+ return addUsage(
172
+ usage,
173
+ addUsage(
174
+ usageFromObject(payload.usage),
175
+ usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined),
176
+ ),
177
+ )
178
+ }, emptyUsage())
179
+
180
+ const modelFromRequest = (request: unknown) => {
181
+ if (!isRecord(request)) return "unknown"
182
+ const requestBody = asString(request.body)
183
+ if (!requestBody) return "unknown"
184
+ try {
185
+ const body = JSON.parse(requestBody) as unknown
186
+ if (!isRecord(body)) return "unknown"
187
+ return asString(body.model) ?? "unknown"
188
+ } catch {
189
+ return "unknown"
190
+ }
191
+ }
192
+
193
+ const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => {
194
+ if (!isRecord(cassette) || !Array.isArray(cassette.interactions)) return undefined
195
+ const first = cassette.interactions.find(isRecord)
196
+ if (!first || !isRecord(first.request)) return undefined
197
+ const provider = providerFromUrl(asString(first.request.url) ?? "")
198
+ const model = modelFromRequest(first.request)
199
+ const usage = cassette.interactions.filter(isRecord).reduce<Usage>((total, interaction) => {
200
+ if (!isRecord(interaction.response)) return total
201
+ const responseBody = asString(interaction.response.body)
202
+ if (!responseBody) return total
203
+ return addUsage(total, usageFromResponseBody(responseBody))
204
+ }, emptyUsage())
205
+ const priced = pricingFor(models, provider, model)
206
+ return {
207
+ cassette: path.relative(RECORDINGS_DIR, file),
208
+ provider,
209
+ model,
210
+ ...usage,
211
+ estimatedCost: estimateCost(usage, priced.pricing),
212
+ pricingSource: priced.source,
213
+ }
214
+ }
215
+
216
+ const money = (value: number) => (value === 0 ? "$0.000000" : `$${value.toFixed(6)}`)
217
+ const tokens = (value: number) => value.toLocaleString("en-US")
218
+
219
// Load the pricing catalogue. Fail loudly on a non-2xx status or a non-object
// payload: otherwise every cassette would be reported with "missing" pricing
// and a zero estimate, which is indistinguishable from a genuinely free run.
const modelsResponse = await fetch(MODELS_DEV_URL)
if (!modelsResponse.ok) {
  throw new Error(`Failed to fetch ${MODELS_DEV_URL}: ${modelsResponse.status} ${modelsResponse.statusText}`)
}
const modelsPayload: unknown = await modelsResponse.json()
if (!isRecord(modelsPayload)) {
  throw new Error(`Unexpected pricing payload from ${MODELS_DEV_URL}: expected a JSON object`)
}
const models: JsonRecord = modelsPayload
220
+ const rows = (
221
+ await Promise.all(
222
+ (await walk(RECORDINGS_DIR))
223
+ .filter((file) => file.endsWith(".json"))
224
+ .map(async (file) => rowFor(models, file, await readJson(file))),
225
+ )
226
+ ).filter((row): row is Row => row !== undefined)
227
+
228
+ const totals = rows.reduce(
229
+ (total, row) => ({
230
+ ...addUsage(total, row),
231
+ estimatedCost: total.estimatedCost + row.estimatedCost,
232
+ }),
233
+ { ...emptyUsage(), estimatedCost: 0 },
234
+ )
235
+
236
+ console.log("# Recording Cost Report")
237
+ console.log("")
238
+ console.log(`Pricing: ${MODELS_DEV_URL}`)
239
+ console.log(`Cassettes: ${rows.length}`)
240
+ console.log(`Reported cost: ${money(totals.reportedCost)}`)
241
+ console.log(`Estimated cost: ${money(totals.estimatedCost)}`)
242
+ console.log("")
243
// Markdown table header. Text columns (Provider, Model, Pricing, Cassette) are
// left-aligned; the five numeric columns (Input, Output, Reasoning, Reported,
// Estimated) are right-aligned so digits line up.
console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |")
console.log("|---|---|---:|---:|---:|---:|---:|---|---|")
245
+ for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) {
246
+ if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue
247
+ console.log(
248
+ `| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`,
249
+ )
250
+ }