@codilore/llm 1.15.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +321 -0
- package/README.md +131 -0
- package/example/call-sites.md +591 -0
- package/example/tutorial.ts +255 -0
- package/package.json +50 -0
- package/script/recording-cost-report.ts +250 -0
- package/script/setup-recording-env.ts +542 -0
- package/src/cache-policy.ts +111 -0
- package/src/index.ts +32 -0
- package/src/llm.ts +186 -0
- package/src/protocols/anthropic-messages.ts +841 -0
- package/src/protocols/bedrock-converse.ts +649 -0
- package/src/protocols/bedrock-event-stream.ts +87 -0
- package/src/protocols/gemini.ts +465 -0
- package/src/protocols/index.ts +6 -0
- package/src/protocols/openai-chat.ts +431 -0
- package/src/protocols/openai-compatible-chat.ts +24 -0
- package/src/protocols/openai-responses.ts +987 -0
- package/src/protocols/shared.ts +283 -0
- package/src/protocols/utils/bedrock-auth.ts +70 -0
- package/src/protocols/utils/bedrock-cache.ts +37 -0
- package/src/protocols/utils/bedrock-media.ts +80 -0
- package/src/protocols/utils/cache.ts +16 -0
- package/src/protocols/utils/gemini-tool-schema.ts +101 -0
- package/src/protocols/utils/lifecycle.ts +102 -0
- package/src/protocols/utils/openai-options.ts +84 -0
- package/src/protocols/utils/tool-stream.ts +218 -0
- package/src/provider.ts +37 -0
- package/src/providers/amazon-bedrock.ts +43 -0
- package/src/providers/anthropic.ts +35 -0
- package/src/providers/azure.ts +110 -0
- package/src/providers/cloudflare.ts +127 -0
- package/src/providers/github-copilot.ts +66 -0
- package/src/providers/google.ts +35 -0
- package/src/providers/index.ts +11 -0
- package/src/providers/openai-compatible-profile.ts +20 -0
- package/src/providers/openai-compatible.ts +65 -0
- package/src/providers/openai-options.ts +81 -0
- package/src/providers/openai.ts +63 -0
- package/src/providers/openrouter.ts +98 -0
- package/src/providers/xai.ts +56 -0
- package/src/route/auth-options.ts +57 -0
- package/src/route/auth.ts +156 -0
- package/src/route/client.ts +434 -0
- package/src/route/endpoint.ts +53 -0
- package/src/route/executor.ts +374 -0
- package/src/route/framing.ts +27 -0
- package/src/route/index.ts +25 -0
- package/src/route/protocol.ts +84 -0
- package/src/route/transport/http.ts +108 -0
- package/src/route/transport/index.ts +33 -0
- package/src/route/transport/websocket.ts +280 -0
- package/src/schema/errors.ts +203 -0
- package/src/schema/events.ts +370 -0
- package/src/schema/ids.ts +43 -0
- package/src/schema/index.ts +5 -0
- package/src/schema/messages.ts +404 -0
- package/src/schema/options.ts +221 -0
- package/src/tool-runtime.ts +78 -0
- package/src/tool.ts +241 -0
- package/src/utils/record.ts +3 -0
- package/sst-env.d.ts +10 -0
- package/test/adapter.test.ts +164 -0
- package/test/auth-options.types.ts +168 -0
- package/test/auth.test.ts +103 -0
- package/test/cache-policy.test.ts +262 -0
- package/test/continuation-scenarios.ts +104 -0
- package/test/endpoint.test.ts +58 -0
- package/test/executor.test.ts +418 -0
- package/test/exports.test.ts +62 -0
- package/test/fixtures/media/restroom.png +0 -0
- package/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json +29 -0
- package/test/fixtures/recordings/anthropic-messages/anthropic-opus-4-7-image-tool-result.json +43 -0
- package/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json +56 -0
- package/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json +29 -0
- package/test/fixtures/recordings/anthropic-messages/streams-text.json +29 -0
- package/test/fixtures/recordings/anthropic-messages/streams-tool-call.json +29 -0
- package/test/fixtures/recordings/anthropic-messages-cache/writes-then-reads-cache-control-on-identical-second-call.json +48 -0
- package/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json +55 -0
- package/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json +29 -0
- package/test/fixtures/recordings/bedrock-converse/streams-text.json +29 -0
- package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
- package/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json +32 -0
- package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json +32 -0
- package/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json +32 -0
- package/test/fixtures/recordings/gemini/gemini-2-5-flash-image.json +32 -0
- package/test/fixtures/recordings/gemini/streams-text.json +28 -0
- package/test/fixtures/recordings/gemini/streams-tool-call.json +28 -0
- package/test/fixtures/recordings/gemini-cache/reports-cachedcontenttokencount-on-identical-second-call.json +46 -0
- package/test/fixtures/recordings/openai-chat/continues-after-tool-result.json +28 -0
- package/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json +46 -0
- package/test/fixtures/recordings/openai-chat/streams-text.json +28 -0
- package/test/fixtures/recordings/openai-chat/streams-tool-call.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json +53 -0
- package/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json +54 -0
- package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json +53 -0
- package/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json +54 -0
- package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json +28 -0
- package/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json +28 -0
- package/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json +54 -0
- package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json +28 -0
- package/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json +28 -0
- package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-image-tool-result.json +42 -0
- package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +58 -0
- package/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning.json +32 -0
- package/test/fixtures/recordings/openai-responses-cache/reports-cached-tokens-on-identical-second-call.json +46 -0
- package/test/generate-object.test.ts +184 -0
- package/test/lib/effect.ts +50 -0
- package/test/lib/http.ts +98 -0
- package/test/lib/openai-chunks.ts +27 -0
- package/test/lib/sse.ts +17 -0
- package/test/lib/tool-runtime.ts +146 -0
- package/test/llm.test.ts +167 -0
- package/test/provider/anthropic-messages-cache.recorded.test.ts +54 -0
- package/test/provider/anthropic-messages.recorded.test.ts +46 -0
- package/test/provider/anthropic-messages.test.ts +829 -0
- package/test/provider/bedrock-converse-cache.recorded.test.ts +54 -0
- package/test/provider/bedrock-converse.test.ts +707 -0
- package/test/provider/cloudflare.test.ts +230 -0
- package/test/provider/gemini-cache.recorded.test.ts +48 -0
- package/test/provider/gemini.test.ts +476 -0
- package/test/provider/golden.recorded.test.ts +219 -0
- package/test/provider/openai-chat.test.ts +446 -0
- package/test/provider/openai-compatible-chat.test.ts +238 -0
- package/test/provider/openai-responses-cache.recorded.test.ts +46 -0
- package/test/provider/openai-responses.test.ts +1322 -0
- package/test/provider/openrouter.test.ts +56 -0
- package/test/provider.types.ts +41 -0
- package/test/recorded-golden.ts +97 -0
- package/test/recorded-runner.ts +100 -0
- package/test/recorded-scenarios.ts +531 -0
- package/test/recorded-test.ts +74 -0
- package/test/recorded-utils.ts +56 -0
- package/test/recorded-websocket.ts +26 -0
- package/test/route.test.ts +43 -0
- package/test/schema.test.ts +97 -0
- package/test/tool-runtime.test.ts +802 -0
- package/test/tool-stream.test.ts +99 -0
- package/test/tool.types.ts +40 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect"
|
|
2
|
+
import { LLM, LLMClient, Message, ProviderID, Tool, ToolRuntime } from "@codilore/llm"
|
|
3
|
+
import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor, WebSocketExecutor } from "@codilore/llm/route"
|
|
4
|
+
import { OpenAI } from "@codilore/llm/providers"
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* A runnable walkthrough of the LLM package use-site API.
|
|
8
|
+
*
|
|
9
|
+
* Run from `packages/llm` with an OpenAI key in the environment:
|
|
10
|
+
*
|
|
11
|
+
* OPENAI_API_KEY=... bun example/tutorial.ts
|
|
12
|
+
*
|
|
13
|
+
* The file is intentionally written as a normal TypeScript program. You can
|
|
14
|
+
* hover imports and local values to see how the public API is typed.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const apiKey = Config.redacted("OPENAI_API_KEY")
|
|
18
|
+
|
|
19
|
+
// 1. Pick a model. The provider helper records provider identity, protocol
|
|
20
|
+
// choice, capabilities, deployment options, authentication, and defaults.
|
|
21
|
+
const model = OpenAI.configure({
|
|
22
|
+
apiKey,
|
|
23
|
+
generation: { maxTokens: 160 },
|
|
24
|
+
providerOptions: {
|
|
25
|
+
openai: { store: false },
|
|
26
|
+
},
|
|
27
|
+
}).model("gpt-4o-mini")
|
|
28
|
+
|
|
29
|
+
// 2. Build a provider-neutral request. This is useful when reusing one request
|
|
30
|
+
// across generate and stream examples.
|
|
31
|
+
//
|
|
32
|
+
// Options can live on both the configured route/provider facade and the request:
|
|
33
|
+
//
|
|
34
|
+
// - `generation`: common controls such as max tokens, temperature, topP/topK,
|
|
35
|
+
// penalties, seed, and stop sequences.
|
|
36
|
+
// - `providerOptions`: namespaced provider-native behavior. For example,
|
|
37
|
+
// OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking
|
|
38
|
+
// config, or OpenRouter routing/reasoning.
|
|
39
|
+
// - `http`: last-resort serializable overlays for final request body, headers,
|
|
40
|
+
// and query params. Prefer typed `providerOptions` when a field is stable.
|
|
41
|
+
//
|
|
42
|
+
// Route/provider options are defaults. Request options override them for this call.
|
|
43
|
+
const request = LLM.request({
|
|
44
|
+
model,
|
|
45
|
+
system: "You are concise and practical.",
|
|
46
|
+
prompt: "Tell me a joke",
|
|
47
|
+
generation: { maxTokens: 80, temperature: 0.7 },
|
|
48
|
+
providerOptions: {
|
|
49
|
+
openai: { promptCacheKey: "tutorial-joke" },
|
|
50
|
+
},
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
// `http` is intentionally not needed for normal calls. This shows the shape for
|
|
54
|
+
// newly released provider fields before they deserve a typed provider option.
|
|
55
|
+
const rawOverlayExample = LLM.request({
|
|
56
|
+
model,
|
|
57
|
+
prompt: "Show the final HTTP overlay shape.",
|
|
58
|
+
http: {
|
|
59
|
+
body: { metadata: { example: "tutorial" } },
|
|
60
|
+
headers: { "x-Codilore-tutorial": "1" },
|
|
61
|
+
query: { debug: "1" },
|
|
62
|
+
},
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
// 3. `generate` sends the request and collects the event stream into one
|
|
66
|
+
// response object. `response.text` is the collected text output.
|
|
67
|
+
const generateOnce = Effect.gen(function* () {
|
|
68
|
+
const response = yield* LLM.generate(request)
|
|
69
|
+
|
|
70
|
+
console.log("\n== generate ==")
|
|
71
|
+
console.log("generated text:", response.text)
|
|
72
|
+
console.log("usage", Formatter.formatJson(response.usage, { space: 2 }))
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
// 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want
|
|
76
|
+
// incremental text, reasoning, tool input, usage, or finish events.
|
|
77
|
+
const streamText = LLM.stream(request).pipe(
|
|
78
|
+
Stream.tap((event) =>
|
|
79
|
+
Effect.sync(() => {
|
|
80
|
+
if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`)
|
|
81
|
+
if (event.type === "finish") process.stdout.write(`\nfinish: ${event.reason}\n`)
|
|
82
|
+
}),
|
|
83
|
+
),
|
|
84
|
+
Stream.runDrain,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
// 5. Tools are typed with Effect Schema. Provider turns remain explicit:
|
|
88
|
+
// advertise definitions on the request, stream one turn, dispatch local calls,
|
|
89
|
+
// then persist/build follow-up history in the enclosing product flow.
|
|
90
|
+
const tools = {
|
|
91
|
+
get_weather: Tool.make({
|
|
92
|
+
description: "Get current weather for a city.",
|
|
93
|
+
parameters: Schema.Struct({ city: Schema.String }),
|
|
94
|
+
success: Schema.Struct({ forecast: Schema.String }),
|
|
95
|
+
execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }),
|
|
96
|
+
}),
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const streamWithTools = Effect.gen(function* () {
|
|
100
|
+
const request = LLM.request({
|
|
101
|
+
model,
|
|
102
|
+
prompt: "Use get_weather for San Francisco, then answer in one sentence.",
|
|
103
|
+
generation: { maxTokens: 80, temperature: 0 },
|
|
104
|
+
tools: Tool.toDefinitions(tools),
|
|
105
|
+
})
|
|
106
|
+
const events = Array.from(yield* LLM.stream(request).pipe(Stream.runCollect))
|
|
107
|
+
for (const event of events) {
|
|
108
|
+
if (event.type === "tool-call") console.log("tool call", event.name, event.input)
|
|
109
|
+
if (event.type === "text-delta") process.stdout.write(event.text)
|
|
110
|
+
if (event.type !== "tool-call" || event.providerExecuted) continue
|
|
111
|
+
const dispatched = yield* ToolRuntime.dispatch(tools, event)
|
|
112
|
+
console.log("tool result", event.name, dispatched.result)
|
|
113
|
+
|
|
114
|
+
// A durable agent would persist these messages before starting another
|
|
115
|
+
// raw model turn. This tutorial keeps the boundary visible instead.
|
|
116
|
+
const followUp = LLM.updateRequest(request, {
|
|
117
|
+
messages: [
|
|
118
|
+
...request.messages,
|
|
119
|
+
Message.assistant([event]),
|
|
120
|
+
Message.tool({ ...event, result: dispatched.result }),
|
|
121
|
+
],
|
|
122
|
+
})
|
|
123
|
+
console.log("follow-up history messages:", followUp.messages.length)
|
|
124
|
+
}
|
|
125
|
+
})
|
|
126
|
+
|
|
127
|
+
// 6. `generateObject` is the structured-output helper. It forces a synthetic
|
|
128
|
+
// tool call internally, so the same call site works across providers instead of
|
|
129
|
+
// depending on provider-specific JSON mode flags.
|
|
130
|
+
const WeatherReport = Schema.Struct({
|
|
131
|
+
city: Schema.String,
|
|
132
|
+
forecast: Schema.String,
|
|
133
|
+
highFahrenheit: Schema.Number,
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
const generateStructuredObject = Effect.gen(function* () {
|
|
137
|
+
const response = yield* LLM.generateObject({
|
|
138
|
+
model,
|
|
139
|
+
system: "Return only structured weather data.",
|
|
140
|
+
prompt: "Give me today's weather for San Francisco.",
|
|
141
|
+
schema: WeatherReport,
|
|
142
|
+
generation: { maxTokens: 120, temperature: 0 },
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
console.log("\n== generateObject ==")
|
|
146
|
+
console.log(Formatter.formatJson(response.object, { space: 2 }))
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
// If the shape is only known at runtime, pass raw JSON Schema instead. The
|
|
150
|
+
// `.object` type is `unknown`; callers that need static types should validate it.
|
|
151
|
+
const generateDynamicObject = LLM.generateObject({
|
|
152
|
+
model,
|
|
153
|
+
prompt: "Extract the city and forecast from: San Francisco is sunny.",
|
|
154
|
+
jsonSchema: {
|
|
155
|
+
type: "object",
|
|
156
|
+
properties: {
|
|
157
|
+
city: { type: "string" },
|
|
158
|
+
forecast: { type: "string" },
|
|
159
|
+
},
|
|
160
|
+
required: ["city", "forecast"],
|
|
161
|
+
},
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
// -----------------------------------------------------------------------------
|
|
165
|
+
// Part 2: provider composition with a fake provider
|
|
166
|
+
// -----------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
// A protocol is the provider-native API shape: common request -> body, response
|
|
169
|
+
// frames -> common events. This fake one turns text prompts into a JSON body
|
|
170
|
+
// and treats every SSE frame as output text.
|
|
171
|
+
const FakeBody = Schema.Struct({
|
|
172
|
+
model: Schema.String,
|
|
173
|
+
input: Schema.String,
|
|
174
|
+
})
|
|
175
|
+
type FakeBody = Schema.Schema.Type<typeof FakeBody>
|
|
176
|
+
|
|
177
|
+
const FakeProtocol = Protocol.make<FakeBody, string, string, void>({
|
|
178
|
+
// Protocol ids are open strings, so external packages can define their own
|
|
179
|
+
// protocols without changing this package.
|
|
180
|
+
id: "fake-echo",
|
|
181
|
+
body: {
|
|
182
|
+
schema: FakeBody,
|
|
183
|
+
from: (request) =>
|
|
184
|
+
Effect.succeed({
|
|
185
|
+
model: request.model.id,
|
|
186
|
+
input: request.messages
|
|
187
|
+
.flatMap((message) => message.content)
|
|
188
|
+
.filter((part) => part.type === "text")
|
|
189
|
+
.map((part) => part.text)
|
|
190
|
+
.join("\n"),
|
|
191
|
+
}),
|
|
192
|
+
},
|
|
193
|
+
stream: {
|
|
194
|
+
event: Schema.String,
|
|
195
|
+
initial: () => undefined,
|
|
196
|
+
step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", id: "text-0", text: frame }]] as const),
|
|
197
|
+
onHalt: () => [{ type: "finish", reason: "stop" }],
|
|
198
|
+
},
|
|
199
|
+
})
|
|
200
|
+
|
|
201
|
+
// A route is the runnable binding for that protocol. It adds the deployment
|
|
202
|
+
// axes that the protocol deliberately does not know: URL, auth, and framing.
|
|
203
|
+
const FakeAdapter = Route.make({
|
|
204
|
+
id: "fake-echo",
|
|
205
|
+
provider: "fake-echo",
|
|
206
|
+
protocol: FakeProtocol,
|
|
207
|
+
endpoint: Endpoint.path("/v1/echo", { baseURL: "https://fake.local" }),
|
|
208
|
+
auth: Auth.passthrough,
|
|
209
|
+
framing: Framing.sse,
|
|
210
|
+
})
|
|
211
|
+
|
|
212
|
+
// A provider module exports a configured facade. Configuration happens before
|
|
213
|
+
// model selection; model selectors accept ids only.
|
|
214
|
+
const FakeEcho = {
|
|
215
|
+
id: ProviderID.make("fake-echo"),
|
|
216
|
+
configure: () => ({
|
|
217
|
+
id: ProviderID.make("fake-echo"),
|
|
218
|
+
model: (id: string) => FakeAdapter.model({ id }),
|
|
219
|
+
}),
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// `LLMClient.prepare` is the lower-level inspection hook: it compiles through
|
|
223
|
+
// body conversion, validation, endpoint, auth, and HTTP construction without
|
|
224
|
+
// sending anything over the network.
|
|
225
|
+
const inspectFakeProvider = Effect.gen(function* () {
|
|
226
|
+
const prepared = yield* LLMClient.prepare(
|
|
227
|
+
LLM.request({
|
|
228
|
+
model: FakeEcho.configure().model("tiny-echo"),
|
|
229
|
+
prompt: "Show me the provider pipeline.",
|
|
230
|
+
}),
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
console.log("\n== fake provider prepare ==")
|
|
234
|
+
console.log("route:", prepared.route)
|
|
235
|
+
console.log("body:", Formatter.formatJson(prepared.body, { space: 2 }))
|
|
236
|
+
})
|
|
237
|
+
|
|
238
|
+
// Provide the LLM runtime and the HTTP request executor once. Keep one path
|
|
239
|
+
// enabled at a time so the tutorial can demonstrate generate, prepare, stream,
|
|
240
|
+
// or tool-loop behavior without spending tokens on every example.
|
|
241
|
+
const requestExecutorLayer = RequestExecutor.defaultLayer
|
|
242
|
+
const llmDeps = Layer.mergeAll(requestExecutorLayer, WebSocketExecutor.layer)
|
|
243
|
+
const llmClientLayer = LLMClient.layer.pipe(Layer.provide(llmDeps))
|
|
244
|
+
|
|
245
|
+
const program = Effect.gen(function* () {
|
|
246
|
+
// yield* generateOnce
|
|
247
|
+
// yield* inspectFakeProvider
|
|
248
|
+
// yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body))))
|
|
249
|
+
// yield* streamText
|
|
250
|
+
// yield* generateStructuredObject
|
|
251
|
+
// yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object))))
|
|
252
|
+
yield* streamWithTools
|
|
253
|
+
}).pipe(Effect.provide(Layer.mergeAll(llmDeps, llmClientLayer)))
|
|
254
|
+
|
|
255
|
+
Effect.runPromise(program)
|
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/package.json",
|
|
3
|
+
"version": "1.15.13",
|
|
4
|
+
"name": "@codilore/llm",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"setup:recording-env": "bun run script/setup-recording-env.ts",
|
|
9
|
+
"test": "bun test --timeout 30000",
|
|
10
|
+
"typecheck": "tsgo --noEmit"
|
|
11
|
+
},
|
|
12
|
+
"exports": {
|
|
13
|
+
".": "./src/index.ts",
|
|
14
|
+
"./route": "./src/route/index.ts",
|
|
15
|
+
"./provider": "./src/provider.ts",
|
|
16
|
+
"./providers": "./src/providers/index.ts",
|
|
17
|
+
"./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts",
|
|
18
|
+
"./providers/anthropic": "./src/providers/anthropic.ts",
|
|
19
|
+
"./providers/azure": "./src/providers/azure.ts",
|
|
20
|
+
"./providers/cloudflare": "./src/providers/cloudflare.ts",
|
|
21
|
+
"./providers/github-copilot": "./src/providers/github-copilot.ts",
|
|
22
|
+
"./providers/google": "./src/providers/google.ts",
|
|
23
|
+
"./providers/openai": "./src/providers/openai.ts",
|
|
24
|
+
"./providers/openai-compatible": "./src/providers/openai-compatible.ts",
|
|
25
|
+
"./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts",
|
|
26
|
+
"./providers/openrouter": "./src/providers/openrouter.ts",
|
|
27
|
+
"./providers/xai": "./src/providers/xai.ts",
|
|
28
|
+
"./protocols": "./src/protocols/index.ts",
|
|
29
|
+
"./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts",
|
|
30
|
+
"./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts",
|
|
31
|
+
"./protocols/gemini": "./src/protocols/gemini.ts",
|
|
32
|
+
"./protocols/openai-chat": "./src/protocols/openai-chat.ts",
|
|
33
|
+
"./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts",
|
|
34
|
+
"./protocols/openai-responses": "./src/protocols/openai-responses.ts"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@clack/prompts": "1.0.0-alpha.1",
|
|
38
|
+
"@effect/platform-node": "^1.15.13",
|
|
39
|
+
"@codilore/http-recorder": "1.15.13",
|
|
40
|
+
"@tsconfig/bun": "^1.15.13",
|
|
41
|
+
"@types/bun": "^1.15.13",
|
|
42
|
+
"@typescript/native-preview": "^1.15.13"
|
|
43
|
+
},
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"@smithy/eventstream-codec": "4.2.14",
|
|
46
|
+
"@smithy/util-utf8": "4.2.2",
|
|
47
|
+
"aws4fetch": "1.0.20",
|
|
48
|
+
"effect": "^1.15.13"
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import * as fs from "node:fs/promises"
|
|
2
|
+
import * as path from "node:path"
|
|
3
|
+
|
|
4
|
+
const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings")
|
|
5
|
+
const MODELS_DEV_URL = "https://models.dev/api.json"
|
|
6
|
+
|
|
7
|
+
type JsonRecord = Record<string, unknown>
|
|
8
|
+
|
|
9
|
+
type Pricing = {
|
|
10
|
+
readonly input?: number
|
|
11
|
+
readonly output?: number
|
|
12
|
+
readonly cache_read?: number
|
|
13
|
+
readonly cache_write?: number
|
|
14
|
+
readonly reasoning?: number
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
type Usage = {
|
|
18
|
+
readonly inputTokens: number
|
|
19
|
+
readonly outputTokens: number
|
|
20
|
+
readonly cacheReadTokens: number
|
|
21
|
+
readonly cacheWriteTokens: number
|
|
22
|
+
readonly reasoningTokens: number
|
|
23
|
+
readonly reportedCost: number
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
type Row = Usage & {
|
|
27
|
+
readonly cassette: string
|
|
28
|
+
readonly provider: string
|
|
29
|
+
readonly model: string
|
|
30
|
+
readonly estimatedCost: number
|
|
31
|
+
readonly pricingSource: string
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
 * Type guard for plain JSON-style objects: accepts any non-null object that
 * is not an array.
 */
const isRecord = (value: unknown): value is JsonRecord => {
  if (typeof value !== "object") return false
  if (value === null) return false
  return !Array.isArray(value)
}
|
|
36
|
+
|
|
37
|
+
/** Returns `value` when it is a finite number; every other input becomes 0. */
const asNumber = (value: unknown) => {
  if (typeof value !== "number") return 0
  return Number.isFinite(value) ? value : 0
}
|
|
38
|
+
|
|
39
|
+
/** Returns `value` when it is a string (including ""), otherwise `undefined`. */
const asString = (value: unknown) => {
  if (typeof value === "string") return value
  return undefined
}
|
|
40
|
+
|
|
41
|
+
/**
 * Reads `file` through `Bun.file` and parses its text as JSON.
 * The result is deliberately typed `unknown`; callers must narrow it.
 */
const readJson = async (file: string): Promise<unknown> => {
  const text = await Bun.file(file).text()
  return JSON.parse(text)
}
|
|
42
|
+
|
|
43
|
+
/**
 * Recursively lists the files below `dir`.
 *
 * The directory is read once and that single listing drives both the file
 * list and the recursion (the previous version called `readdir` twice per
 * directory, doubling the I/O and letting the two passes see different
 * snapshots). Ordering is unchanged: entries directly inside `dir` that are
 * not directories come first, followed by the results of each subdirectory in
 * listing order.
 *
 * @param dir Directory to scan.
 * @returns Paths built with `path.join(dir, name)` for every non-directory entry.
 */
const walk = async (dir: string): Promise<ReadonlyArray<string>> => {
  const entries = await fs.readdir(dir, { withFileTypes: true })
  const files = entries.filter((entry) => !entry.isDirectory()).map((entry) => path.join(dir, entry.name))
  // Subdirectories are walked concurrently; Promise.all keeps their order.
  const nested = await Promise.all(
    entries.filter((entry) => entry.isDirectory()).map((entry) => walk(path.join(dir, entry.name))),
  )
  return files.concat(...nested)
}
|
|
56
|
+
|
|
57
|
+
/**
 * Maps a recorded request URL to a provider id by substring match.
 *
 * Markers are checked in order and the first hit wins. The Cloudflare hosts
 * are appended after the pre-existing markers, so every URL that matched
 * before still resolves to the same provider; Cloudflare recordings simply no
 * longer fall through to "unknown".
 *
 * @param url Request URL taken from a cassette interaction.
 * @returns The provider id, or "unknown" when no marker matches.
 */
const providerFromUrl = (url: string) => {
  const markers = [
    ["api.openai.com", "openai"],
    ["api.anthropic.com", "anthropic"],
    ["generativelanguage.googleapis.com", "google"],
    ["bedrock", "amazon-bedrock"],
    ["openrouter.ai", "openrouter"],
    ["api.x.ai", "xai"],
    ["api.groq.com", "groq"],
    ["api.deepseek.com", "deepseek"],
    ["api.together.xyz", "togetherai"],
    ["gateway.ai.cloudflare.com", "cloudflare-ai-gateway"],
    ["api.cloudflare.com", "cloudflare-workers-ai"],
  ] as const
  const match = markers.find(([marker]) => url.includes(marker))
  return match ? match[1] : "unknown"
}
|
|
69
|
+
|
|
70
|
+
const providerAliases: Record<string, ReadonlyArray<string>> = {
|
|
71
|
+
openai: ["openai"],
|
|
72
|
+
anthropic: ["anthropic"],
|
|
73
|
+
google: ["google"],
|
|
74
|
+
"amazon-bedrock": ["amazon-bedrock"],
|
|
75
|
+
openrouter: ["openrouter", "openai", "anthropic", "google"],
|
|
76
|
+
xai: ["xai"],
|
|
77
|
+
groq: ["groq"],
|
|
78
|
+
deepseek: ["deepseek"],
|
|
79
|
+
togetherai: ["togetherai"],
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
 * Produces candidate pricing-catalog ids for a recorded model id.
 *
 * The original eight aliases are kept first and in their original order, so
 * any lookup that succeeded before resolves to the same entry. After them come
 * the combinations the original never tried: vendor prefix removed AND date
 * suffix removed (e.g. `openai/gpt-4o-mini-2024-07-18` -> `gpt-4o-mini`).
 * Duplicates are dropped while preserving first-seen order.
 *
 * @param model Model id as sent in the request body.
 * @returns Distinct candidate ids, most literal first.
 */
const modelAliases = (model: string) => {
  const dashedDate = /-\d{4}-\d{2}-\d{2}$/
  const compactDate = /-\d{8}$/
  const withoutDates = (id: string) => id.replace(dashedDate, "").replace(compactDate, "")
  const withoutVendor = model.replace(/^(?:models|openai|anthropic|google)\//, "")
  return Array.from(
    new Set([
      model,
      model.replace(/^models\//, ""),
      model.replace(compactDate, ""),
      model.replace(dashedDate, ""),
      withoutDates(model),
      model.replace(/^openai\//, ""),
      model.replace(/^anthropic\//, ""),
      model.replace(/^google\//, ""),
      // New: strip the vendor prefix and the date suffix together.
      withoutVendor.replace(compactDate, ""),
      withoutVendor.replace(dashedDate, ""),
      withoutDates(withoutVendor),
    ]),
  )
}
|
|
92
|
+
|
|
93
|
+
/**
 * Looks up the pricing entry for `model` in the pricing catalog.
 *
 * Provider candidates come from `providerAliases` (falling back to the
 * provider id itself) and model candidates from `modelAliases`; the first
 * provider/model pair whose entry carries an object-valued `cost` wins.
 *
 * @returns The matched `cost` object and a `provider/model` source label, or
 *          `{ pricing: undefined, source: "missing" }` when nothing matches.
 */
const pricingFor = (models: JsonRecord, provider: string, model: string) => {
  const providerCandidates = providerAliases[provider] ?? [provider]
  const modelCandidates = modelAliases(model)
  for (const providerID of providerCandidates) {
    const catalog = models[providerID]
    if (!isRecord(catalog)) continue
    const catalogModels = catalog.models
    if (!isRecord(catalogModels)) continue
    for (const modelID of modelCandidates) {
      const candidate = catalogModels[modelID]
      if (!isRecord(candidate)) continue
      const cost = candidate.cost
      if (!isRecord(cost)) continue
      return { pricing: cost as Pricing, source: `${providerID}/${modelID}` }
    }
  }
  return { pricing: undefined, source: "missing" }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Estimates the dollar cost of `usage` from per-million-token `pricing`.
 *
 * The token counts this script collects come from OpenAI-style usage objects,
 * where cached tokens are reported inside the prompt/input total and reasoning
 * tokens inside the completion/output total. The previous formula therefore
 * charged those tokens twice (once at the base rate and again at the
 * cache/reasoning rate). Here every token is priced exactly once:
 *
 * - input that was neither a cache read nor a cache write -> `input` rate
 * - cache reads / writes -> `cache_read` / `cache_write`, else the `input` rate
 * - non-reasoning output -> `output` rate
 * - reasoning output -> `reasoning`, else the `output` rate
 *
 * When the catalog lists no cache or reasoning rates the result equals the old
 * formula.
 *
 * @param usage Aggregated token counts for one cassette.
 * @param pricing Per-million-token rates, or `undefined` when unknown.
 * @returns The estimated cost, or 0 when `pricing` is missing.
 */
const estimateCost = (usage: Usage, pricing: Pricing | undefined) => {
  if (!pricing) return 0
  const inputRate = pricing.input ?? 0
  const outputRate = pricing.output ?? 0
  // Clamp so inconsistent provider reports can never produce negative counts.
  const uncachedInputTokens = Math.max(0, usage.inputTokens - usage.cacheReadTokens - usage.cacheWriteTokens)
  const plainOutputTokens = Math.max(0, usage.outputTokens - usage.reasoningTokens)
  const perMillion =
    uncachedInputTokens * inputRate +
    usage.cacheReadTokens * (pricing.cache_read ?? inputRate) +
    usage.cacheWriteTokens * (pricing.cache_write ?? inputRate) +
    plainOutputTokens * outputRate +
    usage.reasoningTokens * (pricing.reasoning ?? outputRate)
  return perMillion / 1_000_000
}
|
|
117
|
+
|
|
118
|
+
/** Creates a fresh, all-zero usage record (a new object on every call). */
const emptyUsage = (): Usage => {
  const zeroed: Usage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheWriteTokens: 0,
    reasoningTokens: 0,
    reportedCost: 0,
  }
  return zeroed
}
|
|
126
|
+
|
|
127
|
+
/**
 * Field-wise sum of two usage records. Only the six usage fields are copied
 * into the result; any extra properties on the inputs are dropped.
 */
const addUsage = (a: Usage, b: Usage): Usage => {
  const inputTokens = a.inputTokens + b.inputTokens
  const outputTokens = a.outputTokens + b.outputTokens
  const cacheReadTokens = a.cacheReadTokens + b.cacheReadTokens
  const cacheWriteTokens = a.cacheWriteTokens + b.cacheWriteTokens
  const reasoningTokens = a.reasoningTokens + b.reasoningTokens
  const reportedCost = a.reportedCost + b.reportedCost
  return { inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, reasoningTokens, reportedCost }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Normalizes one provider `usage` object into a `Usage` record.
 *
 * Both naming schemes found in the payloads are read and added together:
 * `prompt_tokens` / `completion_tokens` with their `*_tokens_details`, and
 * `input_tokens` / `output_tokens` with theirs. Missing or non-numeric fields
 * count as 0, and a non-object input yields an empty usage.
 */
const usageFromObject = (usage: unknown): Usage => {
  if (!isRecord(usage)) return emptyUsage()
  // Returns the nested details object under `key`, or an empty record.
  const details = (key: string): JsonRecord => {
    const nested = usage[key]
    return isRecord(nested) ? nested : {}
  }
  const prompt = details("prompt_tokens_details")
  const completion = details("completion_tokens_details")
  const input = details("input_tokens_details")
  const output = details("output_tokens_details")
  return {
    inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens),
    outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens),
    cacheReadTokens: asNumber(prompt.cached_tokens) + asNumber(input.cached_tokens),
    cacheWriteTokens: asNumber(prompt.cache_write_tokens) + asNumber(input.cache_write_tokens),
    reasoningTokens: asNumber(completion.reasoning_tokens) + asNumber(output.reasoning_tokens),
    reportedCost: asNumber(usage.cost),
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Extracts the JSON payloads carried on `data:` lines of a response body.
 *
 * Lines are trimmed before matching; empty payloads, the `[DONE]` sentinel,
 * and payloads that fail to parse are skipped.
 */
const jsonPayloads = (body: string) => {
  const prefix = "data:"
  const payloads: unknown[] = []
  for (const rawLine of body.split("\n")) {
    const line = rawLine.trim()
    if (!line.startsWith(prefix)) continue
    const data = line.slice(prefix.length).trim()
    if (data === "" || data === "[DONE]") continue
    try {
      payloads.push(JSON.parse(data) as unknown)
    } catch {
      // Not JSON: ignore this line and keep scanning.
    }
  }
  return payloads
}
|
|
167
|
+
|
|
168
|
+
/**
 * Sums the usage reported across every `data:` payload of a response body.
 * Each object payload contributes its top-level `usage` plus the `usage`
 * nested under `response`, when that is an object.
 */
const usageFromResponseBody = (body: string) => {
  let running: Usage = emptyUsage()
  for (const payload of jsonPayloads(body)) {
    if (!isRecord(payload)) continue
    const topLevel = usageFromObject(payload.usage)
    const nested = usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined)
    // Combine the payload's two sources first, then fold into the running total.
    running = addUsage(running, addUsage(topLevel, nested))
  }
  return running
}
|
|
179
|
+
|
|
180
|
+
/**
 * Reads the `model` field from a recorded request whose `body` is a JSON
 * string. Returns "unknown" when the request is not an object, the body is
 * missing or empty, the body is not a JSON object, or `model` is not a string.
 */
const modelFromRequest = (request: unknown) => {
  const fallback = "unknown"
  const rawBody = isRecord(request) ? asString(request.body) : undefined
  if (!rawBody) return fallback
  let parsed: unknown
  try {
    parsed = JSON.parse(rawBody)
  } catch {
    return fallback
  }
  if (!isRecord(parsed)) return fallback
  return asString(parsed.model) ?? fallback
}
|
|
192
|
+
|
|
193
|
+
/**
 * Builds the report row for one cassette file.
 *
 * Provider and model are taken from the first object-valued interaction;
 * usage is summed over every interaction that has a string response body.
 *
 * @param models Pricing catalog used for the cost estimate.
 * @param file Cassette path; reported relative to `RECORDINGS_DIR`.
 * @param cassette Parsed cassette JSON.
 * @returns The row, or `undefined` when the cassette has no usable interaction.
 */
const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => {
  if (!isRecord(cassette)) return undefined
  const { interactions } = cassette
  if (!Array.isArray(interactions)) return undefined

  const recorded = interactions.filter(isRecord)
  const first = recorded.at(0)
  if (!first) return undefined
  const firstRequest = first.request
  if (!isRecord(firstRequest)) return undefined

  const provider = providerFromUrl(asString(firstRequest.url) ?? "")
  const model = modelFromRequest(firstRequest)

  let usage = emptyUsage()
  for (const interaction of recorded) {
    const response = interaction.response
    if (!isRecord(response)) continue
    const responseBody = asString(response.body)
    if (!responseBody) continue
    usage = addUsage(usage, usageFromResponseBody(responseBody))
  }

  const { pricing, source } = pricingFor(models, provider, model)
  return {
    cassette: path.relative(RECORDINGS_DIR, file),
    provider,
    model,
    ...usage,
    estimatedCost: estimateCost(usage, pricing),
    pricingSource: source,
  }
}
|
|
215
|
+
|
|
216
|
+
/** Formats a dollar amount with six decimal places, e.g. `$0.001234`. */
const money = (value: number) => {
  // toFixed(6) already renders both 0 and -0 as "0.000000".
  const fixed = value.toFixed(6)
  return "$" + fixed
}
|
|
217
|
+
/** Formats a token count using en-US digit grouping, e.g. `1,234,567`. */
const tokens = (value: number) => new Intl.NumberFormat("en-US").format(value)
|
|
218
|
+
|
|
219
|
+
// -----------------------------------------------------------------------------
// Report: load pricing, build one row per cassette, print a Markdown summary.
// -----------------------------------------------------------------------------

// Fail loudly when the pricing catalog cannot be loaded. Without these checks
// an HTTP error page or a non-object payload would either throw an opaque
// parse error or silently price every cassette as "missing".
const pricingResponse = await fetch(MODELS_DEV_URL)
if (!pricingResponse.ok) {
  throw new Error(
    `Failed to fetch pricing from ${MODELS_DEV_URL}: ${pricingResponse.status} ${pricingResponse.statusText}`,
  )
}
const pricingCatalog: unknown = await pricingResponse.json()
if (!isRecord(pricingCatalog)) {
  throw new Error(`Unexpected pricing payload from ${MODELS_DEV_URL}: expected a JSON object`)
}
const models: JsonRecord = pricingCatalog

// One row per `.json` cassette; cassettes without a usable interaction are dropped.
const rows = (
  await Promise.all(
    (await walk(RECORDINGS_DIR))
      .filter((file) => file.endsWith(".json"))
      .map(async (file) => rowFor(models, file, await readJson(file))),
  )
).filter((row): row is Row => row !== undefined)

// Grand totals across all rows (token counts, reported cost, estimated cost).
const totals = rows.reduce(
  (total, row) => ({
    ...addUsage(total, row),
    estimatedCost: total.estimatedCost + row.estimatedCost,
  }),
  { ...emptyUsage(), estimatedCost: 0 },
)

console.log("# Recording Cost Report")
console.log("")
console.log(`Pricing: ${MODELS_DEV_URL}`)
console.log(`Cassettes: ${rows.length}`)
console.log(`Reported cost: ${money(totals.reportedCost)}`)
console.log(`Estimated cost: ${money(totals.estimatedCost)}`)
console.log("")
console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |")
// Text columns (Provider, Model, Pricing, Cassette) are left-aligned and the
// five numeric columns right-aligned; previously Model was right-aligned too.
console.log("|---|---|---:|---:|---:|---:|---:|---|---|")
// Most expensive cassettes first; rows with no tokens and no cost are omitted.
for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) {
  if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue
  console.log(
    `| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`,
  )
}
|