@oh-my-pi/pi-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,676 @@
1
+ import OpenAI from "openai";
2
+ import type {
3
+ ChatCompletionAssistantMessageParam,
4
+ ChatCompletionChunk,
5
+ ChatCompletionContentPart,
6
+ ChatCompletionContentPartImage,
7
+ ChatCompletionContentPartText,
8
+ ChatCompletionMessageParam,
9
+ ChatCompletionToolMessageParam,
10
+ } from "openai/resources/chat/completions.js";
11
+ import { calculateCost } from "../models";
12
+ import { getEnvApiKey } from "../stream";
13
+ import type {
14
+ AssistantMessage,
15
+ Context,
16
+ Message,
17
+ Model,
18
+ OpenAICompat,
19
+ StopReason,
20
+ StreamFunction,
21
+ StreamOptions,
22
+ TextContent,
23
+ ThinkingContent,
24
+ Tool,
25
+ ToolCall,
26
+ } from "../types";
27
+ import { AssistantMessageEventStream } from "../utils/event-stream";
28
+ import { parseStreamingJson } from "../utils/json-parse";
29
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
30
+ import { transformMessages } from "./transorm-messages";
31
+
32
+ /**
33
+ * Normalize tool call ID for Mistral.
34
+ * Mistral requires tool IDs to be exactly 9 alphanumeric characters (a-z, A-Z, 0-9).
35
+ */
36
+ function normalizeMistralToolId(id: string, isMistral: boolean): string {
37
+ if (!isMistral) return id;
38
+ // Remove non-alphanumeric characters
39
+ let normalized = id.replace(/[^a-zA-Z0-9]/g, "");
40
+ // Mistral requires exactly 9 characters
41
+ if (normalized.length < 9) {
42
+ // Pad with deterministic characters based on original ID to ensure matching
43
+ const padding = "ABCDEFGHI";
44
+ normalized = normalized + padding.slice(0, 9 - normalized.length);
45
+ } else if (normalized.length > 9) {
46
+ normalized = normalized.slice(0, 9);
47
+ }
48
+ return normalized;
49
+ }
50
+
51
+ /**
52
+ * Check if conversation messages contain tool calls or tool results.
53
+ * This is needed because Anthropic (via proxy) requires the tools param
54
+ * to be present when messages include tool_calls or tool role messages.
55
+ */
56
+ function hasToolHistory(messages: Message[]): boolean {
57
+ for (const msg of messages) {
58
+ if (msg.role === "toolResult") {
59
+ return true;
60
+ }
61
+ if (msg.role === "assistant") {
62
+ if (msg.content.some((block) => block.type === "toolCall")) {
63
+ return true;
64
+ }
65
+ }
66
+ }
67
+ return false;
68
+ }
69
+
70
/** Options specific to the OpenAI chat-completions streaming adapter. */
export interface OpenAICompletionsOptions extends StreamOptions {
  /** Forwarded as the `tool_choice` param: let the model decide, forbid/force tool use, or pin one function. */
  toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
  /** Forwarded as `reasoning_effort` when the model supports reasoning and the provider accepts the param. */
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
}
74
+
75
+ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
76
+ model: Model<"openai-completions">,
77
+ context: Context,
78
+ options?: OpenAICompletionsOptions,
79
+ ): AssistantMessageEventStream => {
80
+ const stream = new AssistantMessageEventStream();
81
+
82
+ (async () => {
83
+ const output: AssistantMessage = {
84
+ role: "assistant",
85
+ content: [],
86
+ api: model.api,
87
+ provider: model.provider,
88
+ model: model.id,
89
+ usage: {
90
+ input: 0,
91
+ output: 0,
92
+ cacheRead: 0,
93
+ cacheWrite: 0,
94
+ totalTokens: 0,
95
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
96
+ },
97
+ stopReason: "stop",
98
+ timestamp: Date.now(),
99
+ };
100
+
101
+ try {
102
+ const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
103
+ const client = createClient(model, context, apiKey);
104
+ const params = buildParams(model, context, options);
105
+ const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
106
+ stream.push({ type: "start", partial: output });
107
+
108
+ let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
109
+ const blocks = output.content;
110
+ const blockIndex = () => blocks.length - 1;
111
+ const finishCurrentBlock = (block?: typeof currentBlock) => {
112
+ if (block) {
113
+ if (block.type === "text") {
114
+ stream.push({
115
+ type: "text_end",
116
+ contentIndex: blockIndex(),
117
+ content: block.text,
118
+ partial: output,
119
+ });
120
+ } else if (block.type === "thinking") {
121
+ stream.push({
122
+ type: "thinking_end",
123
+ contentIndex: blockIndex(),
124
+ content: block.thinking,
125
+ partial: output,
126
+ });
127
+ } else if (block.type === "toolCall") {
128
+ block.arguments = JSON.parse(block.partialArgs || "{}");
129
+ delete block.partialArgs;
130
+ stream.push({
131
+ type: "toolcall_end",
132
+ contentIndex: blockIndex(),
133
+ toolCall: block,
134
+ partial: output,
135
+ });
136
+ }
137
+ }
138
+ };
139
+
140
+ for await (const chunk of openaiStream) {
141
+ if (chunk.usage) {
142
+ const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
143
+ const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
144
+ const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
145
+ const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
146
+ output.usage = {
147
+ // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
148
+ input,
149
+ output: outputTokens,
150
+ cacheRead: cachedTokens,
151
+ cacheWrite: 0,
152
+ // Compute totalTokens ourselves since we add reasoning_tokens to output
153
+ // and some providers (e.g., Groq) don't include them in total_tokens
154
+ totalTokens: input + outputTokens + cachedTokens,
155
+ cost: {
156
+ input: 0,
157
+ output: 0,
158
+ cacheRead: 0,
159
+ cacheWrite: 0,
160
+ total: 0,
161
+ },
162
+ };
163
+ calculateCost(model, output.usage);
164
+ }
165
+
166
+ const choice = chunk.choices[0];
167
+ if (!choice) continue;
168
+
169
+ if (choice.finish_reason) {
170
+ output.stopReason = mapStopReason(choice.finish_reason);
171
+ }
172
+
173
+ if (choice.delta) {
174
+ if (
175
+ choice.delta.content !== null &&
176
+ choice.delta.content !== undefined &&
177
+ choice.delta.content.length > 0
178
+ ) {
179
+ if (!currentBlock || currentBlock.type !== "text") {
180
+ finishCurrentBlock(currentBlock);
181
+ currentBlock = { type: "text", text: "" };
182
+ output.content.push(currentBlock);
183
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
184
+ }
185
+
186
+ if (currentBlock.type === "text") {
187
+ currentBlock.text += choice.delta.content;
188
+ stream.push({
189
+ type: "text_delta",
190
+ contentIndex: blockIndex(),
191
+ delta: choice.delta.content,
192
+ partial: output,
193
+ });
194
+ }
195
+ }
196
+
197
+ // Some endpoints return reasoning in reasoning_content (llama.cpp),
198
+ // or reasoning (other openai compatible endpoints)
199
+ const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
200
+ for (const field of reasoningFields) {
201
+ if (
202
+ (choice.delta as any)[field] !== null &&
203
+ (choice.delta as any)[field] !== undefined &&
204
+ (choice.delta as any)[field].length > 0
205
+ ) {
206
+ if (!currentBlock || currentBlock.type !== "thinking") {
207
+ finishCurrentBlock(currentBlock);
208
+ currentBlock = {
209
+ type: "thinking",
210
+ thinking: "",
211
+ thinkingSignature: field,
212
+ };
213
+ output.content.push(currentBlock);
214
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
215
+ }
216
+
217
+ if (currentBlock.type === "thinking") {
218
+ const delta = (choice.delta as any)[field];
219
+ currentBlock.thinking += delta;
220
+ stream.push({
221
+ type: "thinking_delta",
222
+ contentIndex: blockIndex(),
223
+ delta,
224
+ partial: output,
225
+ });
226
+ }
227
+ }
228
+ }
229
+
230
+ if (choice?.delta?.tool_calls) {
231
+ for (const toolCall of choice.delta.tool_calls) {
232
+ if (
233
+ !currentBlock ||
234
+ currentBlock.type !== "toolCall" ||
235
+ (toolCall.id && currentBlock.id !== toolCall.id)
236
+ ) {
237
+ finishCurrentBlock(currentBlock);
238
+ currentBlock = {
239
+ type: "toolCall",
240
+ id: toolCall.id || "",
241
+ name: toolCall.function?.name || "",
242
+ arguments: {},
243
+ partialArgs: "",
244
+ };
245
+ output.content.push(currentBlock);
246
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
247
+ }
248
+
249
+ if (currentBlock.type === "toolCall") {
250
+ if (toolCall.id) currentBlock.id = toolCall.id;
251
+ if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
252
+ let delta = "";
253
+ if (toolCall.function?.arguments) {
254
+ delta = toolCall.function.arguments;
255
+ currentBlock.partialArgs += toolCall.function.arguments;
256
+ currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
257
+ }
258
+ stream.push({
259
+ type: "toolcall_delta",
260
+ contentIndex: blockIndex(),
261
+ delta,
262
+ partial: output,
263
+ });
264
+ }
265
+ }
266
+ }
267
+
268
+ const reasoningDetails = (choice.delta as any).reasoning_details;
269
+ if (reasoningDetails && Array.isArray(reasoningDetails)) {
270
+ for (const detail of reasoningDetails) {
271
+ if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
272
+ const matchingToolCall = output.content.find(
273
+ (b) => b.type === "toolCall" && b.id === detail.id,
274
+ ) as ToolCall | undefined;
275
+ if (matchingToolCall) {
276
+ matchingToolCall.thoughtSignature = JSON.stringify(detail);
277
+ }
278
+ }
279
+ }
280
+ }
281
+ }
282
+ }
283
+
284
+ finishCurrentBlock(currentBlock);
285
+
286
+ if (options?.signal?.aborted) {
287
+ throw new Error("Request was aborted");
288
+ }
289
+
290
+ if (output.stopReason === "aborted" || output.stopReason === "error") {
291
+ throw new Error("An unkown error ocurred");
292
+ }
293
+
294
+ stream.push({ type: "done", reason: output.stopReason, message: output });
295
+ stream.end();
296
+ } catch (error) {
297
+ for (const block of output.content) delete (block as any).index;
298
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
299
+ output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
300
+ stream.push({ type: "error", reason: output.stopReason, error: output });
301
+ stream.end();
302
+ }
303
+ })();
304
+
305
+ return stream;
306
+ };
307
+
308
+ function createClient(model: Model<"openai-completions">, context: Context, apiKey?: string) {
309
+ if (!apiKey) {
310
+ if (!process.env.OPENAI_API_KEY) {
311
+ throw new Error(
312
+ "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
313
+ );
314
+ }
315
+ apiKey = process.env.OPENAI_API_KEY;
316
+ }
317
+
318
+ const headers = { ...model.headers };
319
+ if (model.provider === "github-copilot") {
320
+ // Copilot expects X-Initiator to indicate whether the request is user-initiated
321
+ // or agent-initiated (e.g. follow-up after assistant/tool messages). If there is
322
+ // no prior message, default to user-initiated.
323
+ const messages = context.messages || [];
324
+ const lastMessage = messages[messages.length - 1];
325
+ const isAgentCall = lastMessage ? lastMessage.role !== "user" : false;
326
+ headers["X-Initiator"] = isAgentCall ? "agent" : "user";
327
+ headers["Openai-Intent"] = "conversation-edits";
328
+
329
+ // Copilot requires this header when sending images
330
+ const hasImages = messages.some((msg) => {
331
+ if (msg.role === "user" && Array.isArray(msg.content)) {
332
+ return msg.content.some((c) => c.type === "image");
333
+ }
334
+ if (msg.role === "toolResult" && Array.isArray(msg.content)) {
335
+ return msg.content.some((c) => c.type === "image");
336
+ }
337
+ return false;
338
+ });
339
+ if (hasImages) {
340
+ headers["Copilot-Vision-Request"] = "true";
341
+ }
342
+ }
343
+
344
+ return new OpenAI({
345
+ apiKey,
346
+ baseURL: model.baseUrl,
347
+ dangerouslyAllowBrowser: true,
348
+ defaultHeaders: headers,
349
+ });
350
+ }
351
+
352
+ function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
353
+ const compat = getCompat(model);
354
+ const messages = convertMessages(model, context, compat);
355
+
356
+ const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
357
+ model: model.id,
358
+ messages,
359
+ stream: true,
360
+ stream_options: { include_usage: true },
361
+ };
362
+
363
+ if (compat.supportsStore) {
364
+ params.store = false;
365
+ }
366
+
367
+ if (options?.maxTokens) {
368
+ if (compat.maxTokensField === "max_tokens") {
369
+ (params as any).max_tokens = options.maxTokens;
370
+ } else {
371
+ params.max_completion_tokens = options.maxTokens;
372
+ }
373
+ }
374
+
375
+ if (options?.temperature !== undefined) {
376
+ params.temperature = options.temperature;
377
+ }
378
+
379
+ if (context.tools) {
380
+ params.tools = convertTools(context.tools);
381
+ } else if (hasToolHistory(context.messages)) {
382
+ // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
383
+ params.tools = [];
384
+ }
385
+
386
+ if (options?.toolChoice) {
387
+ params.tool_choice = options.toolChoice;
388
+ }
389
+
390
+ if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
391
+ params.reasoning_effort = options.reasoningEffort;
392
+ }
393
+
394
+ return params;
395
+ }
396
+
397
/**
 * Convert internal conversation messages into OpenAI ChatCompletion message
 * params, applying provider compatibility quirks: developer-vs-system role,
 * Mistral tool-ID/name rules, thinking-block handling, Copilot string content,
 * image filtering for text-only models, and empty-message pruning.
 */
function convertMessages(
  model: Model<"openai-completions">,
  context: Context,
  compat: Required<OpenAICompat>,
): ChatCompletionMessageParam[] {
  const params: ChatCompletionMessageParam[] = [];

  const transformedMessages = transformMessages(context.messages, model);

  if (context.systemPrompt) {
    // Reasoning models on standards-compliant endpoints take the system prompt
    // under the "developer" role instead of "system".
    const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
    const role = useDeveloperRole ? "developer" : "system";
    params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
  }

  let lastRole: string | null = null;

  for (const msg of transformedMessages) {
    // Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
    // Insert a synthetic assistant message to bridge the gap
    if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
      params.push({
        role: "assistant",
        content: "I have processed the tool results.",
      });
    }

    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        params.push({
          role: "user",
          content: sanitizeSurrogates(msg.content),
        });
      } else {
        // Structured user content: map text and image blocks to OpenAI parts.
        const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            } satisfies ChatCompletionContentPartText;
          } else {
            return {
              type: "image_url",
              image_url: {
                url: `data:${item.mimeType};base64,${item.data}`,
              },
            } satisfies ChatCompletionContentPartImage;
          }
        });
        // Drop image parts for models that don't accept image input.
        const filteredContent = !model.input.includes("image")
          ? content.filter((c) => c.type !== "image_url")
          : content;
        if (filteredContent.length === 0) continue;
        params.push({
          role: "user",
          content: filteredContent,
        });
      }
    } else if (msg.role === "assistant") {
      // Some providers (e.g. Mistral) don't accept null content, use empty string instead
      const assistantMsg: ChatCompletionAssistantMessageParam = {
        role: "assistant",
        content: compat.requiresAssistantAfterToolResult ? "" : null,
      };

      const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
      // Filter out empty text blocks to avoid API validation errors
      const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
      if (nonEmptyTextBlocks.length > 0) {
        // GitHub Copilot requires assistant content as a string, not an array.
        // Sending as array causes Claude models to re-answer all previous prompts.
        if (model.provider === "github-copilot") {
          assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
        } else {
          assistantMsg.content = nonEmptyTextBlocks.map((b) => {
            return { type: "text", text: sanitizeSurrogates(b.text) };
          });
        }
      }

      // Handle thinking blocks
      const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
      // Filter out empty thinking blocks to avoid API validation errors
      const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
      if (nonEmptyThinkingBlocks.length > 0) {
        if (compat.requiresThinkingAsText) {
          // Convert thinking blocks to text with <thinking> delimiters
          const thinkingText = nonEmptyThinkingBlocks
            .map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
            .join("\n");
          const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
          if (textContent) {
            textContent.unshift({ type: "text", text: thinkingText });
          } else {
            assistantMsg.content = [{ type: "text", text: thinkingText }];
          }
        } else {
          // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
          // The signature names the wire field the reasoning originally arrived in.
          const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
          if (signature && signature.length > 0) {
            (assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
          }
        }
      }

      const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
      if (toolCalls.length > 0) {
        assistantMsg.tool_calls = toolCalls.map((tc) => ({
          id: normalizeMistralToolId(tc.id, compat.requiresMistralToolIds),
          type: "function" as const,
          function: {
            name: tc.name,
            arguments: JSON.stringify(tc.arguments),
          },
        }));
        // Replay stored encrypted reasoning payloads (parsed from each tool
        // call's thoughtSignature); invalid JSON entries are silently skipped.
        const reasoningDetails = toolCalls
          .filter((tc) => tc.thoughtSignature)
          .map((tc) => {
            try {
              return JSON.parse(tc.thoughtSignature!);
            } catch {
              return null;
            }
          })
          .filter(Boolean);
        if (reasoningDetails.length > 0) {
          (assistantMsg as any).reasoning_details = reasoningDetails;
        }
      }
      // Skip assistant messages that have no content and no tool calls.
      // Mistral explicitly requires "either content or tool_calls, but not none".
      // Other providers also don't accept empty assistant messages.
      // This handles aborted assistant responses that got no content.
      const content = assistantMsg.content;
      const hasContent =
        content !== null &&
        content !== undefined &&
        (typeof content === "string" ? content.length > 0 : content.length > 0);
      if (!hasContent && !assistantMsg.tool_calls) {
        continue;
      }
      params.push(assistantMsg);
    } else if (msg.role === "toolResult") {
      // Extract text and image content
      const textResult = msg.content
        .filter((c) => c.type === "text")
        .map((c) => (c as any).text)
        .join("\n");
      const hasImages = msg.content.some((c) => c.type === "image");

      // Always send tool result with text (or placeholder if only images)
      const hasText = textResult.length > 0;
      // Some providers (e.g. Mistral) require the 'name' field in tool results
      const toolResultMsg: ChatCompletionToolMessageParam = {
        role: "tool",
        content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
        tool_call_id: normalizeMistralToolId(msg.toolCallId, compat.requiresMistralToolIds),
      };
      if (compat.requiresToolResultName && msg.toolName) {
        (toolResultMsg as any).name = msg.toolName;
      }
      params.push(toolResultMsg);

      // If there are images and model supports them, send a follow-up user message with images
      // (the tool role itself cannot carry image parts here).
      if (hasImages && model.input.includes("image")) {
        const contentBlocks: Array<
          { type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
        > = [];

        // Add text prefix
        contentBlocks.push({
          type: "text",
          text: "Attached image(s) from tool result:",
        });

        // Add images
        for (const block of msg.content) {
          if (block.type === "image") {
            contentBlocks.push({
              type: "image_url",
              image_url: {
                url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
              },
            });
          }
        }

        params.push({
          role: "user",
          content: contentBlocks,
        });
      }
    }

    lastRole = msg.role;
  }

  return params;
}
596
+
597
+ function convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
598
+ return tools.map((tool) => ({
599
+ type: "function",
600
+ function: {
601
+ name: tool.name,
602
+ description: tool.description,
603
+ parameters: tool.parameters as any, // TypeBox already generates JSON Schema
604
+ },
605
+ }));
606
+ }
607
+
608
+ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): StopReason {
609
+ if (reason === null) return "stop";
610
+ switch (reason) {
611
+ case "stop":
612
+ return "stop";
613
+ case "length":
614
+ return "length";
615
+ case "function_call":
616
+ case "tool_calls":
617
+ return "toolUse";
618
+ case "content_filter":
619
+ return "error";
620
+ default: {
621
+ const _exhaustive: never = reason;
622
+ throw new Error(`Unhandled stop reason: ${_exhaustive}`);
623
+ }
624
+ }
625
+ }
626
+
627
+ /**
628
+ * Detect compatibility settings from baseUrl for known providers.
629
+ * Returns a fully resolved OpenAICompat object with all fields set.
630
+ */
631
+ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
632
+ const isNonStandard =
633
+ baseUrl.includes("cerebras.ai") ||
634
+ baseUrl.includes("api.x.ai") ||
635
+ baseUrl.includes("mistral.ai") ||
636
+ baseUrl.includes("chutes.ai") ||
637
+ baseUrl.includes("localhost");
638
+
639
+ const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
640
+
641
+ const isGrok = baseUrl.includes("api.x.ai");
642
+
643
+ const isMistral = baseUrl.includes("mistral.ai");
644
+
645
+ return {
646
+ supportsStore: !isNonStandard,
647
+ supportsDeveloperRole: !isNonStandard,
648
+ supportsReasoningEffort: !isGrok,
649
+ maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
650
+ requiresToolResultName: isMistral,
651
+ requiresAssistantAfterToolResult: false, // Mistral no longer requires this as of Dec 2024
652
+ requiresThinkingAsText: isMistral,
653
+ requiresMistralToolIds: isMistral,
654
+ };
655
+ }
656
+
657
+ /**
658
+ * Get resolved compatibility settings for a model.
659
+ * Uses explicit model.compat if provided, otherwise auto-detects from URL.
660
+ */
661
+ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
662
+ const detected = detectCompatFromUrl(model.baseUrl);
663
+ if (!model.compat) return detected;
664
+
665
+ return {
666
+ supportsStore: model.compat.supportsStore ?? detected.supportsStore,
667
+ supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
668
+ supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
669
+ maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
670
+ requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
671
+ requiresAssistantAfterToolResult:
672
+ model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
673
+ requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
674
+ requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds,
675
+ };
676
+ }