@oh-my-pi/pi-ai 1.337.0

package/README.md ADDED
@@ -0,0 +1,962 @@
1
+ # @oh-my-pi/pi-ai
2
+
3
+ Unified LLM API with automatic model discovery, provider configuration, token and cost tracking, simple context persistence, and hand-off to other models mid-session.
4
+
5
+ **Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows.
6
+
7
+ ## Supported Providers
8
+
9
+ - **OpenAI**
10
+ - **Anthropic**
11
+ - **Google**
12
+ - **Mistral**
13
+ - **Groq**
14
+ - **Cerebras**
15
+ - **xAI**
16
+ - **OpenRouter**
17
+ - **GitHub Copilot** (requires OAuth, see below)
18
+ - **Any OpenAI-compatible API**: Ollama, vLLM, LM Studio, etc.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ npm install @oh-my-pi/pi-ai
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ```typescript
29
+ import { Type, getModel, stream, complete, Context, Tool, StringEnum } from "@oh-my-pi/pi-ai";
30
+
31
+ // Fully typed with auto-complete support for both providers and models
32
+ const model = getModel("openai", "gpt-4o-mini");
33
+
34
+ // Define tools with TypeBox schemas for type safety and validation
35
+ const tools: Tool[] = [
36
+ {
37
+ name: "get_time",
38
+ description: "Get the current time",
39
+ parameters: Type.Object({
40
+ timezone: Type.Optional(Type.String({ description: "Optional timezone (e.g., America/New_York)" })),
41
+ }),
42
+ },
43
+ ];
44
+
45
+ // Build a conversation context (easily serializable and transferable between models)
46
+ const context: Context = {
47
+ systemPrompt: "You are a helpful assistant.",
48
+ messages: [{ role: "user", content: "What time is it?" }],
49
+ tools,
50
+ };
51
+
52
+ // Option 1: Streaming with all event types
53
+ const s = stream(model, context);
54
+
55
+ for await (const event of s) {
56
+ switch (event.type) {
57
+ case "start":
58
+ console.log(`Starting with ${event.partial.model}`);
59
+ break;
60
+ case "text_start":
61
+ console.log("\n[Text started]");
62
+ break;
63
+ case "text_delta":
64
+ process.stdout.write(event.delta);
65
+ break;
66
+ case "text_end":
67
+ console.log("\n[Text ended]");
68
+ break;
69
+ case "thinking_start":
70
+ console.log("[Model is thinking...]");
71
+ break;
72
+ case "thinking_delta":
73
+ process.stdout.write(event.delta);
74
+ break;
75
+ case "thinking_end":
76
+ console.log("[Thinking complete]");
77
+ break;
78
+ case "toolcall_start":
79
+ console.log(`\n[Tool call started: index ${event.contentIndex}]`);
80
+ break;
81
+ case "toolcall_delta":
82
+ // Partial tool arguments are being streamed
83
+ const partialCall = event.partial.content[event.contentIndex];
84
+ if (partialCall.type === "toolCall") {
85
+ console.log(`[Streaming args for ${partialCall.name}]`);
86
+ }
87
+ break;
88
+ case "toolcall_end":
89
+ console.log(`\nTool called: ${event.toolCall.name}`);
90
+ console.log(`Arguments: ${JSON.stringify(event.toolCall.arguments)}`);
91
+ break;
92
+ case "done":
93
+ console.log(`\nFinished: ${event.reason}`);
94
+ break;
95
+ case "error":
96
+ console.error(`Error: ${event.error}`);
97
+ break;
98
+ }
99
+ }
100
+
101
+ // Get the final message after streaming, add it to the context
102
+ const finalMessage = await s.result();
103
+ context.messages.push(finalMessage);
104
+
105
+ // Handle tool calls if any
106
+ const toolCalls = finalMessage.content.filter((b) => b.type === "toolCall");
107
+ for (const call of toolCalls) {
108
+ // Execute the tool
109
+ const result =
110
+ call.name === "get_time"
111
+ ? new Date().toLocaleString("en-US", {
112
+ timeZone: call.arguments.timezone || "UTC",
113
+ dateStyle: "full",
114
+ timeStyle: "long",
115
+ })
116
+ : "Unknown tool";
117
+
118
+ // Add tool result to context (supports text and images)
119
+ context.messages.push({
120
+ role: "toolResult",
121
+ toolCallId: call.id,
122
+ toolName: call.name,
123
+ content: [{ type: "text", text: result }],
124
+ isError: false,
125
+ timestamp: Date.now(),
126
+ });
127
+ }
128
+
129
+ // Continue if there were tool calls
130
+ if (toolCalls.length > 0) {
131
+ const continuation = await complete(model, context);
132
+ context.messages.push(continuation);
133
+ console.log("After tool execution:", continuation.content);
134
+ }
135
+
136
+ console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
137
+ console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
138
+
139
+ // Option 2: Get complete response without streaming
140
+ const response = await complete(model, context);
141
+
142
+ for (const block of response.content) {
143
+ if (block.type === "text") {
144
+ console.log(block.text);
145
+ } else if (block.type === "toolCall") {
146
+ console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
147
+ }
148
+ }
149
+ ```
150
+
151
+ ## Tools
152
+
153
+ Tools enable LLMs to interact with external systems. This library uses TypeBox schemas for type-safe tool definitions, with arguments validated automatically via AJV. Because TypeBox schemas are plain JSON, they can be serialized and deserialized directly, making them ideal for distributed systems.
154
+
155
+ ### Defining Tools
156
+
157
+ ```typescript
158
+ import { Type, Tool, StringEnum } from "@oh-my-pi/pi-ai";
159
+
160
+ // Define tool parameters with TypeBox
161
+ const weatherTool: Tool = {
162
+ name: "get_weather",
163
+ description: "Get current weather for a location",
164
+ parameters: Type.Object({
165
+ location: Type.String({ description: "City name or coordinates" }),
166
+ units: StringEnum(["celsius", "fahrenheit"], { default: "celsius" }),
167
+ }),
168
+ };
169
+
170
+ // Note: For Google API compatibility, use StringEnum helper instead of Type.Enum
171
+ // Type.Enum generates anyOf/const patterns that Google doesn't support
172
+
173
+ const bookMeetingTool: Tool = {
174
+ name: "book_meeting",
175
+ description: "Schedule a meeting",
176
+ parameters: Type.Object({
177
+ title: Type.String({ minLength: 1 }),
178
+ startTime: Type.String({ format: "date-time" }),
179
+ endTime: Type.String({ format: "date-time" }),
180
+ attendees: Type.Array(Type.String({ format: "email" }), { minItems: 1 }),
181
+ }),
182
+ };
183
+ ```
184
+
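+ Because the parameter schemas are plain JSON Schema objects, a tool definition survives a round trip through `JSON.stringify`/`JSON.parse`. A minimal sketch, reusing `weatherTool` from above:
+ 
+ ```typescript
+ // Serialize the tool definition, e.g. to ship it to another service or persist it
+ const wire = JSON.stringify(weatherTool);
+ 
+ // Deserialize it later; the result is still a usable Tool
+ const restoredTool: Tool = JSON.parse(wire);
+ 
+ // Use it exactly like the original definition
+ const context: Context = {
+   messages: [{ role: "user", content: "What's the weather in Berlin?" }],
+   tools: [restoredTool],
+ };
+ ```
+ 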
185
+ ### Handling Tool Calls
186
+
187
+ Tool results use content blocks and can include both text and images:
188
+
189
+ ```typescript
190
+ import { readFileSync } from "fs";
191
+
192
+ const context: Context = {
193
+ messages: [{ role: "user", content: "What is the weather in London?" }],
194
+ tools: [weatherTool],
195
+ };
196
+
197
+ const response = await complete(model, context);
198
+
199
+ // Check for tool calls in the response
200
+ for (const block of response.content) {
201
+ if (block.type === "toolCall") {
202
+ // Execute your tool with the arguments
203
+ // See "Validating Tool Arguments" section for validation
204
+ const result = await executeWeatherApi(block.arguments);
205
+
206
+ // Add tool result with text content
207
+ context.messages.push({
208
+ role: "toolResult",
209
+ toolCallId: block.id,
210
+ toolName: block.name,
211
+ content: [{ type: "text", text: JSON.stringify(result) }],
212
+ isError: false,
213
+ timestamp: Date.now(),
214
+ });
215
+ }
216
+ }
217
+
218
+ // Tool results can also include images (for vision-capable models)
219
+ const imageBuffer = readFileSync("chart.png");
220
+ context.messages.push({
221
+ role: "toolResult",
222
+ toolCallId: "tool_xyz",
223
+ toolName: "generate_chart",
224
+ content: [
225
+ { type: "text", text: "Generated chart showing temperature trends" },
226
+ { type: "image", data: imageBuffer.toString("base64"), mimeType: "image/png" },
227
+ ],
228
+ isError: false,
229
+ timestamp: Date.now(),
230
+ });
231
+ ```
232
+
233
+ ### Streaming Tool Calls with Partial JSON
234
+
235
+ During streaming, tool call arguments are progressively parsed as they arrive. This enables real-time UI updates before the complete arguments are available:
236
+
237
+ ```typescript
238
+ const s = stream(model, context);
239
+
240
+ for await (const event of s) {
241
+ if (event.type === "toolcall_delta") {
242
+ const toolCall = event.partial.content[event.contentIndex];
243
+
244
+ // toolCall.arguments contains partially parsed JSON during streaming
245
+ // This allows for progressive UI updates
246
+ if (toolCall.type === "toolCall" && toolCall.arguments) {
247
+ // BE DEFENSIVE: arguments may be incomplete
248
+ // Example: Show file path being written even before content is complete
249
+ if (toolCall.name === "write_file" && toolCall.arguments.path) {
250
+ console.log(`Writing to: ${toolCall.arguments.path}`);
251
+
252
+ // Content might be partial or missing
253
+ if (toolCall.arguments.content) {
254
+ console.log(`Content preview: ${toolCall.arguments.content.substring(0, 100)}...`);
255
+ }
256
+ }
257
+ }
258
+ }
259
+
260
+ if (event.type === "toolcall_end") {
261
+ // Here toolCall.arguments is complete (but not yet validated)
262
+ const toolCall = event.toolCall;
263
+ console.log(`Tool completed: ${toolCall.name}`, toolCall.arguments);
264
+ }
265
+ }
266
+ ```
267
+
268
+ **Important notes about partial tool arguments:**
269
+
270
+ - During `toolcall_delta` events, `arguments` contains the best-effort parse of partial JSON
271
+ - Fields may be missing or incomplete - always check for existence before use
272
+ - String values may be truncated mid-word
273
+ - Arrays may be incomplete
274
+ - Nested objects may be partially populated
275
+ - At minimum, `arguments` will be an empty object `{}`, never `undefined`
276
+ - The Google provider does not support function call streaming. Instead, you will receive a single `toolcall_delta` event with the full arguments.
277
+
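+ Putting these caveats together, a small defensive helper (hypothetical, not part of the library) can render whatever has arrived so far during `toolcall_delta` events:
+ 
+ ```typescript
+ // Hypothetical UI helper: summarize a partially streamed tool call.
+ // Every field is treated as optional because the JSON may still be incomplete.
+ function describePartialToolCall(block: { type: string; name?: string; arguments?: Record<string, unknown> }): string {
+   if (block.type !== "toolCall") return "";
+   const name = block.name ?? "(unknown tool)";
+   const args = block.arguments ?? {}; // documented to be at least {}, but guard anyway
+   // String values may be truncated mid-word, so only show short previews
+   const preview = Object.keys(args)
+     .map((key) => `${key}=${JSON.stringify(args[key] ?? null).slice(0, 40)}`)
+     .join(", ");
+   return `${name}(${preview})`;
+ }
+ 
+ // Usage inside the streaming loop:
+ // if (event.type === "toolcall_delta") {
+ //   console.log(describePartialToolCall(event.partial.content[event.contentIndex]));
+ // }
+ ```
+ 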
278
+ ### Validating Tool Arguments
279
+
280
+ When using `agentLoop`, tool arguments are automatically validated against your TypeBox schemas before execution. If validation fails, the error is returned to the model as a tool result, allowing it to retry.
281
+
282
+ When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools:
283
+
284
+ ```typescript
285
+ import { stream, validateToolCall, Tool } from "@oh-my-pi/pi-ai";
286
+
287
+ const tools: Tool[] = [weatherTool, calculatorTool];
288
+ const s = stream(model, { messages, tools });
289
+
290
+ for await (const event of s) {
291
+ if (event.type === "toolcall_end") {
292
+ const toolCall = event.toolCall;
293
+
294
+ try {
295
+ // Validate arguments against the tool's schema (throws on invalid args)
296
+ const validatedArgs = validateToolCall(tools, toolCall);
297
+ const result = await executeMyTool(toolCall.name, validatedArgs);
298
+ // ... add tool result to context
299
+ } catch (error) {
300
+ // Validation failed - return error as tool result so model can retry
301
+ context.messages.push({
302
+ role: "toolResult",
303
+ toolCallId: toolCall.id,
304
+ toolName: toolCall.name,
305
+ content: [{ type: "text", text: error.message }],
306
+ isError: true,
307
+ timestamp: Date.now(),
308
+ });
309
+ }
310
+ }
311
+ }
312
+ ```
313
+
314
+ ### Complete Event Reference
315
+
316
+ All streaming events emitted during assistant message generation:
317
+
318
+ | Event Type | Description | Key Properties |
319
+ | ---------------- | ------------------------ | ------------------------------------------------------------------------------------------- |
320
+ | `start` | Stream begins | `partial`: Initial assistant message structure |
321
+ | `text_start` | Text block starts | `contentIndex`: Position in content array |
322
+ | `text_delta` | Text chunk received | `delta`: New text, `contentIndex`: Position |
323
+ | `text_end` | Text block complete | `content`: Full text, `contentIndex`: Position |
324
+ | `thinking_start` | Thinking block starts | `contentIndex`: Position in content array |
325
+ | `thinking_delta` | Thinking chunk received | `delta`: New text, `contentIndex`: Position |
326
+ | `thinking_end` | Thinking block complete | `content`: Full thinking, `contentIndex`: Position |
327
+ | `toolcall_start` | Tool call begins | `contentIndex`: Position in content array |
328
+ | `toolcall_delta` | Tool arguments streaming | `delta`: JSON chunk, `partial.content[contentIndex].arguments`: Partial parsed args |
329
+ | `toolcall_end` | Tool call complete | `toolCall`: Complete validated tool call with `id`, `name`, `arguments` |
330
+ | `done` | Stream complete | `reason`: Stop reason ("stop", "length", "toolUse"), `message`: Final assistant message |
331
+ | `error` | Error occurred | `reason`: Error type ("error" or "aborted"), `error`: AssistantMessage with partial content |
332
+
333
+ ## Image Input
334
+
335
+ Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
336
+
337
+ ```typescript
338
+ import { readFileSync } from "fs";
339
+ import { getModel, complete } from "@oh-my-pi/pi-ai";
340
+
341
+ const model = getModel("openai", "gpt-4o-mini");
342
+
343
+ // Check if model supports images
344
+ if (model.input.includes("image")) {
345
+ console.log("Model supports vision");
346
+ }
347
+
348
+ const imageBuffer = readFileSync("image.png");
349
+ const base64Image = imageBuffer.toString("base64");
350
+
351
+ const response = await complete(model, {
352
+ messages: [
353
+ {
354
+ role: "user",
355
+ content: [
356
+ { type: "text", text: "What is in this image?" },
357
+ { type: "image", data: base64Image, mimeType: "image/png" },
358
+ ],
359
+ },
360
+ ],
361
+ });
362
+
363
+ // Access the response
364
+ for (const block of response.content) {
365
+ if (block.type === "text") {
366
+ console.log(block.text);
367
+ }
368
+ }
369
+ ```
370
+
371
+ ## Thinking/Reasoning
372
+
373
+ Many models support thinking/reasoning, where they expose their internal thought process before the final answer. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
374
+
375
+ ### Unified Interface (streamSimple/completeSimple)
376
+
377
+ ```typescript
378
+ import { getModel, streamSimple, completeSimple } from "@oh-my-pi/pi-ai";
379
+
380
+ // Many models across providers support thinking/reasoning
381
+ const model = getModel("anthropic", "claude-sonnet-4-20250514");
382
+ // or getModel('openai', 'gpt-5-mini');
383
+ // or getModel('google', 'gemini-2.5-flash');
384
+ // or getModel('xai', 'grok-code-fast-1');
385
+ // or getModel('groq', 'openai/gpt-oss-20b');
386
+ // or getModel('cerebras', 'gpt-oss-120b');
387
+ // or getModel('openrouter', 'z-ai/glm-4.5v');
388
+
389
+ // Check if model supports reasoning
390
+ if (model.reasoning) {
391
+ console.log("Model supports reasoning/thinking");
392
+ }
393
+
394
+ // Use the simplified reasoning option
395
+ const response = await completeSimple(
396
+ model,
397
+ {
398
+ messages: [{ role: "user", content: "Solve: 2x + 5 = 13" }],
399
+ },
400
+ {
401
+ reasoning: "medium", // 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' (xhigh maps to high on non-OpenAI providers)
402
+ }
403
+ );
404
+
405
+ // Access thinking and text blocks
406
+ for (const block of response.content) {
407
+ if (block.type === "thinking") {
408
+ console.log("Thinking:", block.thinking);
409
+ } else if (block.type === "text") {
410
+ console.log("Response:", block.text);
411
+ }
412
+ }
413
+ ```
414
+
415
+ ### Provider-Specific Options (stream/complete)
416
+
417
+ For fine-grained control, use the provider-specific options:
418
+
419
+ ```typescript
420
+ import { getModel, complete } from "@oh-my-pi/pi-ai";
421
+
422
+ // OpenAI Reasoning (o1, o3, gpt-5)
423
+ const openaiModel = getModel("openai", "gpt-5-mini");
424
+ await complete(openaiModel, context, {
425
+ reasoningEffort: "medium",
426
+ reasoningSummary: "detailed", // OpenAI Responses API only
427
+ });
428
+
429
+ // Anthropic Thinking (Claude Sonnet 4)
430
+ const anthropicModel = getModel("anthropic", "claude-sonnet-4-20250514");
431
+ await complete(anthropicModel, context, {
432
+ thinkingEnabled: true,
433
+ thinkingBudgetTokens: 8192, // Optional token limit
434
+ });
435
+
436
+ // Google Gemini Thinking
437
+ const googleModel = getModel("google", "gemini-2.5-flash");
438
+ await complete(googleModel, context, {
439
+ thinking: {
440
+ enabled: true,
441
+ budgetTokens: 8192, // -1 for dynamic, 0 to disable
442
+ },
443
+ });
444
+ ```
445
+
446
+ ### Streaming Thinking Content
447
+
448
+ When streaming, thinking content is delivered through specific events:
449
+
450
+ ```typescript
451
+ const s = streamSimple(model, context, { reasoning: "high" });
452
+
453
+ for await (const event of s) {
454
+ switch (event.type) {
455
+ case "thinking_start":
456
+ console.log("[Model started thinking]");
457
+ break;
458
+ case "thinking_delta":
459
+ process.stdout.write(event.delta); // Stream thinking content
460
+ break;
461
+ case "thinking_end":
462
+ console.log("\n[Thinking complete]");
463
+ break;
464
+ }
465
+ }
466
+ ```
467
+
468
+ ## Stop Reasons
469
+
470
+ Every `AssistantMessage` includes a `stopReason` field that indicates how the generation ended:
471
+
472
+ - `"stop"` - Normal completion, the model finished its response
473
+ - `"length"` - Output hit the maximum token limit
474
+ - `"toolUse"` - Model is calling tools and expects tool results
475
+ - `"error"` - An error occurred during generation
476
+ - `"aborted"` - Request was cancelled via abort signal
477
+
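+ A typical non-streaming call inspects this field to decide what to do next. A minimal sketch, assuming `model` and `context` are set up as in the earlier examples:
+ 
+ ```typescript
+ const message = await complete(model, context);
+ context.messages.push(message);
+ 
+ switch (message.stopReason) {
+   case "toolUse":
+     // Execute the requested tools, push toolResult messages, then call complete() again
+     break;
+   case "length":
+     // The output hit the token limit; consider asking the model to continue
+     break;
+   case "error":
+   case "aborted":
+     console.error("Generation failed:", message.errorMessage);
+     break;
+   default:
+     // "stop": normal completion
+     break;
+ }
+ ```
+ 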
478
+ ## Error Handling
479
+
480
+ When a request ends with an error (including aborts and tool call validation errors), the streaming API emits an error event:
481
+
482
+ ```typescript
483
+ // In streaming
484
+ for await (const event of stream) {
485
+ if (event.type === "error") {
486
+ // event.reason is either "error" or "aborted"
487
+ // event.error is the AssistantMessage with partial content
488
+ console.error(`Error (${event.reason}):`, event.error.errorMessage);
489
+ console.log("Partial content:", event.error.content);
490
+ }
491
+ }
492
+
493
+ // The final message will have the error details
494
+ const message = await stream.result();
495
+ if (message.stopReason === "error" || message.stopReason === "aborted") {
496
+ console.error("Request failed:", message.errorMessage);
497
+ // message.content contains any partial content received before the error
498
+ // message.usage contains partial token counts and costs
499
+ }
500
+ ```
501
+
502
+ ### Aborting Requests
503
+
504
+ The abort signal allows you to cancel in-progress requests. Aborted requests have `stopReason === 'aborted'`:
505
+
506
+ ```typescript
507
+ import { getModel, stream } from "@oh-my-pi/pi-ai";
508
+
509
+ const model = getModel("openai", "gpt-4o-mini");
510
+ const controller = new AbortController();
511
+
512
+ // Abort after 2 seconds
513
+ setTimeout(() => controller.abort(), 2000);
514
+
515
+ const s = stream(
516
+ model,
517
+ {
518
+ messages: [{ role: "user", content: "Write a long story" }],
519
+ },
520
+ {
521
+ signal: controller.signal,
522
+ }
523
+ );
524
+
525
+ for await (const event of s) {
526
+ if (event.type === "text_delta") {
527
+ process.stdout.write(event.delta);
528
+ } else if (event.type === "error") {
529
+ // event.reason tells you if it was "error" or "aborted"
530
+ console.log(`${event.reason === "aborted" ? "Aborted" : "Error"}:`, event.error.errorMessage);
531
+ }
532
+ }
533
+
534
+ // Get results (may be partial if aborted)
535
+ const response = await s.result();
536
+ if (response.stopReason === "aborted") {
537
+ console.log("Request was aborted:", response.errorMessage);
538
+ console.log("Partial content received:", response.content);
539
+ console.log("Tokens used:", response.usage);
540
+ }
541
+ ```
542
+
543
+ ### Continuing After Abort
544
+
545
+ Aborted messages can be added to the conversation context and continued in subsequent requests:
546
+
547
+ ```typescript
548
+ const context = {
549
+ messages: [{ role: "user", content: "Explain quantum computing in detail" }],
550
+ };
551
+
552
+ // First request gets aborted after 2 seconds
553
+ const controller1 = new AbortController();
554
+ setTimeout(() => controller1.abort(), 2000);
555
+
556
+ const partial = await complete(model, context, { signal: controller1.signal });
557
+
558
+ // Add the partial response to context
559
+ context.messages.push(partial);
560
+ context.messages.push({ role: "user", content: "Please continue" });
561
+
562
+ // Continue the conversation
563
+ const continuation = await complete(model, context);
564
+ ```
565
+
566
+ ## APIs, Models, and Providers
567
+
568
+ The library implements 4 API interfaces, each with its own streaming function and options:
569
+
570
+ - **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
571
+ - **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
572
+ - **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
573
+ - **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
574
+
575
+ ### Providers and Models
576
+
577
+ A **provider** offers models through a specific API. For example:
578
+
579
+ - **Anthropic** models use the `anthropic-messages` API
580
+ - **Google** models use the `google-generative-ai` API
581
+ - **OpenAI** models use the `openai-responses` API
582
+ - **Mistral, xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
583
+
584
+ ### Querying Providers and Models
585
+
586
+ ```typescript
587
+ import { getProviders, getModels, getModel } from "@oh-my-pi/pi-ai";
588
+
589
+ // Get all available providers
590
+ const providers = getProviders();
591
+ console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
592
+
593
+ // Get all models from a provider (fully typed)
594
+ const anthropicModels = getModels("anthropic");
595
+ for (const model of anthropicModels) {
596
+ console.log(`${model.id}: ${model.name}`);
597
+ console.log(` API: ${model.api}`); // 'anthropic-messages'
598
+ console.log(` Context: ${model.contextWindow} tokens`);
599
+ console.log(` Vision: ${model.input.includes("image")}`);
600
+ console.log(` Reasoning: ${model.reasoning}`);
601
+ }
602
+
603
+ // Get a specific model (both provider and model ID are auto-completed in IDEs)
604
+ const model = getModel("openai", "gpt-4o-mini");
605
+ console.log(`Using ${model.name} via ${model.api} API`);
606
+ ```
607
+
608
+ ### Custom Models
609
+
610
+ You can create custom models for local inference servers or custom endpoints:
611
+
612
+ ```typescript
613
+ import { Model, stream } from "@oh-my-pi/pi-ai";
614
+
615
+ // Example: Ollama using OpenAI-compatible API
616
+ const ollamaModel: Model<"openai-completions"> = {
617
+ id: "llama-3.1-8b",
618
+ name: "Llama 3.1 8B (Ollama)",
619
+ api: "openai-completions",
620
+ provider: "ollama",
621
+ baseUrl: "http://localhost:11434/v1",
622
+ reasoning: false,
623
+ input: ["text"],
624
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
625
+ contextWindow: 128000,
626
+ maxTokens: 32000,
627
+ };
628
+
629
+ // Example: LiteLLM proxy with explicit compat settings
630
+ const litellmModel: Model<"openai-completions"> = {
631
+ id: "gpt-4o",
632
+ name: "GPT-4o (via LiteLLM)",
633
+ api: "openai-completions",
634
+ provider: "litellm",
635
+ baseUrl: "http://localhost:4000/v1",
636
+ reasoning: false,
637
+ input: ["text", "image"],
638
+ cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
639
+ contextWindow: 128000,
640
+ maxTokens: 16384,
641
+ compat: {
642
+ supportsStore: false, // LiteLLM doesn't support the store field
643
+ },
644
+ };
645
+
646
+ // Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
647
+ const proxyModel: Model<"anthropic-messages"> = {
648
+ id: "claude-sonnet-4",
649
+ name: "Claude Sonnet 4 (Proxied)",
650
+ api: "anthropic-messages",
651
+ provider: "custom-proxy",
652
+ baseUrl: "https://proxy.example.com/v1",
653
+ reasoning: true,
654
+ input: ["text", "image"],
655
+ cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
656
+ contextWindow: 200000,
657
+ maxTokens: 8192,
658
+ headers: {
659
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
660
+ "X-Custom-Auth": "bearer-token-here",
661
+ },
662
+ };
663
+
664
+ // Use the custom model
665
+ const response = await stream(ollamaModel, context, {
666
+ apiKey: "dummy", // Ollama doesn't need a real key
667
+ });
668
+ ```
669
+
670
+ ### OpenAI Compatibility Settings
671
+
672
+ The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field:
673
+
674
+ ```typescript
675
+ interface OpenAICompat {
676
+ supportsStore?: boolean; // Whether provider supports the `store` field (default: true)
677
+ supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
678
+ supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
679
+ maxTokensField?: "max_completion_tokens" | "max_tokens"; // Which field name to use (default: max_completion_tokens)
680
+ }
681
+ ```
682
+
683
+ If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. This is useful for:
684
+
685
+ - **LiteLLM proxies**: May not support `store` field
686
+ - **Custom inference servers**: May use non-standard field names
687
+ - **Self-hosted endpoints**: May have different feature support
688
+
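+ For example, a custom model pointed at a self-hosted OpenAI-compatible server might override only the fields that differ. A sketch; the endpoint, model ID, and limits below are assumptions, not defaults shipped with the library:
+ 
+ ```typescript
+ import { Model, complete } from "@oh-my-pi/pi-ai";
+ 
+ // Hypothetical self-hosted server that expects the legacy `max_tokens` field
+ // and does not understand the `store` field or the `developer` role.
+ const selfHostedModel: Model<"openai-completions"> = {
+   id: "qwen2.5-32b-instruct",
+   name: "Qwen 2.5 32B (self-hosted)",
+   api: "openai-completions",
+   provider: "self-hosted",
+   baseUrl: "http://localhost:8000/v1",
+   reasoning: false,
+   input: ["text"],
+   cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+   contextWindow: 32768,
+   maxTokens: 8192,
+   compat: {
+     maxTokensField: "max_tokens",
+     supportsStore: false,
+     supportsDeveloperRole: false,
+     // supportsReasoningEffort is left unset and falls back to the detected default
+   },
+ };
+ 
+ const response = await complete(selfHostedModel, {
+   messages: [{ role: "user", content: "Hello!" }],
+ });
+ ```
+ 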
689
+ ### Type Safety
690
+
691
+ Models are typed by their API, ensuring type-safe options:
692
+
693
+ ```typescript
694
+ // TypeScript knows this is an Anthropic model
695
+ const claude = getModel("anthropic", "claude-sonnet-4-20250514");
696
+
697
+ // So these options are type-checked for AnthropicOptions
698
+ await stream(claude, context, {
699
+ thinkingEnabled: true, // ✓ Valid for anthropic-messages
700
+ thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
701
+ // reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages
702
+ });
703
+ ```
704
+
705
+ ## Cross-Provider Handoffs
706
+
707
+ The library supports seamless handoffs between different LLM providers within the same conversation. This allows you to switch models mid-conversation while preserving context, including thinking blocks, tool calls, and tool results.
708
+
709
+ ### How It Works
710
+
711
+ When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
712
+
713
+ - **User and tool result messages** are passed through unchanged
714
+ - **Assistant messages from the same provider/API** are preserved as-is
715
+ - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
716
+ - **Tool calls and regular text** are preserved unchanged
717
+
718
+ ### Example: Multi-Provider Conversation
719
+
720
+ ```typescript
721
+ import { getModel, complete, Context } from "@oh-my-pi/pi-ai";
722
+
723
+ // Start with Claude
724
+ const claude = getModel("anthropic", "claude-sonnet-4-20250514");
725
+ const context: Context = {
726
+ messages: [],
727
+ };
728
+
729
+ context.messages.push({ role: "user", content: "What is 25 * 18?" });
730
+ const claudeResponse = await complete(claude, context, {
731
+ thinkingEnabled: true,
732
+ });
733
+ context.messages.push(claudeResponse);
734
+
735
+ // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
736
+ const gpt5 = getModel("openai", "gpt-5-mini");
737
+ context.messages.push({ role: "user", content: "Is that calculation correct?" });
738
+ const gptResponse = await complete(gpt5, context);
739
+ context.messages.push(gptResponse);
740
+
741
+ // Switch to Gemini
742
+ const gemini = getModel("google", "gemini-2.5-flash");
743
+ context.messages.push({ role: "user", content: "What was the original question?" });
744
+ const geminiResponse = await complete(gemini, context);
745
+ ```
746
+
747
+ ### Provider Compatibility
748
+
749
+ All providers can handle messages from other providers, including:
750
+
751
+ - Text content
752
+ - Tool calls and tool results (including images in tool results)
753
+ - Thinking/reasoning blocks (transformed to tagged text for cross-provider compatibility)
754
+ - Aborted messages with partial content
755
+
756
+ This enables flexible workflows where you can:
757
+
758
+ - Start with a fast model for initial responses
759
+ - Switch to a more capable model for complex reasoning
760
+ - Use specialized models for specific tasks
761
+ - Maintain conversation continuity across provider outages
762
+
763
+ ## Context Serialization
764
+
765
+ The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
766
+
767
+ ```typescript
768
+ import { Context, getModel, complete } from "@oh-my-pi/pi-ai";
769
+
770
+ // Create and use a context
771
+ const context: Context = {
772
+ systemPrompt: "You are a helpful assistant.",
773
+ messages: [{ role: "user", content: "What is TypeScript?" }],
774
+ };
775
+
776
+ const model = getModel("openai", "gpt-4o-mini");
777
+ const response = await complete(model, context);
778
+ context.messages.push(response);
779
+
780
+ // Serialize the entire context
781
+ const serialized = JSON.stringify(context);
782
+ console.log("Serialized context size:", serialized.length, "bytes");
783
+
784
+ // Save to database, localStorage, file, etc.
785
+ localStorage.setItem("conversation", serialized);
786
+
787
+ // Later: deserialize and continue the conversation
788
+ const restored: Context = JSON.parse(localStorage.getItem("conversation")!);
789
+ restored.messages.push({ role: "user", content: "Tell me more about its type system" });
790
+
791
+ // Continue with any model
792
+ const newModel = getModel("anthropic", "claude-3-5-haiku-20241022");
793
+ const continuation = await complete(newModel, restored);
794
+ ```
795
+
796
+ > **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
797
+
798
+ ## Browser Usage
799
+
800
+ The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
801
+
802
+ ```typescript
803
+ import { getModel, complete } from "@oh-my-pi/pi-ai";
804
+
805
+ // API key must be passed explicitly in browser
806
+ const model = getModel("anthropic", "claude-3-5-haiku-20241022");
807
+
808
+ const response = await complete(
809
+ model,
810
+ {
811
+ messages: [{ role: "user", content: "Hello!" }],
812
+ },
813
+ {
814
+ apiKey: "your-api-key",
815
+ }
816
+ );
817
+ ```
818
+
819
+ > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
820
+
821
+ ### Environment Variables (Node.js only)
822
+
823
+ In Node.js environments, you can set environment variables to avoid passing API keys:
824
+
825
+ ```bash
826
+ OPENAI_API_KEY=sk-...
827
+ ANTHROPIC_API_KEY=sk-ant-...
828
+ GEMINI_API_KEY=...
829
+ MISTRAL_API_KEY=...
830
+ GROQ_API_KEY=gsk_...
831
+ CEREBRAS_API_KEY=csk-...
832
+ XAI_API_KEY=xai-...
833
+ ZAI_API_KEY=...
834
+ OPENROUTER_API_KEY=sk-or-...
835
+ ```
836
+
837
+ When set, the library automatically uses these keys:
838
+
839
+ ```typescript
840
+ // Uses OPENAI_API_KEY from environment
841
+ const model = getModel("openai", "gpt-4o-mini");
842
+ const response = await complete(model, context);
843
+
844
+ // Or override with explicit key
845
+ const response = await complete(model, context, {
846
+ apiKey: "sk-different-key",
847
+ });
848
+ ```
849
+
850
+ ### Checking Environment Variables
851
+
852
+ ```typescript
853
+ import { getEnvApiKey } from "@oh-my-pi/pi-ai";
854
+
855
+ // Check if an API key is set in environment variables
856
+ const key = getEnvApiKey("openai"); // checks OPENAI_API_KEY
857
+ ```
858
+
859
+ ## OAuth Providers
860
+
861
+ Several providers require OAuth authentication instead of static API keys:
862
+
863
+ - **Anthropic** (Claude Pro/Max subscription)
864
+ - **GitHub Copilot** (Copilot subscription)
865
+ - **Google Gemini CLI** (Free Gemini 2.0/2.5 via Google Cloud Code Assist)
866
+ - **Antigravity** (Free Gemini 3, Claude, GPT-OSS via Google Cloud)
867
+
868
+ ### CLI Login
869
+
870
+ The quickest way to authenticate:
871
+
872
+ ```bash
873
+ npx @oh-my-pi/pi-ai login # interactive provider selection
874
+ npx @oh-my-pi/pi-ai login anthropic # login to specific provider
875
+ npx @oh-my-pi/pi-ai list # list available providers
876
+ ```
877
+
878
+ Credentials are saved to `auth.json` in the current directory.
879
+
880
+ ### Programmatic OAuth
881
+
882
+ The library provides login and token refresh functions. Credential storage is the caller's responsibility.
883
+
884
+ ```typescript
885
+ import {
886
+ // Login functions (return credentials, do not store)
887
+ loginAnthropic,
888
+ loginGitHubCopilot,
889
+ loginGeminiCli,
890
+ loginAntigravity,
891
+
892
+ // Token management
893
+ refreshOAuthToken, // (provider, credentials) => new credentials
894
+ getOAuthApiKey, // (provider, credentialsMap) => { newCredentials, apiKey } | null
895
+
896
+ // Types
897
+ type OAuthProvider, // 'anthropic' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity'
898
+ type OAuthCredentials,
899
+ } from "@oh-my-pi/pi-ai";
900
+ ```
901
+
902
+ ### Login Flow Example
903
+
904
+ ```typescript
905
+ import { loginGitHubCopilot } from "@oh-my-pi/pi-ai";
906
+ import { writeFileSync } from "fs";
907
+
908
+ const credentials = await loginGitHubCopilot({
909
+ onAuth: (url, instructions) => {
910
+ console.log(`Open: ${url}`);
911
+ if (instructions) console.log(instructions);
912
+ },
913
+ onPrompt: async (prompt) => {
914
+ return await getUserInput(prompt.message);
915
+ },
916
+ onProgress: (message) => console.log(message),
917
+ });
918
+
919
+ // Store credentials yourself
920
+ const auth = { "github-copilot": { type: "oauth", ...credentials } };
921
+ writeFileSync("auth.json", JSON.stringify(auth, null, 2));
922
+ ```
923
+
924
+ ### Using OAuth Tokens
925
+
926
+ Use `getOAuthApiKey()` to get an API key, automatically refreshing if expired:
927
+
928
+ ```typescript
929
+ import { getModel, complete, getOAuthApiKey } from "@oh-my-pi/pi-ai";
930
+ import { readFileSync, writeFileSync } from "fs";
931
+
932
+ // Load your stored credentials
933
+ const auth = JSON.parse(readFileSync("auth.json", "utf-8"));
934
+
935
+ // Get API key (refreshes if expired)
936
+ const result = await getOAuthApiKey("github-copilot", auth);
937
+ if (!result) throw new Error("Not logged in");
938
+
939
+ // Save refreshed credentials
940
+ auth["github-copilot"] = { type: "oauth", ...result.newCredentials };
941
+ writeFileSync("auth.json", JSON.stringify(auth, null, 2));
942
+
943
+ // Use the API key
944
+ const model = getModel("github-copilot", "gpt-4o");
945
+ const response = await complete(
946
+ model,
947
+ {
948
+ messages: [{ role: "user", content: "Hello!" }],
949
+ },
950
+ { apiKey: result.apiKey }
951
+ );
952
+ ```
953
+
954
+ ### Provider Notes
955
+
956
+ **GitHub Copilot**: If you get a "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (marked with a warning icon), and click "Enable".
957
+
958
+ **Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically.
959
+
960
+ ## License
961
+
962
+ MIT