@mariozechner/pi-ai 0.5.27 → 0.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +355 -275
  2. package/dist/generate.d.ts +22 -0
  3. package/dist/generate.d.ts.map +1 -0
  4. package/dist/generate.js +204 -0
  5. package/dist/generate.js.map +1 -0
  6. package/dist/index.d.ts +7 -8
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +7 -12
  9. package/dist/index.js.map +1 -1
  10. package/dist/models.d.ts +10 -71
  11. package/dist/models.d.ts.map +1 -1
  12. package/dist/models.generated.d.ts +3056 -2659
  13. package/dist/models.generated.d.ts.map +1 -1
  14. package/dist/models.generated.js +3063 -2663
  15. package/dist/models.generated.js.map +1 -1
  16. package/dist/models.js +17 -59
  17. package/dist/models.js.map +1 -1
  18. package/dist/providers/anthropic.d.ts +5 -18
  19. package/dist/providers/anthropic.d.ts.map +1 -1
  20. package/dist/providers/anthropic.js +249 -227
  21. package/dist/providers/anthropic.js.map +1 -1
  22. package/dist/providers/google.d.ts +3 -14
  23. package/dist/providers/google.d.ts.map +1 -1
  24. package/dist/providers/google.js +215 -220
  25. package/dist/providers/google.js.map +1 -1
  26. package/dist/providers/openai-completions.d.ts +4 -14
  27. package/dist/providers/openai-completions.d.ts.map +1 -1
  28. package/dist/providers/openai-completions.js +247 -215
  29. package/dist/providers/openai-completions.js.map +1 -1
  30. package/dist/providers/openai-responses.d.ts +6 -13
  31. package/dist/providers/openai-responses.d.ts.map +1 -1
  32. package/dist/providers/openai-responses.js +242 -244
  33. package/dist/providers/openai-responses.js.map +1 -1
  34. package/dist/providers/utils.d.ts +2 -14
  35. package/dist/providers/utils.d.ts.map +1 -1
  36. package/dist/providers/utils.js +2 -15
  37. package/dist/providers/utils.js.map +1 -1
  38. package/dist/types.d.ts +39 -16
  39. package/dist/types.d.ts.map +1 -1
  40. package/dist/types.js +1 -0
  41. package/dist/types.js.map +1 -1
  42. package/package.json +1 -1
package/README.md CHANGED
@@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai
24
24
  ## Quick Start
25
25
 
26
26
  ```typescript
27
- import { createLLM } from '@mariozechner/pi-ai';
27
+ import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai';
28
28
 
29
- const llm = createLLM('openai', 'gpt-4o-mini');
29
+ // Fully typed with auto-complete support for both providers and models
30
+ const model = getModel('openai', 'gpt-4o-mini');
30
31
 
31
- const response = await llm.generate({
32
- messages: [{ role: 'user', content: 'Hello!' }]
33
- });
32
+ // Define tools
33
+ const tools: Tool[] = [{
34
+ name: 'get_time',
35
+ description: 'Get the current time',
36
+ parameters: {
37
+ type: 'object',
38
+ properties: {},
39
+ required: []
40
+ }
41
+ }];
42
+
43
+ // Build a conversation context (easily serializable and transferable between models)
44
+ const context: Context = {
45
+ systemPrompt: 'You are a helpful assistant.',
46
+ messages: [{ role: 'user', content: 'What time is it?' }],
47
+ tools
48
+ };
49
+
50
+ // Option 1: Streaming with all event types
51
+ const s = stream(model, context);
52
+
53
+ for await (const event of s) {
54
+ switch (event.type) {
55
+ case 'start':
56
+ console.log(`Starting with ${event.partial.model}`);
57
+ break;
58
+ case 'text_start':
59
+ console.log('\n[Text started]');
60
+ break;
61
+ case 'text_delta':
62
+ process.stdout.write(event.delta);
63
+ break;
64
+ case 'text_end':
65
+ console.log('\n[Text ended]');
66
+ break;
67
+ case 'thinking_start':
68
+ console.log('[Model is thinking...]');
69
+ break;
70
+ case 'thinking_delta':
71
+ process.stdout.write(event.delta);
72
+ break;
73
+ case 'thinking_end':
74
+ console.log('[Thinking complete]');
75
+ break;
76
+ case 'toolCall':
77
+ console.log(`\nTool called: ${event.toolCall.name}`);
78
+ break;
79
+ case 'done':
80
+ console.log(`\nFinished: ${event.reason}`);
81
+ break;
82
+ case 'error':
83
+ console.error(`Error: ${event.error}`);
84
+ break;
85
+ }
86
+ }
87
+
88
+ // Get the final message after streaming, add it to the context
89
+ const finalMessage = await s.finalMessage();
90
+ context.messages.push(finalMessage);
91
+
92
+ // Handle tool calls if any
93
+ const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall');
94
+ for (const call of toolCalls) {
95
+ // Execute the tool
96
+ const result = call.name === 'get_time'
97
+ ? new Date().toISOString()
98
+ : 'Unknown tool';
99
+
100
+ // Add tool result to context
101
+ context.messages.push({
102
+ role: 'toolResult',
103
+ toolCallId: call.id,
104
+ toolName: call.name,
105
+ content: result,
106
+ isError: false
107
+ });
108
+ }
109
+
110
+ // Continue if there were tool calls
111
+ if (toolCalls.length > 0) {
112
+ const continuation = await complete(model, context);
113
+ context.messages.push(continuation);
114
+ console.log('After tool execution:', continuation.content);
115
+ }
116
+
117
+ console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
118
+ console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
119
+
120
+ // Option 2: Get complete response without streaming
121
+ const response = await complete(model, context);
34
122
 
35
- // response.content is an array of content blocks
36
123
  for (const block of response.content) {
37
124
  if (block.type === 'text') {
38
125
  console.log(block.text);
126
+ } else if (block.type === 'toolCall') {
127
+ console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
39
128
  }
40
129
  }
41
130
  ```
42
131
 
43
132
  ## Image Input
44
133
 
134
+ Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
135
+
45
136
  ```typescript
46
137
  import { readFileSync } from 'fs';
138
+ import { getModel, complete } from '@mariozechner/pi-ai';
139
+
140
+ const model = getModel('openai', 'gpt-4o-mini');
141
+
142
+ // Check if model supports images
143
+ if (model.input.includes('image')) {
144
+ console.log('Model supports vision');
145
+ }
47
146
 
48
147
  const imageBuffer = readFileSync('image.png');
49
148
  const base64Image = imageBuffer.toString('base64');
50
149
 
51
- const response = await llm.generate({
150
+ const response = await complete(model, {
52
151
  messages: [{
53
152
  role: 'user',
54
153
  content: [
@@ -57,166 +156,151 @@ const response = await llm.generate({
57
156
  ]
58
157
  }]
59
158
  });
159
+
160
+ // Access the response
161
+ for (const block of response.content) {
162
+ if (block.type === 'text') {
163
+ console.log(block.text);
164
+ }
165
+ }
60
166
  ```
61
167
 
62
- ## Tool Calling
168
+ ## Thinking/Reasoning
169
+
170
+ Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
171
+
172
+ ### Unified Interface (streamSimple/completeSimple)
63
173
 
64
174
  ```typescript
65
- const tools = [{
66
- name: 'get_weather',
67
- description: 'Get current weather for a location',
68
- parameters: {
69
- type: 'object',
70
- properties: {
71
- location: { type: 'string' }
72
- },
73
- required: ['location']
175
+ import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai';
176
+
177
+ // Many models across providers support thinking/reasoning
178
+ const model = getModel('anthropic', 'claude-sonnet-4-20250514');
179
+ // or getModel('openai', 'gpt-5-mini');
180
+ // or getModel('google', 'gemini-2.5-flash');
181
+ // or getModel('xai', 'grok-code-fast-1');
182
+ // or getModel('groq', 'openai/gpt-oss-20b');
183
+ // or getModel('cerebras', 'gpt-oss-120b');
184
+ // or getModel('openrouter', 'z-ai/glm-4.5v');
185
+
186
+ // Check if model supports reasoning
187
+ if (model.reasoning) {
188
+ console.log('Model supports reasoning/thinking');
189
+ }
190
+
191
+ // Use the simplified reasoning option
192
+ const response = await completeSimple(model, {
193
+ messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
194
+ }, {
195
+ reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
196
+ });
197
+
198
+ // Access thinking and text blocks
199
+ for (const block of response.content) {
200
+ if (block.type === 'thinking') {
201
+ console.log('Thinking:', block.thinking);
202
+ } else if (block.type === 'text') {
203
+ console.log('Response:', block.text);
74
204
  }
75
- }];
205
+ }
206
+ ```
76
207
 
77
- const messages = [];
78
- messages.push({ role: 'user', content: 'What is the weather in Paris?' });
208
+ ### Provider-Specific Options (stream/complete)
79
209
 
80
- const response = await llm.generate({ messages, tools });
81
- messages.push(response);
210
+ For fine-grained control, use the provider-specific options:
82
211
 
83
- // Check for tool calls in the content blocks
84
- const toolCalls = response.content.filter(block => block.type === 'toolCall');
212
+ ```typescript
213
+ import { getModel, complete } from '@mariozechner/pi-ai';
85
214
 
86
- for (const call of toolCalls) {
87
- // Call your actual function
88
- const result = await getWeather(call.arguments.location);
215
+ // OpenAI Reasoning (o1, o3, gpt-5)
216
+ const openaiModel = getModel('openai', 'gpt-5-mini');
217
+ await complete(openaiModel, context, {
218
+ reasoningEffort: 'medium',
219
+ reasoningSummary: 'detailed' // OpenAI Responses API only
220
+ });
89
221
 
90
- // Add tool result to context
91
- messages.push({
92
- role: 'toolResult',
93
- content: JSON.stringify(result),
94
- toolCallId: call.id,
95
- toolName: call.name,
96
- isError: false
97
- });
98
- }
222
+ // Anthropic Thinking (Claude Sonnet 4)
223
+ const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514');
224
+ await complete(anthropicModel, context, {
225
+ thinkingEnabled: true,
226
+ thinkingBudgetTokens: 8192 // Optional token limit
227
+ });
99
228
 
100
- if (toolCalls.length > 0) {
101
- // Continue conversation with tool results
102
- const followUp = await llm.generate({ messages, tools });
103
- messages.push(followUp);
104
-
105
- // Print text blocks from the response
106
- for (const block of followUp.content) {
107
- if (block.type === 'text') {
108
- console.log(block.text);
109
- }
229
+ // Google Gemini Thinking
230
+ const googleModel = getModel('google', 'gemini-2.5-flash');
231
+ await complete(googleModel, context, {
232
+ thinking: {
233
+ enabled: true,
234
+ budgetTokens: 8192 // -1 for dynamic, 0 to disable
110
235
  }
111
- }
236
+ });
112
237
  ```
113
238
 
114
- ## Streaming
239
+ ### Streaming Thinking Content
240
+
241
+ When streaming, thinking content is delivered through specific events:
115
242
 
116
243
  ```typescript
117
- const response = await llm.generate({
118
- messages: [{ role: 'user', content: 'Write a story' }]
119
- }, {
120
- onEvent: (event) => {
121
- switch (event.type) {
122
- case 'start':
123
- console.log(`Starting ${event.provider} ${event.model}`);
124
- break;
125
- case 'text_start':
126
- console.log('[Starting text block]');
127
- break;
128
- case 'text_delta':
129
- process.stdout.write(event.delta);
130
- break;
131
- case 'text_end':
132
- console.log(`\n[Text block complete: ${event.content.length} chars]`);
133
- break;
134
- case 'thinking_start':
135
- console.error('[Starting thinking]');
136
- break;
137
- case 'thinking_delta':
138
- process.stderr.write(event.delta);
139
- break;
140
- case 'thinking_end':
141
- console.error(`\n[Thinking complete: ${event.content.length} chars]`);
142
- break;
143
- case 'toolCall':
144
- console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`);
145
- break;
146
- case 'done':
147
- console.log(`Completed with reason: ${event.reason}`);
148
- console.log(`Tokens: ${event.message.usage.input} in, ${event.message.usage.output} out`);
149
- break;
150
- case 'error':
151
- console.error('Error:', event.error);
152
- break;
153
- }
244
+ const s = streamSimple(model, context, { reasoning: 'high' });
245
+
246
+ for await (const event of s) {
247
+ switch (event.type) {
248
+ case 'thinking_start':
249
+ console.log('[Model started thinking]');
250
+ break;
251
+ case 'thinking_delta':
252
+ process.stdout.write(event.delta); // Stream thinking content
253
+ break;
254
+ case 'thinking_end':
255
+ console.log('\n[Thinking complete]');
256
+ break;
154
257
  }
155
- });
258
+ }
156
259
  ```
157
260
 
158
- ## Abort Signal
261
+ ## Errors & Abort Signal
159
262
 
160
- The abort signal allows you to cancel in-progress requests. When aborted, providers return partial results accumulated up to the cancellation point, including accurate token counts and cost estimates.
263
+ When a request ends with an error (including aborts), the API returns an `AssistantMessage` with:
264
+ - `stopReason: 'error'` - Indicates the request ended with an error
265
+ - `error: string` - Error message describing what happened
266
+ - `content: array` - **Partial content** accumulated before the error
267
+ - `usage: Usage` - **Token counts and costs** (may be incomplete depending on when the error occurred)
161
268
 
162
- ### Basic Usage
269
+ ### Aborting
270
+ The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`.
163
271
 
164
272
  ```typescript
273
+ import { getModel, stream } from '@mariozechner/pi-ai';
274
+
275
+ const model = getModel('openai', 'gpt-4o-mini');
165
276
  const controller = new AbortController();
166
277
 
167
278
  // Abort after 2 seconds
168
279
  setTimeout(() => controller.abort(), 2000);
169
280
 
170
- const response = await llm.generate({
281
+ const s = stream(model, {
171
282
  messages: [{ role: 'user', content: 'Write a long story' }]
172
283
  }, {
173
- signal: controller.signal,
174
- onEvent: (event) => {
175
- if (event.type === 'text_delta') {
176
- process.stdout.write(event.delta);
177
- }
178
- }
284
+ signal: controller.signal
179
285
  });
180
286
 
181
- // Check if the request was aborted
182
- if (response.stopReason === 'error' && response.error) {
183
- console.log('Request was aborted:', response.error);
287
+ for await (const event of s) {
288
+ if (event.type === 'text_delta') {
289
+ process.stdout.write(event.delta);
290
+ } else if (event.type === 'error') {
291
+ console.log('Error:', event.error);
292
+ }
293
+ }
294
+
295
+ // Get results (may be partial if aborted)
296
+ const response = await s.finalMessage();
297
+ if (response.stopReason === 'error') {
298
+ console.log('Error:', response.error);
184
299
  console.log('Partial content received:', response.content);
185
300
  console.log('Tokens used:', response.usage);
186
- } else {
187
- console.log('Request completed successfully');
188
301
  }
189
302
  ```
190
303
 
191
- ### Partial Results and Token Tracking
192
-
193
- When a request is aborted, the API returns an `AssistantMessage` with:
194
- - `stopReason: 'error'` - Indicates the request was aborted
195
- - `error: string` - Error message describing the abort
196
- - `content: array` - **Partial content** accumulated before the abort
197
- - `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred)
198
-
199
- ```typescript
200
- // Example: User interrupts a long-running request
201
- const controller = new AbortController();
202
- document.getElementById('stop-button').onclick = () => controller.abort();
203
-
204
- const response = await llm.generate(context, {
205
- signal: controller.signal,
206
- onEvent: (e) => {
207
- if (e.type === 'text_delta') updateUI(e.delta);
208
- }
209
- });
210
-
211
- // Even if aborted, you get:
212
- // - Partial text that was streamed
213
- // - Token count (may be partial/estimated)
214
- // - Cost calculations (may be incomplete)
215
- console.log(`Generated ${response.content.length} content blocks`);
216
- console.log(`Estimated ${response.usage.output} output tokens`);
217
- console.log(`Estimated cost: $${response.usage.cost.total}`);
218
- ```
219
-
220
304
  ### Continuing After Abort
221
305
 
222
306
  Aborted messages can be added to the conversation context and continued in subsequent requests:
@@ -232,19 +316,99 @@ const context = {
232
316
  const controller1 = new AbortController();
233
317
  setTimeout(() => controller1.abort(), 2000);
234
318
 
235
- const partial = await llm.generate(context, { signal: controller1.signal });
319
+ const partial = await complete(model, context, { signal: controller1.signal });
236
320
 
237
321
  // Add the partial response to context
238
322
  context.messages.push(partial);
239
323
  context.messages.push({ role: 'user', content: 'Please continue' });
240
324
 
241
325
  // Continue the conversation
242
- const continuation = await llm.generate(context);
326
+ const continuation = await complete(model, context);
243
327
  ```
244
328
 
245
- When an aborted message (with `stopReason: 'error'`) is resubmitted in the context:
246
- - **OpenAI Responses**: Filters out thinking blocks and tool calls from aborted messages, as API call will fail if incomplete thinking and tool calls are submitted
247
- - **Anthropic, Google, OpenAI Completions**: Send all blocks as-is (text, thinking, tool calls)
329
+ ## APIs, Models, and Providers
330
+
331
+ The library implements four APIs, each with its own streaming function and options:
332
+
333
+ - **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
334
+ - **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
335
+ - **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
336
+ - **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
337
+
338
+ ### Providers and Models
339
+
340
+ A **provider** offers models through a specific API. For example:
341
+ - **Anthropic** models use the `anthropic-messages` API
342
+ - **Google** models use the `google-generative-ai` API
343
+ - **OpenAI** models use the `openai-responses` API
344
+ - **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
345
+
346
+ ### Querying Providers and Models
347
+
348
+ ```typescript
349
+ import { getProviders, getModels, getModel } from '@mariozechner/pi-ai';
350
+
351
+ // Get all available providers
352
+ const providers = getProviders();
353
+ console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
354
+
355
+ // Get all models from a provider (fully typed)
356
+ const anthropicModels = getModels('anthropic');
357
+ for (const model of anthropicModels) {
358
+ console.log(`${model.id}: ${model.name}`);
359
+ console.log(` API: ${model.api}`); // 'anthropic-messages'
360
+ console.log(` Context: ${model.contextWindow} tokens`);
361
+ console.log(` Vision: ${model.input.includes('image')}`);
362
+ console.log(` Reasoning: ${model.reasoning}`);
363
+ }
364
+
365
+ // Get a specific model (both provider and model ID are auto-completed in IDEs)
366
+ const model = getModel('openai', 'gpt-4o-mini');
367
+ console.log(`Using ${model.name} via ${model.api} API`);
368
+ ```
369
+
370
+ ### Custom Models
371
+
372
+ You can create custom models for local inference servers or custom endpoints:
373
+
374
+ ```typescript
375
+ import { Model, stream } from '@mariozechner/pi-ai';
376
+
377
+ // Example: Ollama using OpenAI-compatible API
378
+ const ollamaModel: Model<'openai-completions'> = {
379
+ id: 'llama-3.1-8b',
380
+ name: 'Llama 3.1 8B (Ollama)',
381
+ api: 'openai-completions',
382
+ provider: 'ollama',
383
+ baseUrl: 'http://localhost:11434/v1',
384
+ reasoning: false,
385
+ input: ['text'],
386
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
387
+ contextWindow: 128000,
388
+ maxTokens: 32000
389
+ };
390
+
391
+ // Use the custom model
392
+ const response = await stream(ollamaModel, context, {
393
+ apiKey: 'dummy' // Ollama doesn't need a real key
394
+ });
395
+ ```
396
+
397
+ ### Type Safety
398
+
399
+ Models are typed by their API, ensuring type-safe options:
400
+
401
+ ```typescript
402
+ // TypeScript knows this is an Anthropic model
403
+ const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
404
+
405
+ // So these options are type-checked for AnthropicOptions
406
+ await stream(claude, context, {
407
+ thinkingEnabled: true, // ✓ Valid for anthropic-messages
408
+ thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
409
+ // reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages
410
+ });
411
+ ```
248
412
 
249
413
  ## Cross-Provider Handoffs
250
414
 
@@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th
255
419
  When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
256
420
 
257
421
  - **User and tool result messages** are passed through unchanged
258
- - **Assistant messages from the same provider/model** are preserved as-is
422
+ - **Assistant messages from the same provider/API** are preserved as-is
259
423
  - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
260
424
  - **Tool calls and regular text** are preserved unchanged
261
425
 
262
426
  ### Example: Multi-Provider Conversation
263
427
 
264
428
  ```typescript
265
- import { createLLM } from '@mariozechner/pi-ai';
429
+ import { getModel, complete, Context } from '@mariozechner/pi-ai';
266
430
 
267
431
  // Start with Claude
268
- const claude = createLLM('anthropic', 'claude-sonnet-4-0');
269
- const messages = [];
432
+ const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
433
+ const context: Context = {
434
+ messages: []
435
+ };
270
436
 
271
- messages.push({ role: 'user', content: 'What is 25 * 18?' });
272
- const claudeResponse = await claude.generate({ messages }, {
273
- thinking: { enabled: true }
437
+ context.messages.push({ role: 'user', content: 'What is 25 * 18?' });
438
+ const claudeResponse = await complete(claude, context, {
439
+ thinkingEnabled: true
274
440
  });
275
- messages.push(claudeResponse);
441
+ context.messages.push(claudeResponse);
276
442
 
277
443
  // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
278
- const gpt5 = createLLM('openai', 'gpt-5-mini');
279
- messages.push({ role: 'user', content: 'Is that calculation correct?' });
280
- const gptResponse = await gpt5.generate({ messages });
281
- messages.push(gptResponse);
444
+ const gpt5 = getModel('openai', 'gpt-5-mini');
445
+ context.messages.push({ role: 'user', content: 'Is that calculation correct?' });
446
+ const gptResponse = await complete(gpt5, context);
447
+ context.messages.push(gptResponse);
282
448
 
283
449
  // Switch to Gemini
284
- const gemini = createLLM('google', 'gemini-2.5-flash');
285
- messages.push({ role: 'user', content: 'What was the original question?' });
286
- const geminiResponse = await gemini.generate({ messages });
450
+ const gemini = getModel('google', 'gemini-2.5-flash');
451
+ context.messages.push({ role: 'user', content: 'What was the original question?' });
452
+ const geminiResponse = await complete(gemini, context);
287
453
  ```
288
454
 
289
455
  ### Provider Compatibility
@@ -300,155 +466,65 @@ This enables flexible workflows where you can:
300
466
  - Use specialized models for specific tasks
301
467
  - Maintain conversation continuity across provider outages
302
468
 
303
- ## Provider-Specific Options
304
-
305
- ### OpenAI Reasoning (o1, o3)
306
- ```typescript
307
- const llm = createLLM('openai', 'o1-mini');
469
+ ## Context Serialization
308
470
 
309
- await llm.generate(context, {
310
- reasoningEffort: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
311
- });
312
- ```
471
+ The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
313
472
 
314
- ### Anthropic Thinking
315
473
  ```typescript
316
- const llm = createLLM('anthropic', 'claude-3-5-sonnet-20241022');
317
-
318
- await llm.generate(context, {
319
- thinking: {
320
- enabled: true,
321
- budgetTokens: 2048 // Optional thinking token limit
322
- }
323
- });
324
- ```
474
+ import { Context, getModel, complete } from '@mariozechner/pi-ai';
325
475
 
326
- ### Google Gemini Thinking
327
- ```typescript
328
- const llm = createLLM('google', 'gemini-2.5-pro');
329
-
330
- await llm.generate(context, {
331
- thinking: { enabled: true }
332
- });
333
- ```
334
-
335
- ## Custom Models
336
-
337
- ### Local Models (Ollama, vLLM, etc.)
338
- ```typescript
339
- import { OpenAICompletionsLLM } from '@mariozechner/pi-ai';
340
-
341
- const model = {
342
- id: 'gpt-oss:20b',
343
- provider: 'ollama',
344
- baseUrl: 'http://localhost:11434/v1',
345
- reasoning: false,
346
- input: ['text'],
347
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
348
- contextWindow: 126000,
349
- maxTokens: 32000,
350
- name: 'Llama 3.1 8B'
351
- };
352
-
353
- const llm = new OpenAICompletionsLLM(model, 'dummy-key');
354
- ```
355
-
356
- ### Custom OpenAI-Compatible Endpoints
357
- ```typescript
358
- const model = {
359
- id: 'custom-model',
360
- provider: 'custom',
361
- baseUrl: 'https://your-api.com/v1',
362
- reasoning: true,
363
- input: ['text', 'image'],
364
- cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
365
- contextWindow: 32768,
366
- maxTokens: 8192,
367
- name: 'Custom Model'
476
+ // Create and use a context
477
+ const context: Context = {
478
+ systemPrompt: 'You are a helpful assistant.',
479
+ messages: [
480
+ { role: 'user', content: 'What is TypeScript?' }
481
+ ]
368
482
  };
369
483
 
370
- const llm = new OpenAICompletionsLLM(model, 'your-api-key');
371
- ```
372
-
373
- ## Model Discovery
374
-
375
- All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time.
484
+ const model = getModel('openai', 'gpt-4o-mini');
485
+ const response = await complete(model, context);
486
+ context.messages.push(response);
376
487
 
377
- ### List Available Models
378
- ```typescript
379
- import { PROVIDERS } from '@mariozechner/pi-ai';
488
+ // Serialize the entire context
489
+ const serialized = JSON.stringify(context);
490
+ console.log('Serialized context size:', serialized.length, 'bytes');
380
491
 
381
- // List all OpenAI models (all support tool calling)
382
- for (const [modelId, model] of Object.entries(PROVIDERS.openai.models)) {
383
- console.log(`${modelId}: ${model.name}`);
384
- console.log(` Context: ${model.contextWindow} tokens`);
385
- console.log(` Reasoning: ${model.reasoning}`);
386
- console.log(` Vision: ${model.input.includes('image')}`);
387
- console.log(` Cost: $${model.cost.input}/$${model.cost.output} per million tokens`);
388
- }
492
+ // Save to database, localStorage, file, etc.
493
+ localStorage.setItem('conversation', serialized);
389
494
 
390
- // Find all models with reasoning support
391
- const reasoningModels = [];
392
- for (const provider of Object.values(PROVIDERS)) {
393
- for (const model of Object.values(provider.models)) {
394
- if (model.reasoning) {
395
- reasoningModels.push(model);
396
- }
397
- }
398
- }
495
+ // Later: deserialize and continue the conversation
496
+ const restored: Context = JSON.parse(localStorage.getItem('conversation')!);
497
+ restored.messages.push({ role: 'user', content: 'Tell me more about its type system' });
399
498
 
400
- // Find all vision-capable models
401
- const visionModels = [];
402
- for (const provider of Object.values(PROVIDERS)) {
403
- for (const model of Object.values(provider.models)) {
404
- if (model.input.includes('image')) {
405
- visionModels.push(model);
406
- }
407
- }
408
- }
499
+ // Continue with any model
500
+ const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022');
501
+ const continuation = await complete(newModel, restored);
409
502
  ```
410
503
 
411
- ### Check Model Capabilities
412
- ```typescript
413
- import { getModel } from '@mariozechner/pi-ai';
414
-
415
- const model = getModel('openai', 'gpt-4o-mini');
416
- if (model) {
417
- console.log(`Model: ${model.name}`);
418
- console.log(`Provider: ${model.provider}`);
419
- console.log(`Context window: ${model.contextWindow} tokens`);
420
- console.log(`Max output: ${model.maxTokens} tokens`);
421
- console.log(`Supports reasoning: ${model.reasoning}`);
422
- console.log(`Supports images: ${model.input.includes('image')}`);
423
- console.log(`Input cost: $${model.cost.input} per million tokens`);
424
- console.log(`Output cost: $${model.cost.output} per million tokens`);
425
- console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`);
426
- console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`);
427
- }
428
- ```
504
+ > **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
429
505
 
430
506
  ## Browser Usage
431
507
 
432
508
  The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
433
509
 
434
510
  ```typescript
435
- import { createLLM } from '@mariozechner/pi-ai';
511
+ import { getModel, complete } from '@mariozechner/pi-ai';
436
512
 
437
513
  // API key must be passed explicitly in browser
438
- const llm = createLLM('anthropic', 'claude-3-5-haiku-20241022', {
439
- apiKey: 'your-api-key'
440
- });
514
+ const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
441
515
 
442
- const response = await llm.generate({
516
+ const response = await complete(model, {
443
517
  messages: [{ role: 'user', content: 'Hello!' }]
518
+ }, {
519
+ apiKey: 'your-api-key'
444
520
  });
445
521
  ```
446
522
 
447
523
  > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
448
524
 
449
- ## Environment Variables
525
+ ### Environment Variables (Node.js only)
450
526
 
451
- Set these environment variables to use `createLLM` without passing API keys:
527
+ In Node.js environments, you can set environment variables to avoid passing API keys:
452
528
 
453
529
  ```bash
454
530
  OPENAI_API_KEY=sk-...
@@ -460,13 +536,17 @@ XAI_API_KEY=xai-...
460
536
  OPENROUTER_API_KEY=sk-or-...
461
537
  ```
462
538
 
463
- When set, you can omit the API key parameter:
539
+ When set, the library automatically uses these keys:
540
+
464
541
  ```typescript
465
542
  // Uses OPENAI_API_KEY from environment
466
- const llm = createLLM('openai', 'gpt-4o-mini');
543
+ const model = getModel('openai', 'gpt-4o-mini');
544
+ const response = await complete(model, context);
467
545
 
468
- // Or pass explicitly
469
- const llm = createLLM('openai', 'gpt-4o-mini', 'sk-...');
546
+ // Or override with explicit key
547
+ const response = await complete(model, context, {
548
+ apiKey: 'sk-different-key'
549
+ });
470
550
  ```
471
551
 
472
552
  ## License