npm - @mariozechner/pi-ai - Versions diffs - 0.5.27 → 0.5.28 - Mend

@mariozechner/pi-ai 0.5.27 → 0.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/README.md +355 -275
package/dist/generate.d.ts +22 -0
package/dist/generate.d.ts.map +1 -0
package/dist/generate.js +204 -0
package/dist/generate.js.map +1 -0
package/dist/index.d.ts +7 -8
package/dist/index.d.ts.map +1 -1
package/dist/index.js +7 -12
package/dist/index.js.map +1 -1
package/dist/models.d.ts +10 -71
package/dist/models.d.ts.map +1 -1
package/dist/models.generated.d.ts +3056 -2659
package/dist/models.generated.d.ts.map +1 -1
package/dist/models.generated.js +3063 -2663
package/dist/models.generated.js.map +1 -1
package/dist/models.js +17 -59
package/dist/models.js.map +1 -1
package/dist/providers/anthropic.d.ts +5 -18
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +249 -227
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/google.d.ts +3 -14
package/dist/providers/google.d.ts.map +1 -1
package/dist/providers/google.js +215 -220
package/dist/providers/google.js.map +1 -1
package/dist/providers/openai-completions.d.ts +4 -14
package/dist/providers/openai-completions.d.ts.map +1 -1
package/dist/providers/openai-completions.js +247 -215
package/dist/providers/openai-completions.js.map +1 -1
package/dist/providers/openai-responses.d.ts +6 -13
package/dist/providers/openai-responses.d.ts.map +1 -1
package/dist/providers/openai-responses.js +242 -244
package/dist/providers/openai-responses.js.map +1 -1
package/dist/providers/utils.d.ts +2 -14
package/dist/providers/utils.d.ts.map +1 -1
package/dist/providers/utils.js +2 -15
package/dist/providers/utils.js.map +1 -1
package/dist/types.d.ts +39 -16
package/dist/types.d.ts.map +1 -1
package/dist/types.js +1 -0
package/dist/types.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai
 ## Quick Start
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai';
-const llm = createLLM('openai', 'gpt-4o-mini');
+// Fully typed with auto-complete support for both providers and models
+const model = getModel('openai', 'gpt-4o-mini');
-const response = await llm.generate({
-  messages: [{ role: 'user', content: 'Hello!' }]
-});
+// Define tools
+const tools: Tool[] = [{
+  name: 'get_time',
+  description: 'Get the current time',
+  parameters: {
+    type: 'object',
+    properties: {},
+    required: []
+  }
+}];
+// Build a conversation context (easily serializable and transferable between models)
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [{ role: 'user', content: 'What time is it?' }],
+  tools
+};
+// Option 1: Streaming with all event types
+const s = stream(model, context);
+for await (const event of s) {
+  switch (event.type) {
+    case 'start':
+      console.log(`Starting with ${event.partial.model}`);
+      break;
+    case 'text_start':
+      console.log('\n[Text started]');
+      break;
+    case 'text_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'text_end':
+      console.log('\n[Text ended]');
+      break;
+    case 'thinking_start':
+      console.log('[Model is thinking...]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'thinking_end':
+      console.log('[Thinking complete]');
+      break;
+    case 'toolCall':
+      console.log(`\nTool called: ${event.toolCall.name}`);
+      break;
+    case 'done':
+      console.log(`\nFinished: ${event.reason}`);
+      break;
+    case 'error':
+      console.error(`Error: ${event.error}`);
+      break;
+  }
+}
+// Get the final message after streaming, add it to the context
+const finalMessage = await s.finalMessage();
+context.messages.push(finalMessage);
+// Handle tool calls if any
+const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall');
+for (const call of toolCalls) {
+  // Execute the tool
+  const result = call.name === 'get_time'
+    ? new Date().toISOString()
+    : 'Unknown tool';
+  // Add tool result to context
+  context.messages.push({
+    role: 'toolResult',
+    toolCallId: call.id,
+    toolName: call.name,
+    content: result,
+    isError: false
+  });
+}
+// Continue if there were tool calls
+if (toolCalls.length > 0) {
+  const continuation = await complete(model, context);
+  context.messages.push(continuation);
+  console.log('After tool execution:', continuation.content);
+}
+console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
+console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
+// Option 2: Get complete response without streaming
+const response = await complete(model, context);
-// response.content is an array of content blocks
 for (const block of response.content) {
   if (block.type === 'text') {
     console.log(block.text);
+  } else if (block.type === 'toolCall') {
+    console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
   }
 }
 ```
 ## Image Input
+Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
 ```typescript
 import { readFileSync } from 'fs';
+import { getModel, complete } from '@mariozechner/pi-ai';
+const model = getModel('openai', 'gpt-4o-mini');
+// Check if model supports images
+if (model.input.includes('image')) {
+  console.log('Model supports vision');
+}
 const imageBuffer = readFileSync('image.png');
 const base64Image = imageBuffer.toString('base64');
-const response = await llm.generate({
+const response = await complete(model, {
   messages: [{
     role: 'user',
     content: [
@@ -57,166 +156,151 @@ const response = await llm.generate({
     ]
   }]
 });
+// Access the response
+for (const block of response.content) {
+  if (block.type === 'text') {
+    console.log(block.text);
+  }
+}
 ```
-## Tool Calling
+## Thinking/Reasoning
+Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
+### Unified Interface (streamSimple/completeSimple)
 ```typescript
-const tools = [{
-  name: 'get_weather',
-  description: 'Get current weather for a location',
-  parameters: {
-    type: 'object',
-    properties: {
-      location: { type: 'string' }
-    },
-    required: ['location']
+import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai';
+// Many models across providers support thinking/reasoning
+const model = getModel('anthropic', 'claude-sonnet-4-20250514');
+// or getModel('openai', 'gpt-5-mini');
+// or getModel('google', 'gemini-2.5-flash');
+// or getModel('xai', 'grok-code-fast-1');
+// or getModel('groq', 'openai/gpt-oss-20b');
+// or getModel('cerebras', 'gpt-oss-120b');
+// or getModel('openrouter', 'z-ai/glm-4.5v');
+// Check if model supports reasoning
+if (model.reasoning) {
+  console.log('Model supports reasoning/thinking');
+}
+// Use the simplified reasoning option
+const response = await completeSimple(model, {
+  messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
+}, {
+  reasoning: 'medium'  // 'minimal' | 'low' | 'medium' | 'high'
+});
+// Access thinking and text blocks
+for (const block of response.content) {
+  if (block.type === 'thinking') {
+    console.log('Thinking:', block.thinking);
+  } else if (block.type === 'text') {
+    console.log('Response:', block.text);
   }
-}];
+}
+```
-const messages = [];
-messages.push({ role: 'user', content: 'What is the weather in Paris?' });
+### Provider-Specific Options (stream/complete)
-const response = await llm.generate({ messages, tools });
-messages.push(response);
+For fine-grained control, use the provider-specific options:
-// Check for tool calls in the content blocks
-const toolCalls = response.content.filter(block => block.type === 'toolCall');
+```typescript
+import { getModel, complete } from '@mariozechner/pi-ai';
-for (const call of toolCalls) {
-  // Call your actual function
-  const result = await getWeather(call.arguments.location);
+// OpenAI Reasoning (o1, o3, gpt-5)
+const openaiModel = getModel('openai', 'gpt-5-mini');
+await complete(openaiModel, context, {
+  reasoningEffort: 'medium',
+  reasoningSummary: 'detailed'  // OpenAI Responses API only
+});
-  // Add tool result to context
-  messages.push({
-    role: 'toolResult',
-    content: JSON.stringify(result),
-    toolCallId: call.id,
-    toolName: call.name,
-    isError: false
-  });
-}
+// Anthropic Thinking (Claude Sonnet 4)
+const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514');
+await complete(anthropicModel, context, {
+  thinkingEnabled: true,
+  thinkingBudgetTokens: 8192  // Optional token limit
+});
-if (toolCalls.length > 0) {
-  // Continue conversation with tool results
-  const followUp = await llm.generate({ messages, tools });
-  messages.push(followUp);
-  // Print text blocks from the response
-  for (const block of followUp.content) {
-    if (block.type === 'text') {
-      console.log(block.text);
-    }
+// Google Gemini Thinking
+const googleModel = getModel('google', 'gemini-2.5-flash');
+await complete(googleModel, context, {
+  thinking: {
+    enabled: true,
+    budgetTokens: 8192  // -1 for dynamic, 0 to disable
   }
-}
+});
 ```
-## Streaming
+### Streaming Thinking Content
+When streaming, thinking content is delivered through specific events:
 ```typescript
-const response = await llm.generate({
-  messages: [{ role: 'user', content: 'Write a story' }]
-}, {
-  onEvent: (event) => {
-    switch (event.type) {
-      case 'start':
-        console.log(`Starting ${event.provider} ${event.model}`);
-        break;
-      case 'text_start':
-        console.log('[Starting text block]');
-        break;
-      case 'text_delta':
-        process.stdout.write(event.delta);
-        break;
-      case 'text_end':
-        console.log(`\n[Text block complete: ${event.content.length} chars]`);
-        break;
-      case 'thinking_start':
-        console.error('[Starting thinking]');
-        break;
-      case 'thinking_delta':
-        process.stderr.write(event.delta);
-        break;
-      case 'thinking_end':
-        console.error(`\n[Thinking complete: ${event.content.length} chars]`);
-        break;
-      case 'toolCall':
-        console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`);
-        break;
-      case 'done':
-        console.log(`Completed with reason: ${event.reason}`);
-        console.log(`Tokens: ${event.message.usage.input} in, ${event.message.usage.output} out`);
-        break;
-      case 'error':
-        console.error('Error:', event.error);
-        break;
-    }
+const s = streamSimple(model, context, { reasoning: 'high' });
+for await (const event of s) {
+  switch (event.type) {
+    case 'thinking_start':
+      console.log('[Model started thinking]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta);  // Stream thinking content
+      break;
+    case 'thinking_end':
+      console.log('\n[Thinking complete]');
+      break;
   }
-});
+}
 ```
-## Abort Signal
+## Errors & Abort Signal
-The abort signal allows you to cancel in-progress requests. When aborted, providers return partial results accumulated up to the cancellation point, including accurate token counts and cost estimates.
+When a request ends with an error (including aborts), the API returns an `AssistantMessage` with:
+- `stopReason: 'error'` - Indicates the request ended with an error
+- `error: string` - Error message describing what happened
+- `content: array` - **Partial content** accumulated before the error
+- `usage: Usage` - **Token counts and costs** (may be incomplete depending on when error occurred)
-### Basic Usage
+### Aborting
+The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`.
 ```typescript
+import { getModel, stream } from '@mariozechner/pi-ai';
+const model = getModel('openai', 'gpt-4o-mini');
 const controller = new AbortController();
 // Abort after 2 seconds
 setTimeout(() => controller.abort(), 2000);
-const response = await llm.generate({
+const s = stream(model, {
   messages: [{ role: 'user', content: 'Write a long story' }]
 }, {
-  signal: controller.signal,
-  onEvent: (event) => {
-    if (event.type === 'text_delta') {
-      process.stdout.write(event.delta);
-    }
-  }
+  signal: controller.signal
 });
-// Check if the request was aborted
-if (response.stopReason === 'error' && response.error) {
-  console.log('Request was aborted:', response.error);
+for await (const event of s) {
+  if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  } else if (event.type === 'error') {
+    console.log('Error:', event.error);
+  }
+}
+// Get results (may be partial if aborted)
+const response = await s.finalMessage();
+if (response.stopReason === 'error') {
+  console.log('Error:', response.error);
   console.log('Partial content received:', response.content);
   console.log('Tokens used:', response.usage);
-} else {
-  console.log('Request completed successfully');
 }
 ```
-### Partial Results and Token Tracking
-When a request is aborted, the API returns an `AssistantMessage` with:
-- `stopReason: 'error'` - Indicates the request was aborted
-- `error: string` - Error message describing the abort
-- `content: array` - **Partial content** accumulated before the abort
-- `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred)
-```typescript
-// Example: User interrupts a long-running request
-const controller = new AbortController();
-document.getElementById('stop-button').onclick = () => controller.abort();
-const response = await llm.generate(context, {
-  signal: controller.signal,
-  onEvent: (e) => {
-    if (e.type === 'text_delta') updateUI(e.delta);
-  }
-});
-// Even if aborted, you get:
-// - Partial text that was streamed
-// - Token count (may be partial/estimated)
-// - Cost calculations (may be incomplete)
-console.log(`Generated ${response.content.length} content blocks`);
-console.log(`Estimated ${response.usage.output} output tokens`);
-console.log(`Estimated cost: $${response.usage.cost.total}`);
-```
 ### Continuing After Abort
 Aborted messages can be added to the conversation context and continued in subsequent requests:
@@ -232,19 +316,99 @@ const context = {
 const controller1 = new AbortController();
 setTimeout(() => controller1.abort(), 2000);
-const partial = await llm.generate(context, { signal: controller1.signal });
+const partial = await complete(model, context, { signal: controller1.signal });
 // Add the partial response to context
 context.messages.push(partial);
 context.messages.push({ role: 'user', content: 'Please continue' });
 // Continue the conversation
-const continuation = await llm.generate(context);
+const continuation = await complete(model, context);
 ```
-When an aborted message (with `stopReason: 'error'`) is resubmitted in the context:
-- **OpenAI Responses**: Filters out thinking blocks and tool calls from aborted messages, as API call will fail if incomplete thinking and tool calls are submitted
-- **Anthropic, Google, OpenAI Completions**: Send all blocks as-is (text, thinking, tool calls)
+## APIs, Models, and Providers
+The library implements 4 API interfaces, each with its own streaming function and options:
+- **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
+- **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
+- **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
+- **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
+### Providers and Models
+A **provider** offers models through a specific API. For example:
+- **Anthropic** models use the `anthropic-messages` API
+- **Google** models use the `google-generative-ai` API
+- **OpenAI** models use the `openai-responses` API
+- **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
+### Querying Providers and Models
+```typescript
+import { getProviders, getModels, getModel } from '@mariozechner/pi-ai';
+// Get all available providers
+const providers = getProviders();
+console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
+// Get all models from a provider (fully typed)
+const anthropicModels = getModels('anthropic');
+for (const model of anthropicModels) {
+  console.log(`${model.id}: ${model.name}`);
+  console.log(`  API: ${model.api}`); // 'anthropic-messages'
+  console.log(`  Context: ${model.contextWindow} tokens`);
+  console.log(`  Vision: ${model.input.includes('image')}`);
+  console.log(`  Reasoning: ${model.reasoning}`);
+}
+// Get a specific model (both provider and model ID are auto-completed in IDEs)
+const model = getModel('openai', 'gpt-4o-mini');
+console.log(`Using ${model.name} via ${model.api} API`);
+```
+### Custom Models
+You can create custom models for local inference servers or custom endpoints:
+```typescript
+import { Model, stream } from '@mariozechner/pi-ai';
+// Example: Ollama using OpenAI-compatible API
+const ollamaModel: Model<'openai-completions'> = {
+  id: 'llama-3.1-8b',
+  name: 'Llama 3.1 8B (Ollama)',
+  api: 'openai-completions',
+  provider: 'ollama',
+  baseUrl: 'http://localhost:11434/v1',
+  reasoning: false,
+  input: ['text'],
+  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+  contextWindow: 128000,
+  maxTokens: 32000
+};
+// Use the custom model
+const response = await stream(ollamaModel, context, {
+  apiKey: 'dummy' // Ollama doesn't need a real key
+});
+```
+### Type Safety
+Models are typed by their API, ensuring type-safe options:
+```typescript
+// TypeScript knows this is an Anthropic model
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+// So these options are type-checked for AnthropicOptions
+await stream(claude, context, {
+  thinkingEnabled: true,      // ✓ Valid for anthropic-messages
+  thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
+  // reasoningEffort: 'high'  // ✗ TypeScript error: not valid for anthropic-messages
+});
+```
 ## Cross-Provider Handoffs
@@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th
 When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
 - **User and tool result messages** are passed through unchanged
-- **Assistant messages from the same provider/model** are preserved as-is
+- **Assistant messages from the same provider/API** are preserved as-is
 - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
 - **Tool calls and regular text** are preserved unchanged
 ### Example: Multi-Provider Conversation
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, complete, Context } from '@mariozechner/pi-ai';
 // Start with Claude
-const claude = createLLM('anthropic', 'claude-sonnet-4-0');
-const messages = [];
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+const context: Context = {
+  messages: []
+};
-messages.push({ role: 'user', content: 'What is 25 * 18?' });
-const claudeResponse = await claude.generate({ messages }, {
-  thinking: { enabled: true }
+context.messages.push({ role: 'user', content: 'What is 25 * 18?' });
+const claudeResponse = await complete(claude, context, {
+  thinkingEnabled: true
 });
-messages.push(claudeResponse);
+context.messages.push(claudeResponse);
 // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
-const gpt5 = createLLM('openai', 'gpt-5-mini');
-messages.push({ role: 'user', content: 'Is that calculation correct?' });
-const gptResponse = await gpt5.generate({ messages });
-messages.push(gptResponse);
+const gpt5 = getModel('openai', 'gpt-5-mini');
+context.messages.push({ role: 'user', content: 'Is that calculation correct?' });
+const gptResponse = await complete(gpt5, context);
+context.messages.push(gptResponse);
 // Switch to Gemini
-const gemini = createLLM('google', 'gemini-2.5-flash');
-messages.push({ role: 'user', content: 'What was the original question?' });
-const geminiResponse = await gemini.generate({ messages });
+const gemini = getModel('google', 'gemini-2.5-flash');
+context.messages.push({ role: 'user', content: 'What was the original question?' });
+const geminiResponse = await complete(gemini, context);
 ```
 ### Provider Compatibility
@@ -300,155 +466,65 @@ This enables flexible workflows where you can:
 - Use specialized models for specific tasks
 - Maintain conversation continuity across provider outages
-## Provider-Specific Options
-### OpenAI Reasoning (o1, o3)
-```typescript
-const llm = createLLM('openai', 'o1-mini');
+## Context Serialization
-await llm.generate(context, {
-  reasoningEffort: 'medium'  // 'minimal' | 'low' | 'medium' | 'high'
-});
-```
+The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
-### Anthropic Thinking
 ```typescript
-const llm = createLLM('anthropic', 'claude-3-5-sonnet-20241022');
-await llm.generate(context, {
-  thinking: {
-    enabled: true,
-    budgetTokens: 2048  // Optional thinking token limit
-  }
-});
-```
+import { Context, getModel, complete } from '@mariozechner/pi-ai';
-### Google Gemini Thinking
-```typescript
-const llm = createLLM('google', 'gemini-2.5-pro');
-await llm.generate(context, {
-  thinking: { enabled: true }
-});
-```
-## Custom Models
-### Local Models (Ollama, vLLM, etc.)
-```typescript
-import { OpenAICompletionsLLM } from '@mariozechner/pi-ai';
-const model = {
-  id: 'gpt-oss:20b',
-  provider: 'ollama',
-  baseUrl: 'http://localhost:11434/v1',
-  reasoning: false,
-  input: ['text'],
-  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 126000,
-  maxTokens: 32000,
-  name: 'Llama 3.1 8B'
-};
-const llm = new OpenAICompletionsLLM(model, 'dummy-key');
-```
-### Custom OpenAI-Compatible Endpoints
-```typescript
-const model = {
-  id: 'custom-model',
-  provider: 'custom',
-  baseUrl: 'https://your-api.com/v1',
-  reasoning: true,
-  input: ['text', 'image'],
-  cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 32768,
-  maxTokens: 8192,
-  name: 'Custom Model'
+// Create and use a context
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [
+    { role: 'user', content: 'What is TypeScript?' }
+  ]
 };
-const llm = new OpenAICompletionsLLM(model, 'your-api-key');
-```
-## Model Discovery
-All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time.
+const model = getModel('openai', 'gpt-4o-mini');
+const response = await complete(model, context);
+context.messages.push(response);
-### List Available Models
-```typescript
-import { PROVIDERS } from '@mariozechner/pi-ai';
+// Serialize the entire context
+const serialized = JSON.stringify(context);
+console.log('Serialized context size:', serialized.length, 'bytes');
-// List all OpenAI models (all support tool calling)
-for (const [modelId, model] of Object.entries(PROVIDERS.openai.models)) {
-  console.log(`${modelId}: ${model.name}`);
-  console.log(`  Context: ${model.contextWindow} tokens`);
-  console.log(`  Reasoning: ${model.reasoning}`);
-  console.log(`  Vision: ${model.input.includes('image')}`);
-  console.log(`  Cost: $${model.cost.input}/$${model.cost.output} per million tokens`);
-}
+// Save to database, localStorage, file, etc.
+localStorage.setItem('conversation', serialized);
-// Find all models with reasoning support
-const reasoningModels = [];
-for (const provider of Object.values(PROVIDERS)) {
-  for (const model of Object.values(provider.models)) {
-    if (model.reasoning) {
-      reasoningModels.push(model);
-    }
-  }
-}
+// Later: deserialize and continue the conversation
+const restored: Context = JSON.parse(localStorage.getItem('conversation')!);
+restored.messages.push({ role: 'user', content: 'Tell me more about its type system' });
-// Find all vision-capable models
-const visionModels = [];
-for (const provider of Object.values(PROVIDERS)) {
-  for (const model of Object.values(provider.models)) {
-    if (model.input.includes('image')) {
-      visionModels.push(model);
-    }
-  }
-}
+// Continue with any model
+const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022');
+const continuation = await complete(newModel, restored);
 ```
-### Check Model Capabilities
-```typescript
-import { getModel } from '@mariozechner/pi-ai';
-const model = getModel('openai', 'gpt-4o-mini');
-if (model) {
-  console.log(`Model: ${model.name}`);
-  console.log(`Provider: ${model.provider}`);
-  console.log(`Context window: ${model.contextWindow} tokens`);
-  console.log(`Max output: ${model.maxTokens} tokens`);
-  console.log(`Supports reasoning: ${model.reasoning}`);
-  console.log(`Supports images: ${model.input.includes('image')}`);
-  console.log(`Input cost: $${model.cost.input} per million tokens`);
-  console.log(`Output cost: $${model.cost.output} per million tokens`);
-  console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`);
-  console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`);
-}
-```
+> **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
 ## Browser Usage
 The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, complete } from '@mariozechner/pi-ai';
 // API key must be passed explicitly in browser
-const llm = createLLM('anthropic', 'claude-3-5-haiku-20241022', {
-  apiKey: 'your-api-key'
-});
+const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
-const response = await llm.generate({
+const response = await complete(model, {
   messages: [{ role: 'user', content: 'Hello!' }]
+}, {
+  apiKey: 'your-api-key'
 });
 ```
 > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
-## Environment Variables
+### Environment Variables (Node.js only)
-Set these environment variables to use `createLLM` without passing API keys:
+In Node.js environments, you can set environment variables to avoid passing API keys:
 ```bash
 OPENAI_API_KEY=sk-...
@@ -460,13 +536,17 @@ XAI_API_KEY=xai-...
 OPENROUTER_API_KEY=sk-or-...
 ```
-When set, you can omit the API key parameter:
+When set, the library automatically uses these keys:
 ```typescript
 // Uses OPENAI_API_KEY from environment
-const llm = createLLM('openai', 'gpt-4o-mini');
+const model = getModel('openai', 'gpt-4o-mini');
+const response = await complete(model, context);
-// Or pass explicitly
-const llm = createLLM('openai', 'gpt-4o-mini', 'sk-...');
+// Or override with explicit key
+const response = await complete(model, context, {
+  apiKey: 'sk-different-key'
+});
 ```
 ## License