@mariozechner/pi-ai 0.5.27 → 0.5.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +355 -275
- package/dist/generate.d.ts +22 -0
- package/dist/generate.d.ts.map +1 -0
- package/dist/generate.js +204 -0
- package/dist/generate.js.map +1 -0
- package/dist/index.d.ts +7 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -12
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +10 -71
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +3056 -2659
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +3063 -2663
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +17 -59
- package/dist/models.js.map +1 -1
- package/dist/providers/anthropic.d.ts +5 -18
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +249 -227
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/google.d.ts +3 -14
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +215 -220
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/openai-completions.d.ts +4 -14
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +247 -215
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses.d.ts +6 -13
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +242 -244
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/utils.d.ts +2 -14
- package/dist/providers/utils.d.ts.map +1 -1
- package/dist/providers/utils.js +2 -15
- package/dist/providers/utils.js.map +1 -1
- package/dist/types.d.ts +39 -16
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -0
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai
 ## Quick Start
 
 ```typescript
-import {
+import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai';
 
-
+// Fully typed with auto-complete support for both providers and models
+const model = getModel('openai', 'gpt-4o-mini');
 
-
-
-
+// Define tools
+const tools: Tool[] = [{
+  name: 'get_time',
+  description: 'Get the current time',
+  parameters: {
+    type: 'object',
+    properties: {},
+    required: []
+  }
+}];
+
+// Build a conversation context (easily serializable and transferable between models)
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [{ role: 'user', content: 'What time is it?' }],
+  tools
+};
+
+// Option 1: Streaming with all event types
+const s = stream(model, context);
+
+for await (const event of s) {
+  switch (event.type) {
+    case 'start':
+      console.log(`Starting with ${event.partial.model}`);
+      break;
+    case 'text_start':
+      console.log('\n[Text started]');
+      break;
+    case 'text_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'text_end':
+      console.log('\n[Text ended]');
+      break;
+    case 'thinking_start':
+      console.log('[Model is thinking...]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'thinking_end':
+      console.log('[Thinking complete]');
+      break;
+    case 'toolCall':
+      console.log(`\nTool called: ${event.toolCall.name}`);
+      break;
+    case 'done':
+      console.log(`\nFinished: ${event.reason}`);
+      break;
+    case 'error':
+      console.error(`Error: ${event.error}`);
+      break;
+  }
+}
+
+// Get the final message after streaming, add it to the context
+const finalMessage = await s.finalMessage();
+context.messages.push(finalMessage);
+
+// Handle tool calls if any
+const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall');
+for (const call of toolCalls) {
+  // Execute the tool
+  const result = call.name === 'get_time'
+    ? new Date().toISOString()
+    : 'Unknown tool';
+
+  // Add tool result to context
+  context.messages.push({
+    role: 'toolResult',
+    toolCallId: call.id,
+    toolName: call.name,
+    content: result,
+    isError: false
+  });
+}
+
+// Continue if there were tool calls
+if (toolCalls.length > 0) {
+  const continuation = await complete(model, context);
+  context.messages.push(continuation);
+  console.log('After tool execution:', continuation.content);
+}
+
+console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
+console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
+
+// Option 2: Get complete response without streaming
+const response = await complete(model, context);
 
-// response.content is an array of content blocks
 for (const block of response.content) {
   if (block.type === 'text') {
     console.log(block.text);
+  } else if (block.type === 'toolCall') {
+    console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
   }
 }
 ```
 
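The `usage` block on the final message already carries a computed `cost.total`. As a rough cross-check, spend can also be estimated from the model's pricing fields; a minimal sketch, assuming the `cost` values are USD per million tokens and ignoring cache reads/writes (the helper name and sample token counts are illustrative, not part of the library):

```typescript
import { getModel } from '@mariozechner/pi-ai';

// Hypothetical helper: estimate spend from token counts and per-million-token pricing.
function estimateCost(
  model: { cost: { input: number; output: number } },
  usage: { input: number; output: number }
): number {
  return (usage.input * model.cost.input + usage.output * model.cost.output) / 1_000_000;
}

const model = getModel('openai', 'gpt-4o-mini');
// e.g. 1,200 input tokens and 350 output tokens
console.log(`~$${estimateCost(model, { input: 1200, output: 350 }).toFixed(6)}`);
```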
 ## Image Input
 
+Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
+
 ```typescript
 import { readFileSync } from 'fs';
+import { getModel, complete } from '@mariozechner/pi-ai';
+
+const model = getModel('openai', 'gpt-4o-mini');
+
+// Check if model supports images
+if (model.input.includes('image')) {
+  console.log('Model supports vision');
+}
 
 const imageBuffer = readFileSync('image.png');
 const base64Image = imageBuffer.toString('base64');
 
-const response = await
+const response = await complete(model, {
   messages: [{
     role: 'user',
     content: [
@@ -57,166 +156,151 @@ const response = await llm.generate({
     ]
   }]
 });
+
+// Access the response
+for (const block of response.content) {
+  if (block.type === 'text') {
+    console.log(block.text);
+  }
+}
 ```
 
-##
+## Thinking/Reasoning
+
+Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
+
+### Unified Interface (streamSimple/completeSimple)
 
 ```typescript
-
-
-
-
-
-
-
-
-
+import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai';
+
+// Many models across providers support thinking/reasoning
+const model = getModel('anthropic', 'claude-sonnet-4-20250514');
+// or getModel('openai', 'gpt-5-mini');
+// or getModel('google', 'gemini-2.5-flash');
+// or getModel('xai', 'grok-code-fast-1');
+// or getModel('groq', 'openai/gpt-oss-20b');
+// or getModel('cerebras', 'gpt-oss-120b');
+// or getModel('openrouter', 'z-ai/glm-4.5v');
+
+// Check if model supports reasoning
+if (model.reasoning) {
+  console.log('Model supports reasoning/thinking');
+}
+
+// Use the simplified reasoning option
+const response = await completeSimple(model, {
+  messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
+}, {
+  reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
+});
+
+// Access thinking and text blocks
+for (const block of response.content) {
+  if (block.type === 'thinking') {
+    console.log('Thinking:', block.thinking);
+  } else if (block.type === 'text') {
+    console.log('Response:', block.text);
   }
-}
+}
+```
 
-
-messages.push({ role: 'user', content: 'What is the weather in Paris?' });
+### Provider-Specific Options (stream/complete)
 
-
-messages.push(response);
+For fine-grained control, use the provider-specific options:
 
-
-
+```typescript
+import { getModel, complete } from '@mariozechner/pi-ai';
 
-
-
-
+// OpenAI Reasoning (o1, o3, gpt-5)
+const openaiModel = getModel('openai', 'gpt-5-mini');
+await complete(openaiModel, context, {
+  reasoningEffort: 'medium',
+  reasoningSummary: 'detailed' // OpenAI Responses API only
+});
 
-
-
-
-
-
-
-  isError: false
-});
-}
+// Anthropic Thinking (Claude Sonnet 4)
+const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514');
+await complete(anthropicModel, context, {
+  thinkingEnabled: true,
+  thinkingBudgetTokens: 8192 // Optional token limit
+});
 
-
-
-
-
-
-//
-for (const block of followUp.content) {
-  if (block.type === 'text') {
-    console.log(block.text);
-  }
+// Google Gemini Thinking
+const googleModel = getModel('google', 'gemini-2.5-flash');
+await complete(googleModel, context, {
+  thinking: {
+    enabled: true,
+    budgetTokens: 8192 // -1 for dynamic, 0 to disable
   }
-}
+});
 ```
 
-
+### Streaming Thinking Content
+
+When streaming, thinking content is delivered through specific events:
 
 ```typescript
-const
-
-
-
-
-
-
-
-
-
-
-
-
-      break;
-    case 'text_end':
-      console.log(`\n[Text block complete: ${event.content.length} chars]`);
-      break;
-    case 'thinking_start':
-      console.error('[Starting thinking]');
-      break;
-    case 'thinking_delta':
-      process.stderr.write(event.delta);
-      break;
-    case 'thinking_end':
-      console.error(`\n[Thinking complete: ${event.content.length} chars]`);
-      break;
-    case 'toolCall':
-      console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`);
-      break;
-    case 'done':
-      console.log(`Completed with reason: ${event.reason}`);
-      console.log(`Tokens: ${event.message.usage.input} in, ${event.message.usage.output} out`);
-      break;
-    case 'error':
-      console.error('Error:', event.error);
-      break;
-  }
+const s = streamSimple(model, context, { reasoning: 'high' });
+
+for await (const event of s) {
+  switch (event.type) {
+    case 'thinking_start':
+      console.log('[Model started thinking]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta); // Stream thinking content
+      break;
+    case 'thinking_end':
+      console.log('\n[Thinking complete]');
+      break;
   }
-}
+}
 ```
 
-## Abort Signal
+## Errors & Abort Signal
 
-
+When a request ends with an error (including aborts), the API returns an `AssistantMessage` with:
+- `stopReason: 'error'` - Indicates the request ended with an error
+- `error: string` - Error message describing what happened
+- `content: array` - **Partial content** accumulated before the error
+- `usage: Usage` - **Token counts and costs** (may be incomplete depending on when error occurred)
 
-###
+### Aborting
+The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`.
 
 ```typescript
+import { getModel, stream } from '@mariozechner/pi-ai';
+
+const model = getModel('openai', 'gpt-4o-mini');
 const controller = new AbortController();
 
 // Abort after 2 seconds
 setTimeout(() => controller.abort(), 2000);
 
-const
+const s = stream(model, {
   messages: [{ role: 'user', content: 'Write a long story' }]
 }, {
-  signal: controller.signal
-  onEvent: (event) => {
-    if (event.type === 'text_delta') {
-      process.stdout.write(event.delta);
-    }
-  }
+  signal: controller.signal
 });
 
-
-if (
-
+for await (const event of s) {
+  if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  } else if (event.type === 'error') {
+    console.log('Error:', event.error);
+  }
+}
+
+// Get results (may be partial if aborted)
+const response = await s.finalMessage();
+if (response.stopReason === 'error') {
+  console.log('Error:', response.error);
   console.log('Partial content received:', response.content);
   console.log('Tokens used:', response.usage);
-} else {
-  console.log('Request completed successfully');
 }
 ```
 
-### Partial Results and Token Tracking
-
-When a request is aborted, the API returns an `AssistantMessage` with:
-- `stopReason: 'error'` - Indicates the request was aborted
-- `error: string` - Error message describing the abort
-- `content: array` - **Partial content** accumulated before the abort
-- `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred)
-
-```typescript
-// Example: User interrupts a long-running request
-const controller = new AbortController();
-document.getElementById('stop-button').onclick = () => controller.abort();
-
-const response = await llm.generate(context, {
-  signal: controller.signal,
-  onEvent: (e) => {
-    if (e.type === 'text_delta') updateUI(e.delta);
-  }
-});
-
-// Even if aborted, you get:
-// - Partial text that was streamed
-// - Token count (may be partial/estimated)
-// - Cost calculations (may be incomplete)
-console.log(`Generated ${response.content.length} content blocks`);
-console.log(`Estimated ${response.usage.output} output tokens`);
-console.log(`Estimated cost: $${response.usage.cost.total}`);
-```
-
 ### Continuing After Abort
 
 Aborted messages can be added to the conversation context and continued in subsequent requests:
 
@@ -232,19 +316,99 @@ const context = {
 const controller1 = new AbortController();
 setTimeout(() => controller1.abort(), 2000);
 
-const partial = await
+const partial = await complete(model, context, { signal: controller1.signal });
 
 // Add the partial response to context
 context.messages.push(partial);
 context.messages.push({ role: 'user', content: 'Please continue' });
 
 // Continue the conversation
-const continuation = await
+const continuation = await complete(model, context);
 ```
 
-
-
-
+## APIs, Models, and Providers
+
+The library implements 4 API interfaces, each with its own streaming function and options:
+
+- **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
+- **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
+- **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
+- **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
+
+### Providers and Models
+
+A **provider** offers models through a specific API. For example:
+- **Anthropic** models use the `anthropic-messages` API
+- **Google** models use the `google-generative-ai` API
+- **OpenAI** models use the `openai-responses` API
+- **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
+
+### Querying Providers and Models
+
+```typescript
+import { getProviders, getModels, getModel } from '@mariozechner/pi-ai';
+
+// Get all available providers
+const providers = getProviders();
+console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
+
+// Get all models from a provider (fully typed)
+const anthropicModels = getModels('anthropic');
+for (const model of anthropicModels) {
+  console.log(`${model.id}: ${model.name}`);
+  console.log(`  API: ${model.api}`); // 'anthropic-messages'
+  console.log(`  Context: ${model.contextWindow} tokens`);
+  console.log(`  Vision: ${model.input.includes('image')}`);
+  console.log(`  Reasoning: ${model.reasoning}`);
+}
+
+// Get a specific model (both provider and model ID are auto-completed in IDEs)
+const model = getModel('openai', 'gpt-4o-mini');
+console.log(`Using ${model.name} via ${model.api} API`);
+```
+
+### Custom Models
+
+You can create custom models for local inference servers or custom endpoints:
+
+```typescript
+import { Model, stream } from '@mariozechner/pi-ai';
+
+// Example: Ollama using OpenAI-compatible API
+const ollamaModel: Model<'openai-completions'> = {
+  id: 'llama-3.1-8b',
+  name: 'Llama 3.1 8B (Ollama)',
+  api: 'openai-completions',
+  provider: 'ollama',
+  baseUrl: 'http://localhost:11434/v1',
+  reasoning: false,
+  input: ['text'],
+  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+  contextWindow: 128000,
+  maxTokens: 32000
+};
+
+// Use the custom model
+const response = await stream(ollamaModel, context, {
+  apiKey: 'dummy' // Ollama doesn't need a real key
+});
+```
+
+### Type Safety
+
+Models are typed by their API, ensuring type-safe options:
+
+```typescript
+// TypeScript knows this is an Anthropic model
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+
+// So these options are type-checked for AnthropicOptions
+await stream(claude, context, {
+  thinkingEnabled: true, // ✓ Valid for anthropic-messages
+  thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
+  // reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages
+});
+```
 
 ## Cross-Provider Handoffs
 
@@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th
 When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
 
 - **User and tool result messages** are passed through unchanged
-- **Assistant messages from the same provider/
+- **Assistant messages from the same provider/API** are preserved as-is
 - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
 - **Tool calls and regular text** are preserved unchanged
 
 ### Example: Multi-Provider Conversation
 
 ```typescript
-import {
+import { getModel, complete, Context } from '@mariozechner/pi-ai';
 
 // Start with Claude
-const claude =
-const
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+const context: Context = {
+  messages: []
+};
 
-messages.push({ role: 'user', content: 'What is 25 * 18?' });
-const claudeResponse = await claude
-
+context.messages.push({ role: 'user', content: 'What is 25 * 18?' });
+const claudeResponse = await complete(claude, context, {
+  thinkingEnabled: true
 });
-messages.push(claudeResponse);
+context.messages.push(claudeResponse);
 
 // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
-const gpt5 =
-messages.push({ role: 'user', content: 'Is that calculation correct?' });
-const gptResponse = await gpt5
-messages.push(gptResponse);
+const gpt5 = getModel('openai', 'gpt-5-mini');
+context.messages.push({ role: 'user', content: 'Is that calculation correct?' });
+const gptResponse = await complete(gpt5, context);
+context.messages.push(gptResponse);
 
 // Switch to Gemini
-const gemini =
-messages.push({ role: 'user', content: 'What was the original question?' });
-const geminiResponse = await gemini
+const gemini = getModel('google', 'gemini-2.5-flash');
+context.messages.push({ role: 'user', content: 'What was the original question?' });
+const geminiResponse = await complete(gemini, context);
 ```
 
 ### Provider Compatibility
 
@@ -300,155 +466,65 @@ This enables flexible workflows where you can:
 - Use specialized models for specific tasks
 - Maintain conversation continuity across provider outages
 
-##
-
-### OpenAI Reasoning (o1, o3)
-```typescript
-const llm = createLLM('openai', 'o1-mini');
+## Context Serialization
 
-
-  reasoningEffort: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
-});
-```
+The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
 
-### Anthropic Thinking
 ```typescript
-
-
-await llm.generate(context, {
-  thinking: {
-    enabled: true,
-    budgetTokens: 2048 // Optional thinking token limit
-  }
-});
-```
+import { Context, getModel, complete } from '@mariozechner/pi-ai';
 
-
-
-
-
-
-
-});
-```
-
-## Custom Models
-
-### Local Models (Ollama, vLLM, etc.)
-```typescript
-import { OpenAICompletionsLLM } from '@mariozechner/pi-ai';
-
-const model = {
-  id: 'gpt-oss:20b',
-  provider: 'ollama',
-  baseUrl: 'http://localhost:11434/v1',
-  reasoning: false,
-  input: ['text'],
-  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 126000,
-  maxTokens: 32000,
-  name: 'Llama 3.1 8B'
-};
-
-const llm = new OpenAICompletionsLLM(model, 'dummy-key');
-```
-
-### Custom OpenAI-Compatible Endpoints
-```typescript
-const model = {
-  id: 'custom-model',
-  provider: 'custom',
-  baseUrl: 'https://your-api.com/v1',
-  reasoning: true,
-  input: ['text', 'image'],
-  cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 32768,
-  maxTokens: 8192,
-  name: 'Custom Model'
+// Create and use a context
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [
+    { role: 'user', content: 'What is TypeScript?' }
+  ]
 };
 
-const
-
-
-## Model Discovery
-
-All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time.
+const model = getModel('openai', 'gpt-4o-mini');
+const response = await complete(model, context);
+context.messages.push(response);
 
-
-
-
+// Serialize the entire context
+const serialized = JSON.stringify(context);
+console.log('Serialized context size:', serialized.length, 'bytes');
 
-//
-
-  console.log(`${modelId}: ${model.name}`);
-  console.log(`  Context: ${model.contextWindow} tokens`);
-  console.log(`  Reasoning: ${model.reasoning}`);
-  console.log(`  Vision: ${model.input.includes('image')}`);
-  console.log(`  Cost: $${model.cost.input}/$${model.cost.output} per million tokens`);
-}
+// Save to database, localStorage, file, etc.
+localStorage.setItem('conversation', serialized);
 
-//
-const
-
-for (const model of Object.values(provider.models)) {
-  if (model.reasoning) {
-    reasoningModels.push(model);
-  }
-}
-}
+// Later: deserialize and continue the conversation
+const restored: Context = JSON.parse(localStorage.getItem('conversation')!);
+restored.messages.push({ role: 'user', content: 'Tell me more about its type system' });
 
-//
-const
-
-for (const model of Object.values(provider.models)) {
-  if (model.input.includes('image')) {
-    visionModels.push(model);
-  }
-}
-}
+// Continue with any model
+const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022');
+const continuation = await complete(newModel, restored);
 ```
 
-
-```typescript
-import { getModel } from '@mariozechner/pi-ai';
-
-const model = getModel('openai', 'gpt-4o-mini');
-if (model) {
-  console.log(`Model: ${model.name}`);
-  console.log(`Provider: ${model.provider}`);
-  console.log(`Context window: ${model.contextWindow} tokens`);
-  console.log(`Max output: ${model.maxTokens} tokens`);
-  console.log(`Supports reasoning: ${model.reasoning}`);
-  console.log(`Supports images: ${model.input.includes('image')}`);
-  console.log(`Input cost: $${model.cost.input} per million tokens`);
-  console.log(`Output cost: $${model.cost.output} per million tokens`);
-  console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`);
-  console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`);
-}
-```
+> **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
 
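The same round trip works outside the browser; a minimal sketch, assuming a Node.js process that persists the context to a JSON file instead of `localStorage` (the file name and prompts are illustrative):

```typescript
import { readFileSync, writeFileSync } from 'fs';
import { Context, getModel, complete } from '@mariozechner/pi-ai';

const context: Context = {
  systemPrompt: 'You are a helpful assistant.',
  messages: [{ role: 'user', content: 'What is TypeScript?' }]
};
context.messages.push(await complete(getModel('openai', 'gpt-4o-mini'), context));

// Persist the conversation to disk
writeFileSync('conversation.json', JSON.stringify(context, null, 2));

// Later: restore it and continue with any model
const restored: Context = JSON.parse(readFileSync('conversation.json', 'utf8'));
restored.messages.push({ role: 'user', content: 'Summarize the conversation so far' });
const next = await complete(getModel('anthropic', 'claude-3-5-haiku-20241022'), restored);
console.log(next.content);
```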
 ## Browser Usage
 
 The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
 
 ```typescript
-import {
+import { getModel, complete } from '@mariozechner/pi-ai';
 
 // API key must be passed explicitly in browser
-const
-  apiKey: 'your-api-key'
-});
+const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
 
-const response = await
+const response = await complete(model, {
   messages: [{ role: 'user', content: 'Hello!' }]
+}, {
+  apiKey: 'your-api-key'
 });
 ```
 
 > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
 
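A minimal sketch of the proxy approach the warning recommends, assuming an Express server (the route, request shape, and model choice are illustrative, not part of the library); the browser sends only the conversation, and the provider key stays on the server:

```typescript
import express from 'express';
import { Context, getModel, complete } from '@mariozechner/pi-ai';

const app = express();
app.use(express.json());

// Hypothetical endpoint: the client posts a Context, the server calls the provider.
app.post('/api/chat', async (req, res) => {
  const context = req.body as Context;
  const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
  // API key is resolved from the server's environment (see Environment Variables below)
  const response = await complete(model, context);
  res.json(response);
});

app.listen(3000);
```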
-
+### Environment Variables (Node.js only)
 
-
+In Node.js environments, you can set environment variables to avoid passing API keys:
 
 ```bash
 OPENAI_API_KEY=sk-...
@@ -460,13 +536,17 @@ XAI_API_KEY=xai-...
 OPENROUTER_API_KEY=sk-or-...
 ```
 
-When set,
+When set, the library automatically uses these keys:
+
 ```typescript
 // Uses OPENAI_API_KEY from environment
-const
+const model = getModel('openai', 'gpt-4o-mini');
+const response = await complete(model, context);
 
-// Or
-const
+// Or override with explicit key
+const response = await complete(model, context, {
+  apiKey: 'sk-different-key'
+});
 ```
 
 ## License