noosphere 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1255 -118
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,13 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
Unified AI creation engine — text, image, video, and audio generation across all providers through a single interface.
|
|
4
4
|
|
|
5
|
+
One import. Every model. Every modality.
|
|
6
|
+
|
|
5
7
|
## Features
|
|
6
8
|
|
|
7
|
-
- **
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
9
|
+
- **4 modalities** — LLM chat, image generation, video generation, and text-to-speech
|
|
10
|
+
- **246+ LLM models** — via Pi-AI gateway (OpenAI, Anthropic, Google, Groq, Mistral, xAI, Cerebras, OpenRouter)
|
|
11
|
+
- **867+ media endpoints** — via FAL (Flux, SDXL, Kling, Sora 2, VEO 3, Kokoro, ElevenLabs, and hundreds more)
|
|
12
|
+
- **30+ HuggingFace tasks** — LLM, image, TTS, translation, summarization, classification, and more
|
|
13
|
+
- **Local-first architecture** — Auto-detects ComfyUI, Ollama, Piper, and Kokoro on your machine
|
|
14
|
+
- **Agentic capabilities** — Tool use, function calling, reasoning/thinking, vision, and agent loops via Pi-AI
|
|
10
15
|
- **Failover & retry** — Automatic retries with exponential backoff and cross-provider failover
|
|
11
|
-
- **Usage tracking** —
|
|
16
|
+
- **Usage tracking** — Real-time cost, latency, and token tracking across all providers
|
|
12
17
|
- **TypeScript-first** — Full type definitions with ESM and CommonJS support
|
|
13
18
|
|
|
14
19
|
## Install
|
|
@@ -56,7 +61,7 @@ const audio = await ai.speak({
|
|
|
56
61
|
|
|
57
62
|
## Configuration
|
|
58
63
|
|
|
59
|
-
API keys are resolved from the constructor or environment variables:
|
|
64
|
+
API keys are resolved from the constructor config or environment variables (config takes priority):
|
|
60
65
|
|
|
61
66
|
```typescript
|
|
62
67
|
const ai = new Noosphere({
|
|
@@ -80,47 +85,118 @@ Or set environment variables:
|
|
|
80
85
|
|---|---|
|
|
81
86
|
| `OPENAI_API_KEY` | OpenAI |
|
|
82
87
|
| `ANTHROPIC_API_KEY` | Anthropic |
|
|
83
|
-
| `GEMINI_API_KEY` | Google |
|
|
84
|
-
| `FAL_KEY` | FAL |
|
|
88
|
+
| `GEMINI_API_KEY` | Google Gemini |
|
|
89
|
+
| `FAL_KEY` | FAL.ai |
|
|
85
90
|
| `HUGGINGFACE_TOKEN` | Hugging Face |
|
|
86
91
|
| `GROQ_API_KEY` | Groq |
|
|
87
92
|
| `MISTRAL_API_KEY` | Mistral |
|
|
88
|
-
| `XAI_API_KEY` | xAI |
|
|
93
|
+
| `XAI_API_KEY` | xAI (Grok) |
|
|
89
94
|
| `OPENROUTER_API_KEY` | OpenRouter |
|
|
90
95
|
|
|
91
|
-
|
|
96
|
+
### Full Configuration Reference
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
const ai = new Noosphere({
|
|
100
|
+
// API keys (or use env vars above)
|
|
101
|
+
keys: { /* ... */ },
|
|
102
|
+
|
|
103
|
+
// Default models per modality
|
|
104
|
+
defaults: {
|
|
105
|
+
llm: { provider: 'pi-ai', model: 'claude-sonnet-4-20250514' },
|
|
106
|
+
image: { provider: 'fal', model: 'fal-ai/flux/schnell' },
|
|
107
|
+
video: { provider: 'fal', model: 'fal-ai/kling-video/v2/master/text-to-video' },
|
|
108
|
+
tts: { provider: 'fal', model: 'fal-ai/kokoro/american-english' },
|
|
109
|
+
},
|
|
110
|
+
|
|
111
|
+
// Local service configuration
|
|
112
|
+
autoDetectLocal: true, // env: NOOSPHERE_AUTO_DETECT_LOCAL
|
|
113
|
+
local: {
|
|
114
|
+
ollama: { enabled: true, host: 'http://localhost', port: 11434 },
|
|
115
|
+
comfyui: { enabled: true, host: 'http://localhost', port: 8188 },
|
|
116
|
+
piper: { enabled: true, host: 'http://localhost', port: 5500 },
|
|
117
|
+
kokoro: { enabled: true, host: 'http://localhost', port: 5501 },
|
|
118
|
+
custom: [], // additional LocalServiceConfig[]
|
|
119
|
+
},
|
|
120
|
+
|
|
121
|
+
// Retry & failover
|
|
122
|
+
retry: {
|
|
123
|
+
maxRetries: 2, // default: 2
|
|
124
|
+
backoffMs: 1000, // default: 1000 (exponential: 1s, 2s, 4s...)
|
|
125
|
+
failover: true, // default: true — try other providers on failure
|
|
126
|
+
retryableErrors: ['PROVIDER_UNAVAILABLE', 'RATE_LIMITED', 'TIMEOUT'],
|
|
127
|
+
},
|
|
128
|
+
|
|
129
|
+
// Timeouts per modality (ms)
|
|
130
|
+
timeout: {
|
|
131
|
+
llm: 30000, // 30s
|
|
132
|
+
image: 120000, // 2min
|
|
133
|
+
video: 300000, // 5min
|
|
134
|
+
tts: 60000, // 1min
|
|
135
|
+
},
|
|
136
|
+
|
|
137
|
+
// Model discovery cache (minutes)
|
|
138
|
+
discoveryCacheTTL: 60, // env: NOOSPHERE_DISCOVERY_CACHE_TTL
|
|
139
|
+
|
|
140
|
+
// Real-time usage callback
|
|
141
|
+
onUsage: (event) => {
|
|
142
|
+
console.log(`${event.provider}/${event.model}: $${event.cost} (${event.latencyMs}ms)`);
|
|
143
|
+
},
|
|
144
|
+
});
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Local Service Environment Variables
|
|
148
|
+
|
|
149
|
+
| Variable | Default | Description |
|
|
150
|
+
|---|---|---|
|
|
151
|
+
| `OLLAMA_HOST` | `http://localhost` | Ollama server host |
|
|
152
|
+
| `OLLAMA_PORT` | `11434` | Ollama server port |
|
|
153
|
+
| `COMFYUI_HOST` | `http://localhost` | ComfyUI server host |
|
|
154
|
+
| `COMFYUI_PORT` | `8188` | ComfyUI server port |
|
|
155
|
+
| `PIPER_HOST` | `http://localhost` | Piper TTS server host |
|
|
156
|
+
| `PIPER_PORT` | `5500` | Piper TTS server port |
|
|
157
|
+
| `KOKORO_HOST` | `http://localhost` | Kokoro TTS server host |
|
|
158
|
+
| `KOKORO_PORT` | `5501` | Kokoro TTS server port |
|
|
159
|
+
| `NOOSPHERE_AUTO_DETECT_LOCAL` | `true` | Enable/disable local service auto-detection |
|
|
160
|
+
| `NOOSPHERE_DISCOVERY_CACHE_TTL` | `60` | Model cache TTL in minutes |
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## API Reference
|
|
92
165
|
|
|
93
166
|
### `new Noosphere(config?)`
|
|
94
167
|
|
|
95
|
-
Creates a new instance. Providers are initialized lazily on first
|
|
168
|
+
Creates a new instance. Providers are initialized lazily on first API call. Auto-detects local services via HTTP pings (2s timeout each).
|
|
96
169
|
|
|
97
|
-
### Generation
|
|
170
|
+
### Generation Methods
|
|
98
171
|
|
|
99
172
|
#### `ai.chat(options): Promise<NoosphereResult>`
|
|
100
173
|
|
|
101
|
-
Generate text with
|
|
174
|
+
Generate text with any LLM. Supports 246+ models across 8 providers.
|
|
102
175
|
|
|
103
176
|
```typescript
|
|
104
177
|
const result = await ai.chat({
|
|
105
|
-
provider: 'anthropic',
|
|
106
|
-
model: 'claude-sonnet-4-20250514',
|
|
178
|
+
provider: 'anthropic', // optional — auto-resolved if omitted
|
|
179
|
+
model: 'claude-sonnet-4-20250514', // optional — uses default or first available
|
|
107
180
|
messages: [
|
|
108
181
|
{ role: 'system', content: 'You are helpful.' },
|
|
109
182
|
{ role: 'user', content: 'Explain quantum computing' },
|
|
110
183
|
],
|
|
111
|
-
temperature: 0.7,
|
|
112
|
-
maxTokens: 1024,
|
|
113
|
-
jsonMode: false,
|
|
184
|
+
temperature: 0.7, // optional (0-2)
|
|
185
|
+
maxTokens: 1024, // optional
|
|
186
|
+
jsonMode: false, // optional
|
|
114
187
|
});
|
|
115
188
|
|
|
116
|
-
console.log(result.content);
|
|
117
|
-
console.log(result.thinking);
|
|
118
|
-
console.log(result.usage.cost);
|
|
189
|
+
console.log(result.content); // response text
|
|
190
|
+
console.log(result.thinking); // reasoning output (Claude, GPT-5, o3, Gemini, Grok-4)
|
|
191
|
+
console.log(result.usage.cost); // cost in USD
|
|
192
|
+
console.log(result.usage.input); // input tokens
|
|
193
|
+
console.log(result.usage.output); // output tokens
|
|
194
|
+
console.log(result.latencyMs); // response time in ms
|
|
119
195
|
```
|
|
120
196
|
|
|
121
197
|
#### `ai.stream(options): NoosphereStream`
|
|
122
198
|
|
|
123
|
-
Stream LLM responses.
|
|
199
|
+
Stream LLM responses token-by-token. Same options as `chat()`.
|
|
124
200
|
|
|
125
201
|
```typescript
|
|
126
202
|
const stream = ai.stream({
|
|
@@ -128,67 +204,95 @@ const stream = ai.stream({
|
|
|
128
204
|
});
|
|
129
205
|
|
|
130
206
|
for await (const event of stream) {
|
|
131
|
-
|
|
132
|
-
|
|
207
|
+
switch (event.type) {
|
|
208
|
+
case 'text_delta':
|
|
209
|
+
process.stdout.write(event.delta!);
|
|
210
|
+
break;
|
|
211
|
+
case 'thinking_delta':
|
|
212
|
+
console.log('[thinking]', event.delta);
|
|
213
|
+
break;
|
|
214
|
+
case 'done':
|
|
215
|
+
console.log('\n\nUsage:', event.result!.usage);
|
|
216
|
+
break;
|
|
217
|
+
case 'error':
|
|
218
|
+
console.error(event.error);
|
|
219
|
+
break;
|
|
220
|
+
}
|
|
133
221
|
}
|
|
134
222
|
|
|
135
|
-
// Or
|
|
223
|
+
// Or consume the full result
|
|
136
224
|
const result = await stream.result();
|
|
225
|
+
|
|
226
|
+
// Abort at any time
|
|
227
|
+
stream.abort();
|
|
137
228
|
```
|
|
138
229
|
|
|
139
230
|
#### `ai.image(options): Promise<NoosphereResult>`
|
|
140
231
|
|
|
141
|
-
Generate images.
|
|
232
|
+
Generate images. Supports 200+ image models via FAL, HuggingFace, and ComfyUI.
|
|
142
233
|
|
|
143
234
|
```typescript
|
|
144
235
|
const result = await ai.image({
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
236
|
+
provider: 'fal', // optional
|
|
237
|
+
model: 'fal-ai/flux-2-pro', // optional
|
|
238
|
+
prompt: 'A futuristic cityscape at sunset',
|
|
239
|
+
negativePrompt: 'blurry, low quality', // optional
|
|
240
|
+
width: 1024, // optional
|
|
241
|
+
height: 768, // optional
|
|
242
|
+
seed: 42, // optional — reproducible results
|
|
243
|
+
steps: 30, // optional — inference steps (more = higher quality)
|
|
244
|
+
guidanceScale: 7.5, // optional — prompt adherence (higher = stricter)
|
|
152
245
|
});
|
|
153
246
|
|
|
154
|
-
console.log(result.url);
|
|
155
|
-
console.log(result.
|
|
247
|
+
console.log(result.url); // image URL (FAL)
|
|
248
|
+
console.log(result.buffer); // image Buffer (HuggingFace, ComfyUI)
|
|
249
|
+
console.log(result.media?.width); // actual dimensions
|
|
250
|
+
console.log(result.media?.height);
|
|
251
|
+
console.log(result.media?.format); // 'png'
|
|
156
252
|
```
|
|
157
253
|
|
|
158
254
|
#### `ai.video(options): Promise<NoosphereResult>`
|
|
159
255
|
|
|
160
|
-
Generate videos.
|
|
256
|
+
Generate videos. Supports 150+ video models via FAL (Kling, Sora 2, VEO 3, WAN, Pixverse, and more).
|
|
161
257
|
|
|
162
258
|
```typescript
|
|
163
259
|
const result = await ai.video({
|
|
260
|
+
provider: 'fal',
|
|
261
|
+
model: 'fal-ai/kling-video/v2/master/text-to-video',
|
|
164
262
|
prompt: 'A bird flying through clouds',
|
|
165
|
-
imageUrl: 'https://...',
|
|
166
|
-
duration: 5,
|
|
167
|
-
fps: 24,
|
|
168
|
-
width: 1280,
|
|
169
|
-
height: 720,
|
|
263
|
+
imageUrl: 'https://...', // optional — image-to-video
|
|
264
|
+
duration: 5, // optional — seconds
|
|
265
|
+
fps: 24, // optional
|
|
266
|
+
width: 1280, // optional
|
|
267
|
+
height: 720, // optional
|
|
170
268
|
});
|
|
171
269
|
|
|
172
|
-
console.log(result.url);
|
|
270
|
+
console.log(result.url); // video URL
|
|
271
|
+
console.log(result.media?.duration); // actual duration
|
|
272
|
+
console.log(result.media?.fps); // frames per second
|
|
273
|
+
console.log(result.media?.format); // 'mp4'
|
|
173
274
|
```
|
|
174
275
|
|
|
175
276
|
#### `ai.speak(options): Promise<NoosphereResult>`
|
|
176
277
|
|
|
177
|
-
Text-to-speech synthesis.
|
|
278
|
+
Text-to-speech synthesis. Supports 50+ TTS models via FAL, HuggingFace, Piper, and Kokoro.
|
|
178
279
|
|
|
179
280
|
```typescript
|
|
180
281
|
const result = await ai.speak({
|
|
282
|
+
provider: 'fal',
|
|
283
|
+
model: 'fal-ai/kokoro/american-english',
|
|
181
284
|
text: 'Hello world',
|
|
182
|
-
voice: '
|
|
183
|
-
language: 'en',
|
|
184
|
-
speed: 1.0,
|
|
185
|
-
format: 'mp3',
|
|
285
|
+
voice: 'af_heart', // optional — voice ID
|
|
286
|
+
language: 'en', // optional
|
|
287
|
+
speed: 1.0, // optional
|
|
288
|
+
format: 'mp3', // optional — 'mp3' | 'wav' | 'ogg'
|
|
186
289
|
});
|
|
187
290
|
|
|
188
|
-
|
|
291
|
+
console.log(result.buffer); // audio Buffer
|
|
292
|
+
console.log(result.url); // audio URL (FAL)
|
|
189
293
|
```
|
|
190
294
|
|
|
191
|
-
### Discovery
|
|
295
|
+
### Discovery Methods
|
|
192
296
|
|
|
193
297
|
#### `ai.getProviders(modality?): Promise<ProviderInfo[]>`
|
|
194
298
|
|
|
@@ -196,15 +300,16 @@ List available providers, optionally filtered by modality.
|
|
|
196
300
|
|
|
197
301
|
```typescript
|
|
198
302
|
const providers = await ai.getProviders('llm');
|
|
199
|
-
// [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount:
|
|
303
|
+
// [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount: 246 }]
|
|
200
304
|
```
|
|
201
305
|
|
|
202
306
|
#### `ai.getModels(modality?): Promise<ModelInfo[]>`
|
|
203
307
|
|
|
204
|
-
List all available models.
|
|
308
|
+
List all available models with full metadata.
|
|
205
309
|
|
|
206
310
|
```typescript
|
|
207
311
|
const models = await ai.getModels('image');
|
|
312
|
+
// Returns ModelInfo[] with id, provider, name, modality, local, cost, capabilities
|
|
208
313
|
```
|
|
209
314
|
|
|
210
315
|
#### `ai.getModel(provider, modelId): Promise<ModelInfo | null>`
|
|
@@ -213,129 +318,1161 @@ Get details about a specific model.
|
|
|
213
318
|
|
|
214
319
|
#### `ai.syncModels(): Promise<SyncResult>`
|
|
215
320
|
|
|
216
|
-
Refresh model lists from all providers.
|
|
321
|
+
Refresh model lists from all providers. Returns sync count, per-provider breakdown, and any errors.
|
|
217
322
|
|
|
218
323
|
### Usage Tracking
|
|
219
324
|
|
|
220
325
|
#### `ai.getUsage(options?): UsageSummary`
|
|
221
326
|
|
|
222
|
-
Get aggregated usage statistics.
|
|
327
|
+
Get aggregated usage statistics with optional filtering.
|
|
223
328
|
|
|
224
329
|
```typescript
|
|
225
|
-
const usage = ai.getUsage({
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
330
|
+
const usage = ai.getUsage({
|
|
331
|
+
since: '2024-01-01', // optional — ISO date or Date object
|
|
332
|
+
until: '2024-12-31', // optional
|
|
333
|
+
provider: 'openai', // optional — filter by provider
|
|
334
|
+
modality: 'llm', // optional — filter by modality
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
console.log(usage.totalCost); // total USD spent
|
|
338
|
+
console.log(usage.totalRequests); // number of requests
|
|
339
|
+
console.log(usage.byProvider); // { openai: 2.50, anthropic: 1.20, fal: 0.30 }
|
|
340
|
+
console.log(usage.byModality); // { llm: 3.00, image: 0.70, video: 0.30, tts: 0.00 }
|
|
230
341
|
```
|
|
231
342
|
|
|
232
|
-
|
|
343
|
+
### Lifecycle
|
|
344
|
+
|
|
345
|
+
#### `ai.registerProvider(provider): void`
|
|
346
|
+
|
|
347
|
+
Register a custom provider (see [Custom Providers](#custom-providers)).
|
|
348
|
+
|
|
349
|
+
#### `ai.dispose(): Promise<void>`
|
|
350
|
+
|
|
351
|
+
Cleanup all provider resources, clear model cache, and reset usage tracker.
|
|
352
|
+
|
|
353
|
+
### NoosphereResult
|
|
354
|
+
|
|
355
|
+
Every generation method returns a `NoosphereResult`:
|
|
233
356
|
|
|
234
357
|
```typescript
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
358
|
+
interface NoosphereResult {
|
|
359
|
+
content?: string; // LLM response text
|
|
360
|
+
thinking?: string; // reasoning/thinking output (supported models)
|
|
361
|
+
url?: string; // media URL (images, videos, audio from cloud providers)
|
|
362
|
+
buffer?: Buffer; // media binary data (local providers, HuggingFace)
|
|
363
|
+
provider: string; // which provider handled the request
|
|
364
|
+
model: string; // which model was used
|
|
365
|
+
modality: Modality; // 'llm' | 'image' | 'video' | 'tts'
|
|
366
|
+
latencyMs: number; // request duration in milliseconds
|
|
367
|
+
usage: {
|
|
368
|
+
cost: number; // cost in USD
|
|
369
|
+
input?: number; // input tokens/characters
|
|
370
|
+
output?: number; // output tokens
|
|
371
|
+
unit?: string; // 'tokens' | 'characters' | 'per_image' | 'per_second' | 'free'
|
|
372
|
+
};
|
|
373
|
+
media?: {
|
|
374
|
+
width?: number; // image/video width
|
|
375
|
+
height?: number; // image/video height
|
|
376
|
+
duration?: number; // video/audio duration in seconds
|
|
377
|
+
format?: string; // 'png' | 'mp4' | 'mp3' | 'wav'
|
|
378
|
+
fps?: number; // video frames per second
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
---
|
|
384
|
+
|
|
385
|
+
## Providers In Depth
|
|
386
|
+
|
|
387
|
+
### Pi-AI — LLM Gateway (246+ models)
|
|
388
|
+
|
|
389
|
+
**Provider ID:** `pi-ai`
|
|
390
|
+
**Modalities:** LLM (chat + streaming)
|
|
391
|
+
**Library:** `@mariozechner/pi-ai`
|
|
392
|
+
|
|
393
|
+
A unified gateway that routes to 8 LLM providers through 4 different API protocols:
|
|
394
|
+
|
|
395
|
+
| API Protocol | Providers |
|
|
396
|
+
|---|---|
|
|
397
|
+
| `anthropic-messages` | Anthropic |
|
|
398
|
+
| `google-generative-ai` | Google |
|
|
399
|
+
| `openai-responses` | OpenAI (reasoning models) |
|
|
400
|
+
| `openai-completions` | OpenAI, xAI, Groq, Cerebras, Zai, OpenRouter |
|
|
401
|
+
|
|
402
|
+
#### Anthropic Models (19)
|
|
403
|
+
|
|
404
|
+
| Model | Context | Reasoning | Vision | Input Cost | Output Cost |
|
|
405
|
+
|---|---|---|---|---|---|
|
|
406
|
+
| `claude-opus-4-0` | 200k | Yes | Yes | $15/M | $75/M |
|
|
407
|
+
| `claude-opus-4-1` | 200k | Yes | Yes | $15/M | $75/M |
|
|
408
|
+
| `claude-sonnet-4-20250514` | 200k | Yes | Yes | $3/M | $15/M |
|
|
409
|
+
| `claude-sonnet-4-5-20250929` | 200k | Yes | Yes | $3/M | $15/M |
|
|
410
|
+
| `claude-3-7-sonnet-20250219` | 200k | Yes | Yes | $3/M | $15/M |
|
|
411
|
+
| `claude-3-5-sonnet-20241022` | 200k | No | Yes | $3/M | $15/M |
|
|
412
|
+
| `claude-haiku-4-5-20251001` | 200k | No | Yes | $0.80/M | $4/M |
|
|
413
|
+
| `claude-3-5-haiku-20241022` | 200k | No | Yes | $0.80/M | $4/M |
|
|
414
|
+
| `claude-3-haiku-20240307` | 200k | No | Yes | $0.25/M | $1.25/M |
|
|
415
|
+
| *...and 10 more variants* | | | | | |
|
|
416
|
+
|
|
417
|
+
#### OpenAI Models (24)
|
|
418
|
+
|
|
419
|
+
| Model | Context | Reasoning | Vision | Input Cost | Output Cost |
|
|
420
|
+
|---|---|---|---|---|---|
|
|
421
|
+
| `gpt-5` | 200k | Yes | Yes | $10/M | $30/M |
|
|
422
|
+
| `gpt-5-mini` | 200k | Yes | Yes | $2.50/M | $10/M |
|
|
423
|
+
| `gpt-4.1` | 128k | No | Yes | $2/M | $8/M |
|
|
424
|
+
| `gpt-4.1-mini` | 128k | No | Yes | $0.40/M | $1.60/M |
|
|
425
|
+
| `gpt-4.1-nano` | 128k | No | Yes | $0.10/M | $0.40/M |
|
|
426
|
+
| `gpt-4o` | 128k | No | Yes | $2.50/M | $10/M |
|
|
427
|
+
| `gpt-4o-mini` | 128k | No | Yes | $0.15/M | $0.60/M |
|
|
428
|
+
| `o3-pro` | 200k | Yes | Yes | $20/M | $80/M |
|
|
429
|
+
| `o3-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
|
|
430
|
+
| `o4-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
|
|
431
|
+
| `codex-mini-latest` | 200k | Yes | No | $1.50/M | $6/M |
|
|
432
|
+
| *...and 13 more variants* | | | | | |
|
|
433
|
+
|
|
434
|
+
#### Google Gemini Models (19)
|
|
435
|
+
|
|
436
|
+
| Model | Context | Reasoning | Vision | Cost |
|
|
437
|
+
|---|---|---|---|---|
|
|
438
|
+
| `gemini-2.5-flash` | 1M | Yes | Yes | $0.15-0.60/M |
|
|
439
|
+
| `gemini-2.5-pro` | 1M | Yes | Yes | $1.25-10/M |
|
|
440
|
+
| `gemini-2.0-flash` | 1M | No | Yes | $0.10-0.40/M |
|
|
441
|
+
| `gemini-2.0-flash-lite` | 1M | No | Yes | $0.025-0.10/M |
|
|
442
|
+
| `gemini-1.5-flash` | 1M | No | Yes | $0.075-0.30/M |
|
|
443
|
+
| `gemini-1.5-pro` | 2M | No | Yes | $1.25-5/M |
|
|
444
|
+
| *...and 13 more variants* | | | | |
|
|
445
|
+
|
|
446
|
+
#### xAI Grok Models (20)
|
|
447
|
+
|
|
448
|
+
| Model | Context | Reasoning | Vision | Input Cost |
|
|
449
|
+
|---|---|---|---|---|
|
|
450
|
+
| `grok-4` | 256k | Yes | Yes | $5/M |
|
|
451
|
+
| `grok-4-fast` | 256k | Yes | Yes | $3/M |
|
|
452
|
+
| `grok-3` | 131k | No | Yes | $3/M |
|
|
453
|
+
| `grok-3-fast` | 131k | No | Yes | $5/M |
|
|
454
|
+
| `grok-3-mini-fast-latest` | 131k | Yes | No | $0.30/M |
|
|
455
|
+
| `grok-2-vision` | 32k | No | Yes | $2/M |
|
|
456
|
+
| *...and 14 more variants* | | | | |
|
|
457
|
+
|
|
458
|
+
#### Groq Models (15)
|
|
459
|
+
|
|
460
|
+
| Model | Context | Cost |
|
|
461
|
+
|---|---|---|
|
|
462
|
+
| `llama-3.3-70b-versatile` | 128k | $0.59/M |
|
|
463
|
+
| `llama-3.1-8b-instant` | 128k | $0.05/M |
|
|
464
|
+
| `mistral-saba-24b` | 32k | $0.40/M |
|
|
465
|
+
| `qwen-qwq-32b` | 128k | $0.29/M |
|
|
466
|
+
| `deepseek-r1-distill-llama-70b` | 128k | $0.75/M |
|
|
467
|
+
| *...and 10 more* | | |
|
|
468
|
+
|
|
469
|
+
#### Cerebras Models (3)
|
|
470
|
+
|
|
471
|
+
`gpt-oss-120b`, `qwen-3-235b-a22b-instruct-2507`, `qwen-3-coder-480b`
|
|
472
|
+
|
|
473
|
+
#### Zai Models (5)
|
|
474
|
+
|
|
475
|
+
`glm-4.6`, `glm-4.5`, `glm-4.5-flash`, `glm-4.5v`, `glm-4.5-air`
|
|
476
|
+
|
|
477
|
+
#### OpenRouter (141 models)
|
|
478
|
+
|
|
479
|
+
Aggregator providing access to hundreds of additional models including Llama, Deepseek, Mistral, Qwen, and many more. Full list available via `ai.getModels('llm')`.
|
|
480
|
+
|
|
481
|
+
#### The Pi-AI Engine — Deep Dive
|
|
482
|
+
|
|
483
|
+
Noosphere's LLM provider is powered by `@mariozechner/pi-ai`, part of the **Pi mono-repo** by Mario Zechner (badlogic). Pi is NOT a wrapper like LangChain or Mastra — it's a **micro-framework for agentic AI** (~15K LOC, 4 npm packages) that was built from scratch as a minimalist alternative to Claude Code.
|
|
484
|
+
|
|
485
|
+
Pi consists of 4 packages in 3 tiers:
|
|
486
|
+
|
|
487
|
+
```
|
|
488
|
+
TIER 1 — FOUNDATION
|
|
489
|
+
@mariozechner/pi-ai LLM API: stream(), complete(), model registry
|
|
490
|
+
0 internal deps, talks to 20+ providers
|
|
491
|
+
|
|
492
|
+
TIER 2 — INFRASTRUCTURE
|
|
493
|
+
@mariozechner/pi-agent-core Agent loop, tool execution, lifecycle events
|
|
494
|
+
Depends on pi-ai
|
|
495
|
+
|
|
496
|
+
@mariozechner/pi-tui Terminal UI with differential rendering
|
|
497
|
+
Standalone, 0 internal deps
|
|
498
|
+
|
|
499
|
+
TIER 3 — APPLICATION
|
|
500
|
+
@mariozechner/pi-coding-agent CLI + SDK: sessions, compaction, extensions
|
|
501
|
+
Depends on all above
|
|
502
|
+
```
|
|
503
|
+
|
|
504
|
+
Noosphere uses `@mariozechner/pi-ai` (Tier 1) directly for LLM access. But the full Pi ecosystem provides capabilities that can be layered on top.
|
|
505
|
+
|
|
506
|
+
---
|
|
507
|
+
|
|
508
|
+
#### How Pi Keeps 200+ Models Updated
|
|
509
|
+
|
|
510
|
+
Pi does NOT hardcode models. It has an **auto-generation pipeline** that runs at build time:
|
|
511
|
+
|
|
512
|
+
```
|
|
513
|
+
STEP 1: FETCH (3 sources in parallel)
|
|
514
|
+
┌──────────────────┐ ┌──────────────────┐ ┌───────────────┐
|
|
515
|
+
│ models.dev │ │ OpenRouter │ │ Vercel AI │
|
|
516
|
+
│ /api.json │ │ /v1/models │ │ Gateway │
|
|
517
|
+
│ │ │ │ │ /v1/models │
|
|
518
|
+
│ Context windows │ │ Pricing ($/M) │ │ Capability │
|
|
519
|
+
│ Capabilities │ │ Availability │ │ tags │
|
|
520
|
+
│ Tool support │ │ Provider routing │ │ │
|
|
521
|
+
└────────┬─────────┘ └────────┬─────────┘ └──────┬────────┘
|
|
522
|
+
└─────────┬───────────┴────────────────────┘
|
|
523
|
+
▼
|
|
524
|
+
STEP 2: MERGE & DEDUPLICATE
|
|
525
|
+
Priority: models.dev > OpenRouter > Vercel
|
|
526
|
+
Key: provider + modelId
|
|
527
|
+
│
|
|
528
|
+
▼
|
|
529
|
+
STEP 3: FILTER
|
|
530
|
+
✅ tool_call === true
|
|
531
|
+
✅ streaming supported
|
|
532
|
+
✅ system messages supported
|
|
533
|
+
✅ not deprecated
|
|
534
|
+
│
|
|
535
|
+
▼
|
|
536
|
+
STEP 4: NORMALIZE
|
|
537
|
+
Costs → $/million tokens
|
|
538
|
+
API type → one of 4 protocols
|
|
539
|
+
Input modes → ["text"] or ["text","image"]
|
|
540
|
+
│
|
|
541
|
+
▼
|
|
542
|
+
STEP 5: PATCH (manual corrections)
|
|
543
|
+
Claude Opus: cache pricing fix
|
|
544
|
+
GPT-5.4: context window override
|
|
545
|
+
Kimi K2.5: hardcoded pricing
|
|
546
|
+
│
|
|
547
|
+
▼
|
|
548
|
+
STEP 6: GENERATE TypeScript
|
|
549
|
+
→ models.generated.ts (~330KB)
|
|
550
|
+
→ 200+ models with full type safety
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
Each generated model entry looks like:
|
|
554
|
+
|
|
555
|
+
```typescript
|
|
556
|
+
{
|
|
557
|
+
id: "claude-opus-4-6",
|
|
558
|
+
name: "Claude Opus 4.6",
|
|
559
|
+
api: "anthropic-messages",
|
|
560
|
+
provider: "anthropic",
|
|
561
|
+
baseUrl: "https://api.anthropic.com",
|
|
562
|
+
reasoning: true,
|
|
563
|
+
input: ["text", "image"],
|
|
564
|
+
cost: {
|
|
565
|
+
input: 15, // $15/M tokens
|
|
566
|
+
output: 75, // $75/M tokens
|
|
567
|
+
cacheRead: 1.5, // prompt cache hit
|
|
568
|
+
cacheWrite: 18.75, // prompt cache write
|
|
238
569
|
},
|
|
239
|
-
|
|
570
|
+
contextWindow: 200_000,
|
|
571
|
+
maxTokens: 32_000,
|
|
572
|
+
} satisfies Model<"anthropic-messages">
|
|
240
573
|
```
|
|
241
574
|
|
|
242
|
-
|
|
575
|
+
When a new model is released (e.g., Gemini 3.0), it appears in models.dev/OpenRouter → the script captures it → a new Pi version is published → Noosphere updates its dependency.
|
|
576
|
+
|
|
577
|
+
---
|
|
243
578
|
|
|
244
|
-
|
|
579
|
+
#### 4 API Protocols — How Pi Talks to Every Provider
|
|
580
|
+
|
|
581
|
+
Pi abstracts all LLM providers into 4 wire protocols. Each protocol handles the differences in request format, streaming format, auth headers, and response parsing:
|
|
582
|
+
|
|
583
|
+
| Protocol | Providers | Key Differences |
|
|
584
|
+
|---|---|---|
|
|
585
|
+
| `anthropic-messages` | Anthropic, AWS Bedrock | `system` as top-level field, content as `[{type:"text", text:"..."}]` blocks, `x-api-key` auth, `anthropic-beta` headers |
|
|
586
|
+
| `openai-completions` | OpenAI, xAI, Groq, Cerebras, OpenRouter, Ollama, vLLM | `system` as message with `role:"system"`, content as string, `Authorization: Bearer` auth, `tool_calls` array |
|
|
587
|
+
| `openai-responses` | OpenAI (reasoning models) | New Responses API with server-side context, `store: true`, reasoning summaries |
|
|
588
|
+
| `google-generative-ai` | Google Gemini, Vertex AI | `systemInstruction.parts[{text}]`, role `"model"` instead of `"assistant"`, `functionCall` instead of `tool_calls`, `thinkingConfig` |
|
|
589
|
+
|
|
590
|
+
The core function `streamSimple()` detects which protocol to use based on `model.api` and handles all the formatting/parsing transparently:
|
|
245
591
|
|
|
246
592
|
```typescript
|
|
247
|
-
|
|
593
|
+
// What happens inside Pi when you call Noosphere's chat():
|
|
594
|
+
async function* streamSimple(
|
|
595
|
+
model: Model, // includes model.api to determine protocol
|
|
596
|
+
context: Context, // { systemPrompt, messages, tools }
|
|
597
|
+
options?: StreamOptions // { signal, onPayload, thinkingLevel, ... }
|
|
598
|
+
): AsyncIterable<AssistantMessageEvent> {
|
|
599
|
+
// 1. Format request according to model.api protocol
|
|
600
|
+
// 2. Open SSE/WebSocket stream
|
|
601
|
+
// 3. Parse provider-specific chunks
|
|
602
|
+
// 4. Emit normalized events:
|
|
603
|
+
// → text_delta, thinking_delta, tool_call, message_end
|
|
604
|
+
}
|
|
605
|
+
```
|
|
248
606
|
|
|
249
|
-
|
|
250
|
-
id: 'my-provider',
|
|
251
|
-
name: 'My Provider',
|
|
252
|
-
modalities: ['llm'],
|
|
253
|
-
isLocal: false,
|
|
607
|
+
---
|
|
254
608
|
|
|
255
|
-
|
|
256
|
-
async listModels() { return [/* ... */]; },
|
|
609
|
+
#### Agentic Capabilities
|
|
257
610
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
611
|
+
These are the capabilities people get access to through the Pi-AI engine:
|
|
612
|
+
|
|
613
|
+
##### 1. Tool Use / Function Calling
|
|
614
|
+
|
|
615
|
+
Full structured tool calling supported across **all major providers**. Tool definitions use TypeBox schemas with runtime validation via AJV:
|
|
616
|
+
|
|
617
|
+
```typescript
|
|
618
|
+
import { type Tool, StringEnum } from '@mariozechner/pi-ai';
|
|
619
|
+
import { Type } from '@sinclair/typebox';
|
|
620
|
+
|
|
621
|
+
// Define a tool with typed parameters
|
|
622
|
+
const searchTool: Tool = {
|
|
623
|
+
name: 'web_search',
|
|
624
|
+
description: 'Search the web for information',
|
|
625
|
+
parameters: Type.Object({
|
|
626
|
+
query: Type.String({ description: 'Search query' }),
|
|
627
|
+
maxResults: Type.Optional(Type.Number({ default: 5 })),
|
|
628
|
+
type: StringEnum(['web', 'images', 'news'], { description: 'Search type' }),
|
|
629
|
+
}),
|
|
262
630
|
};
|
|
263
631
|
|
|
264
|
-
|
|
632
|
+
// Pass tools in context — Pi handles the rest
|
|
633
|
+
const context = {
|
|
634
|
+
systemPrompt: 'You are a helpful assistant.',
|
|
635
|
+
messages: [{ role: 'user', content: 'Search for recent AI news' }],
|
|
636
|
+
tools: [searchTool],
|
|
637
|
+
};
|
|
265
638
|
```
|
|
266
639
|
|
|
267
|
-
|
|
640
|
+
**How tool calling works internally:**
|
|
268
641
|
|
|
269
|
-
|
|
642
|
+
```
|
|
643
|
+
User prompt → LLM → "I need to call web_search"
|
|
644
|
+
│
|
|
645
|
+
▼
|
|
646
|
+
Pi validates arguments with AJV
|
|
647
|
+
against the TypeBox schema
|
|
648
|
+
│
|
|
649
|
+
┌─────┴─────┐
|
|
650
|
+
│ Valid? │
|
|
651
|
+
├─Yes───────┤
|
|
652
|
+
│ Execute │
|
|
653
|
+
│ tool │
|
|
654
|
+
├───────────┤
|
|
655
|
+
│ No │
|
|
656
|
+
│ Return │
|
|
657
|
+
│ validation│
|
|
658
|
+
│ error to │
|
|
659
|
+
│ LLM │
|
|
660
|
+
└───────────┘
|
|
661
|
+
│
|
|
662
|
+
▼
|
|
663
|
+
Tool result → back into context → LLM continues
|
|
664
|
+
```
|
|
665
|
+
|
|
666
|
+
**Provider-specific tool_choice control:**
|
|
667
|
+
- **Anthropic:** `"auto" | "any" | "none" | { type: "tool", name: "specific_tool" }`
|
|
668
|
+
- **OpenAI:** `"auto" | "none" | "required" | { type: "function", function: { name: "..." } }`
|
|
669
|
+
- **Google:** `"auto" | "none" | "any"`
|
|
670
|
+
|
|
671
|
+
**Partial JSON streaming:** During streaming, Pi parses tool call arguments incrementally using partial JSON parsing. This means you can see tool arguments being built in real-time, not just after the tool call completes.
|
|
672
|
+
|
|
673
|
+
##### 2. Reasoning / Extended Thinking
|
|
674
|
+
|
|
675
|
+
Pi provides **unified thinking support** across all providers that support it. Thinking blocks are automatically extracted, separated from regular text, and streamed as distinct events:
|
|
676
|
+
|
|
677
|
+
| Provider | Models | Control Parameters | How It Works |
|
|
678
|
+
|---|---|---|---|
|
|
679
|
+
| **Anthropic** | Claude Opus, Sonnet 4+ | `thinkingEnabled: boolean`, `thinkingBudgetTokens: number` | Extended thinking blocks in response, separate `thinking` content type |
|
|
680
|
+
| **OpenAI** | o1, o3, o4, GPT-5 | `reasoningEffort: "minimal" \| "low" \| "medium" \| "high"` | Reasoning via Responses API, `reasoningSummary: "auto" \| "detailed" \| "concise"` |
|
|
681
|
+
| **Google** | Gemini 2.5 Flash/Pro | `thinking.enabled: boolean`, `thinking.budgetTokens: number` | Thinking via `thinkingConfig`, mapped to effort levels |
|
|
682
|
+
| **xAI** | Grok-4, Grok-3-mini | Native reasoning | Automatic when model supports it |
|
|
683
|
+
|
|
684
|
+
**Cross-provider thinking portability:** When switching models mid-conversation, Pi converts thinking blocks between formats. Anthropic thinking blocks become `<thinking>` tagged text when sent to OpenAI/Google, and vice versa.
|
|
270
685
|
|
|
271
686
|
```typescript
|
|
272
|
-
|
|
687
|
+
// Thinking is automatically extracted in Noosphere responses:
|
|
688
|
+
const result = await ai.chat({
|
|
689
|
+
model: 'claude-opus-4-6',
|
|
690
|
+
messages: [{ role: 'user', content: 'Solve this step by step: 15! / 13!' }],
|
|
691
|
+
});
|
|
273
692
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
693
|
+
console.log(result.thinking); // "Let me work through this... 15! = 15 × 14 × 13!..."
|
|
694
|
+
console.log(result.content); // "15! / 13! = 15 × 14 = 210"
|
|
695
|
+
|
|
696
|
+
// During streaming, thinking arrives as separate events:
|
|
697
|
+
const stream = ai.stream({ messages: [...] });
|
|
698
|
+
for await (const event of stream) {
|
|
699
|
+
if (event.type === 'thinking_delta') console.log('[THINKING]', event.delta);
|
|
700
|
+
if (event.type === 'text_delta') console.log('[RESPONSE]', event.delta);
|
|
701
|
+
}
|
|
702
|
+
```
|
|
703
|
+
|
|
704
|
+
##### 3. Vision / Multimodal Input
|
|
705
|
+
|
|
706
|
+
Models with `input: ["text", "image"]` accept images alongside text. Pi handles the encoding and format differences per provider:
|
|
707
|
+
|
|
708
|
+
```typescript
|
|
709
|
+
// Send images to vision-capable models
|
|
710
|
+
const messages = [{
|
|
711
|
+
role: 'user',
|
|
712
|
+
content: [
|
|
713
|
+
{ type: 'text', text: 'What is in this image?' },
|
|
714
|
+
{ type: 'image', data: base64PngString, mimeType: 'image/png' },
|
|
715
|
+
],
|
|
716
|
+
}];
|
|
717
|
+
|
|
718
|
+
// Supported MIME types: image/png, image/jpeg, image/gif, image/webp
|
|
719
|
+
// Images are silently ignored when sent to non-vision models
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
**Vision-capable models include:** All Claude models, all GPT-4o/GPT-5 models, Gemini models, Grok-2-vision, Grok-4, and select Groq models.
|
|
723
|
+
|
|
724
|
+
##### 4. Agent Loop — Autonomous Tool Execution
|
|
725
|
+
|
|
726
|
+
The `@mariozechner/pi-agent-core` package provides a complete agent loop that automatically cycles through `prompt → LLM → tool call → result → repeat` until the task is done:
|
|
727
|
+
|
|
728
|
+
```typescript
|
|
729
|
+
import { agentLoop } from '@mariozechner/pi-agent-core';
|
|
730
|
+
|
|
731
|
+
const events = agentLoop(userMessage, agentContext, {
|
|
732
|
+
model: getModel('anthropic', 'claude-opus-4-6'),
|
|
733
|
+
tools: [searchTool, readFileTool, writeFileTool],
|
|
734
|
+
signal: abortController.signal,
|
|
735
|
+
});
|
|
736
|
+
|
|
737
|
+
for await (const event of events) {
|
|
738
|
+
switch (event.type) {
|
|
739
|
+
case 'agent_start': // Agent begins
|
|
740
|
+
case 'turn_start': // New LLM turn begins
|
|
741
|
+
case 'message_start': // LLM starts responding
|
|
742
|
+
case 'message_update': // Text/thinking delta received
|
|
743
|
+
case 'tool_execution_start': // About to execute a tool
|
|
744
|
+
case 'tool_execution_end': // Tool finished, result available
|
|
745
|
+
case 'message_end': // LLM finished this message
|
|
746
|
+
case 'turn_end': // Turn complete (may loop if tools were called)
|
|
747
|
+
case 'agent_end': // All done, final messages available
|
|
282
748
|
}
|
|
283
749
|
}
|
|
284
750
|
```
|
|
285
751
|
|
|
286
|
-
|
|
752
|
+
**The agent loop state machine:**
|
|
287
753
|
|
|
288
|
-
|
|
754
|
+
```
|
|
755
|
+
[User sends prompt]
|
|
756
|
+
│
|
|
757
|
+
▼
|
|
758
|
+
┌─[Build Context]──▶ [Check Queues]──▶ [Stream LLM]◄── streamFn()
|
|
759
|
+
│ │
|
|
760
|
+
│ ┌─────┴──────┐
|
|
761
|
+
│ │ │
|
|
762
|
+
│ text tool_call
|
|
763
|
+
│ │ │
|
|
764
|
+
│ ▼ ▼
|
|
765
|
+
│ [Done] [Execute Tool]
|
|
766
|
+
│ │
|
|
767
|
+
│ tool result
|
|
768
|
+
│ │
|
|
769
|
+
└──────────────────────────────────────────────────┘
|
|
770
|
+
(loops back to Stream LLM)
|
|
771
|
+
```
|
|
772
|
+
|
|
773
|
+
**Key design decisions:**
|
|
774
|
+
- Tools execute **sequentially** by default (parallelism can be added on top)
|
|
775
|
+
- The `streamFn` is **injectable** — you can wrap it with middleware to modify requests per-provider
|
|
776
|
+
- Tool arguments are **validated at runtime** using TypeBox + AJV before execution
|
|
777
|
+
- Aborted/failed responses preserve partial content and usage data
|
|
778
|
+
- Tool results are automatically added to the conversation context
|
|
779
|
+
|
|
780
|
+
##### 5. The `streamFn` Pattern — Injectable Middleware
|
|
781
|
+
|
|
782
|
+
This is Pi's most powerful architectural feature. The `streamFn` is the function that actually talks to the LLM, and it can be **wrapped with middleware** like Express.js request handlers:
|
|
289
783
|
|
|
290
784
|
```typescript
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
785
|
+
import type { StreamFn } from '@mariozechner/pi-agent-core';
|
|
786
|
+
import { streamSimple } from '@mariozechner/pi-ai';
|
|
787
|
+
|
|
788
|
+
// Start with Pi's base streaming function
|
|
789
|
+
let fn: StreamFn = streamSimple;
|
|
790
|
+
|
|
791
|
+
// Wrap it with middleware that modifies requests per-provider
|
|
792
|
+
fn = createMyCustomWrapper(fn, {
|
|
793
|
+
// Add custom headers for Anthropic
|
|
794
|
+
onPayload: (payload) => {
|
|
795
|
+
if (model.provider === 'anthropic') {
|
|
796
|
+
payload.headers['anthropic-beta'] = 'fine-grained-tool-streaming-2025-05-14';
|
|
797
|
+
}
|
|
297
798
|
},
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
799
|
+
});
|
|
800
|
+
|
|
801
|
+
// Each wrapper calls the previous one, forming a chain:
|
|
802
|
+
// request → wrapper3 → wrapper2 → wrapper1 → streamSimple → API
|
|
803
|
+
```
|
|
804
|
+
|
|
805
|
+
This pattern is what allows projects like OpenClaw to stack **16 provider-specific wrappers** on top of Pi's base streaming — adding beta headers for Anthropic, WebSocket transport for OpenAI, thinking sanitization for Google, reasoning effort headers for OpenRouter, and more — without modifying Pi's source code.
|
|
806
|
+
|
|
807
|
+
##### 6. Session Management (via pi-coding-agent)
|
|
808
|
+
|
|
809
|
+
The `@mariozechner/pi-coding-agent` package provides persistent session management with JSONL-based storage:
|
|
810
|
+
|
|
811
|
+
```typescript
|
|
812
|
+
import { createAgentSession, SessionManager } from '@mariozechner/pi-coding-agent';
|
|
813
|
+
|
|
814
|
+
// Create a session with full persistence
|
|
815
|
+
const session = await createAgentSession({
|
|
816
|
+
model: 'claude-opus-4-6',
|
|
817
|
+
tools: myTools,
|
|
818
|
+
sessionManager, // handles JSONL persistence
|
|
819
|
+
});
|
|
820
|
+
|
|
821
|
+
const result = await session.run('Build a REST API');
|
|
822
|
+
// Session is automatically saved to:
|
|
823
|
+
// ~/.pi/agent/sessions/session_abc123.jsonl
|
|
824
|
+
```
|
|
825
|
+
|
|
826
|
+
**Session file format (append-only JSONL):**
|
|
827
|
+
```jsonl
|
|
828
|
+
{"role":"user","content":"Build a REST API","timestamp":1710000000}
|
|
829
|
+
{"role":"assistant","content":"I'll create...","model":"claude-opus-4-6","usage":{...}}
|
|
830
|
+
{"role":"toolResult","toolCallId":"tc_001","toolName":"bash","content":"OK"}
|
|
831
|
+
{"type":"compaction","summary":"The user asked to build...","preservedMessages":[...]}
|
|
832
|
+
```
|
|
833
|
+
|
|
834
|
+
**Session operations:**
|
|
835
|
+
- `create()` — new session
|
|
836
|
+
- `open(id)` — restore existing session
|
|
837
|
+
- `continueRecent()` — continue the most recent session
|
|
838
|
+
- `forkFrom(id)` — create a branch (new JSONL referencing parent)
|
|
839
|
+
- `inMemory()` — RAM-only session (for SDK/testing)
|
|
840
|
+
|
|
841
|
+
##### 7. Context Compaction — Automatic Context Window Management
|
|
842
|
+
|
|
843
|
+
When the conversation approaches the model's context window limit, Pi automatically **compacts** the history:
|
|
844
|
+
|
|
845
|
+
```
|
|
846
|
+
1. DETECT: Calculate inputTokens + outputTokens vs model.contextWindow
|
|
847
|
+
2. TRIGGER: Proactively before overflow, or as recovery after overflow error
|
|
848
|
+
3. SUMMARIZE: Send history to LLM with a compaction prompt
|
|
849
|
+
4. WRITE: Append compaction entry to JSONL:
|
|
850
|
+
{"type":"compaction","summary":"...","preservedMessages":[last N messages]}
|
|
851
|
+
5. CONTINUE: Context is now summary + recent messages instead of full history
|
|
852
|
+
```
|
|
853
|
+
|
|
854
|
+
The JSONL file is **never rewritten** — compaction entries are appended, maintaining a complete audit trail.
|
|
855
|
+
|
|
856
|
+
##### 8. Cost Tracking — Cache-Aware Pricing
|
|
857
|
+
|
|
858
|
+
Pi tracks costs per-request with cache-aware pricing for providers that support prompt caching:
|
|
859
|
+
|
|
860
|
+
```typescript
|
|
861
|
+
// Every model has 4 cost dimensions:
|
|
862
|
+
{
|
|
863
|
+
input: 15, // $15 per 1M input tokens
|
|
864
|
+
output: 75, // $75 per 1M output tokens
|
|
865
|
+
cacheRead: 1.5, // $1.50 per 1M cached prompt tokens (read)
|
|
866
|
+
cacheWrite: 18.75, // $18.75 per 1M cached prompt tokens (write)
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
// Usage tracking on every response:
|
|
870
|
+
{
|
|
871
|
+
input: 1500, // tokens consumed as input
|
|
872
|
+
output: 800, // tokens generated
|
|
873
|
+
cacheRead: 5000, // prompt cache hits
|
|
874
|
+
cacheWrite: 1500, // prompt cache writes
|
|
875
|
+
cost: {
|
|
876
|
+
    total: 0.1181,  // total cost in USD (input + output + cacheRead + cacheWrite)
|
|
877
|
+
input: 0.0225,
|
|
878
|
+
output: 0.06,
|
|
879
|
+
cacheRead: 0.0075,
|
|
880
|
+
cacheWrite: 0.028,
|
|
303
881
|
},
|
|
882
|
+
}
|
|
883
|
+
```
|
|
884
|
+
|
|
885
|
+
**Anthropic and OpenAI** support prompt caching. For providers without caching, `cacheRead` and `cacheWrite` are always 0.
|
|
886
|
+
|
|
887
|
+
##### 9. Extension System (via pi-coding-agent)
|
|
888
|
+
|
|
889
|
+
Pi supports a plugin system where extensions can register tools, commands, and lifecycle hooks:
|
|
890
|
+
|
|
891
|
+
```typescript
|
|
892
|
+
// Extensions are TypeScript modules loaded at runtime via jiti
|
|
893
|
+
export default function(api: ExtensionAPI) {
|
|
894
|
+
// Register a custom tool
|
|
895
|
+
api.registerTool('my_tool', {
|
|
896
|
+
description: 'Does something useful',
|
|
897
|
+
parameters: { /* TypeBox schema */ },
|
|
898
|
+
execute: async (args) => 'result',
|
|
899
|
+
});
|
|
900
|
+
|
|
901
|
+
// Register a slash command
|
|
902
|
+
api.registerCommand('/mycommand', {
|
|
903
|
+
handler: async (args) => { /* ... */ },
|
|
904
|
+
description: 'Custom command',
|
|
905
|
+
});
|
|
906
|
+
|
|
907
|
+
// Hook into the agent lifecycle
|
|
908
|
+
api.on('before_agent_start', async (context) => {
|
|
909
|
+
context.systemPrompt += '\nExtra instructions';
|
|
910
|
+
});
|
|
911
|
+
|
|
912
|
+
api.on('tool_execution_end', async (event) => {
|
|
913
|
+
// Post-process tool results
|
|
914
|
+
});
|
|
915
|
+
}
|
|
916
|
+
```
|
|
917
|
+
|
|
918
|
+
**Resource discovery chain (priority):**
|
|
919
|
+
1. Project `.pi/` directory (highest)
|
|
920
|
+
2. User `~/.pi/agent/`
|
|
921
|
+
3. npm packages with Pi metadata
|
|
922
|
+
4. Built-in defaults
|
|
923
|
+
|
|
924
|
+
##### 10. The Anti-MCP Philosophy — Why Pi Uses CLI Instead
|
|
925
|
+
|
|
926
|
+
Pi explicitly **rejects MCP** (Model Context Protocol). Mario Zechner's argument, backed by benchmarks:
|
|
927
|
+
|
|
928
|
+
**The token cost problem:**
|
|
929
|
+
|
|
930
|
+
| Approach | Tools | Tokens Consumed | % of Claude's Context |
|
|
931
|
+
|---|---|---|---|
|
|
932
|
+
| Playwright MCP | 21 tools | 13,700 tokens | 6.8% |
|
|
933
|
+
| Chrome DevTools MCP | 26 tools | 18,000 tokens | 9.0% |
|
|
934
|
+
| Pi CLI + README | N/A | 225 tokens | ~0.1% |
|
|
935
|
+
|
|
936
|
+
That's a **60-80x reduction** in token consumption. With 5 MCP servers, you lose ~55,000 tokens before doing any work.
|
|
937
|
+
|
|
938
|
+
**Benchmark results (120 evaluations):**
|
|
939
|
+
|
|
940
|
+
| Approach | Avg Cost | Success Rate |
|
|
941
|
+
|---|---|---|
|
|
942
|
+
| CLI (tmux) | $0.37 | 100% |
|
|
943
|
+
| CLI (terminalcp) | $0.39 | 100% |
|
|
944
|
+
| MCP (terminalcp) | $0.48 | 100% |
|
|
945
|
+
|
|
946
|
+
Same success rate, MCP costs **30% more**.
|
|
947
|
+
|
|
948
|
+
**Pi's alternative: Progressive Disclosure via CLI tools + READMEs**
|
|
949
|
+
|
|
950
|
+
Instead of loading all tool definitions upfront, Pi's agent has `bash` as a built-in tool and discovers CLI tools only when needed:
|
|
951
|
+
|
|
952
|
+
```
|
|
953
|
+
MCP approach: Pi approach:
|
|
954
|
+
───────────── ──────────
|
|
955
|
+
Session start → Session start →
|
|
956
|
+
Load 21 Playwright tools Load 4 tools: read, write, edit, bash
|
|
957
|
+
Load 26 Chrome DevTools tools (225 tokens)
|
|
958
|
+
Load N more MCP tools
|
|
959
|
+
(~55,000 tokens wasted)
|
|
960
|
+
|
|
961
|
+
When browser needed: When browser needed:
|
|
962
|
+
Tools already loaded Agent reads SKILL.md (225 tokens)
|
|
963
|
+
(but context is polluted) Runs: browser-start.js
|
|
964
|
+
Runs: browser-nav.js https://...
|
|
965
|
+
Runs: browser-screenshot.js
|
|
966
|
+
|
|
967
|
+
When browser NOT needed: When browser NOT needed:
|
|
968
|
+
Tools still consume context 0 tokens wasted
|
|
969
|
+
```
|
|
970
|
+
|
|
971
|
+
**The 4 built-in tools** (what Pi argues is sufficient):
|
|
972
|
+
|
|
973
|
+
| Tool | What It Does | Why It's Enough |
|
|
974
|
+
|---|---|---|
|
|
975
|
+
| `read` | Read files (text + images) | Supports offset/limit for large files |
|
|
976
|
+
| `write` | Create/overwrite files | Creates directories automatically |
|
|
977
|
+
| `edit` | Replace text (oldText→newText) | Surgical edits, like a diff |
|
|
978
|
+
| `bash` | Execute any shell command | **bash can do everything else** — replaces MCP entirely |
|
|
979
|
+
|
|
980
|
+
The key insight: `bash` replaces MCP. Any CLI tool, API call, database query, or system operation can be invoked through bash. The agent reads the tool's README only when it needs it, paying tokens on-demand instead of upfront.
|
|
981
|
+
|
|
982
|
+
---
|
|
983
|
+
|
|
984
|
+
### FAL — Media Generation (867+ endpoints)
|
|
985
|
+
|
|
986
|
+
**Provider ID:** `fal`
|
|
987
|
+
**Modalities:** Image, Video, TTS
|
|
988
|
+
**Library:** `@fal-ai/client`
|
|
989
|
+
|
|
990
|
+
The largest media generation provider with dynamic pricing fetched at runtime from `https://api.fal.ai/v1/models/pricing`.
|
|
991
|
+
|
|
992
|
+
#### Image Models (200+)
|
|
993
|
+
|
|
994
|
+
**FLUX Family (20+ variants):**
|
|
995
|
+
| Model | Description |
|
|
996
|
+
|---|---|
|
|
997
|
+
| `fal-ai/flux/schnell` | Fast generation (default) |
|
|
998
|
+
| `fal-ai/flux/dev` | Higher quality |
|
|
999
|
+
| `fal-ai/flux-2` | Next generation |
|
|
1000
|
+
| `fal-ai/flux-2-pro` | Professional quality |
|
|
1001
|
+
| `fal-ai/flux-2-flex` | Flexible variant |
|
|
1002
|
+
| `fal-ai/flux-2/edit` | Image editing |
|
|
1003
|
+
| `fal-ai/flux-2/lora` | LoRA fine-tuning |
|
|
1004
|
+
| `fal-ai/flux-pro/v1.1-ultra` | Ultra high quality |
|
|
1005
|
+
| `fal-ai/flux-pro/kontext` | Context-aware generation |
|
|
1006
|
+
| `fal-ai/flux-lora` | Custom style training |
|
|
1007
|
+
| `fal-ai/flux-vision-upscaler` | AI upscaling |
|
|
1008
|
+
| `fal-ai/flux-krea-trainer` | Model training |
|
|
1009
|
+
| `fal-ai/flux-lora-fast-training` | Fast fine-tuning |
|
|
1010
|
+
| `fal-ai/flux-lora-portrait-trainer` | Portrait specialist |
|
|
1011
|
+
|
|
1012
|
+
**Stable Diffusion:**
|
|
1013
|
+
`fal-ai/stable-diffusion-v15`, `fal-ai/stable-diffusion-v35-large`, `fal-ai/stable-diffusion-v35-medium`, `fal-ai/stable-diffusion-v3-medium`
|
|
1014
|
+
|
|
1015
|
+
**Other Image Models:**
|
|
1016
|
+
| Model | Description |
|
|
1017
|
+
|---|---|
|
|
1018
|
+
| `fal-ai/recraft/v3/text-to-image` | Artistic generation |
|
|
1019
|
+
| `fal-ai/ideogram/v2`, `v2a`, `v3` | Ideogram series |
|
|
1020
|
+
| `fal-ai/imagen3`, `fal-ai/imagen4/preview` | Google Imagen |
|
|
1021
|
+
| `fal-ai/gpt-image-1` | GPT image generation |
|
|
1022
|
+
| `fal-ai/gpt-image-1/edit-image` | GPT image editing |
|
|
1023
|
+
| `fal-ai/reve/text-to-image` | Reve generation |
|
|
1024
|
+
| `fal-ai/sana`, `fal-ai/sana/sprint` | Sana models |
|
|
1025
|
+
| `fal-ai/pixart-sigma` | PixArt Sigma |
|
|
1026
|
+
| `fal-ai/bria/text-to-image/base` | Bria AI |
|
|
1027
|
+
|
|
1028
|
+
**Pre-trained LoRA Styles:**
|
|
1029
|
+
`fal-ai/flux-2-lora-gallery/sepia-vintage`, `virtual-tryon`, `satellite-view-style`, `realism`, `multiple-angles`, `hdr-style`, `face-to-full-portrait`, `digital-comic-art`, `ballpoint-pen-sketch`, `apartment-staging`, `add-background`
|
|
1030
|
+
|
|
1031
|
+
**Image Editing/Enhancement (30+ tools):**
|
|
1032
|
+
`fal-ai/image-editing/age-progression`, `baby-version`, `background-change`, `hair-change`, `expression-change`, `object-removal`, `photo-restoration`, `style-transfer`, and many more.
|
|
1033
|
+
|
|
1034
|
+
#### Video Models (150+)
|
|
1035
|
+
|
|
1036
|
+
**Kling Video (20+ variants):**
|
|
1037
|
+
| Model | Description |
|
|
1038
|
+
|---|---|
|
|
1039
|
+
| `fal-ai/kling-video/v2/master/text-to-video` | Default text-to-video |
|
|
1040
|
+
| `fal-ai/kling-video/v2/master/image-to-video` | Image-to-video |
|
|
1041
|
+
| `fal-ai/kling-video/v2.5-turbo/pro/text-to-video` | Turbo pro |
|
|
1042
|
+
| `fal-ai/kling-video/o1/image-to-video` | O1 quality |
|
|
1043
|
+
| `fal-ai/kling-video/o1/video-to-video/edit` | Video editing |
|
|
1044
|
+
| `fal-ai/kling-video/lipsync/audio-to-video` | Lip sync |
|
|
1045
|
+
| `fal-ai/kling-video/video-to-audio` | Audio extraction |
|
|
1046
|
+
|
|
1047
|
+
**Sora 2 (OpenAI):**
|
|
1048
|
+
| Model | Description |
|
|
1049
|
+
|---|---|
|
|
1050
|
+
| `fal-ai/sora-2/text-to-video` | Text-to-video |
|
|
1051
|
+
| `fal-ai/sora-2/text-to-video/pro` | Pro quality |
|
|
1052
|
+
| `fal-ai/sora-2/image-to-video` | Image-to-video |
|
|
1053
|
+
| `fal-ai/sora-2/video-to-video/remix` | Video remixing |
|
|
1054
|
+
|
|
1055
|
+
**VEO 3 (Google):**
|
|
1056
|
+
| Model | Description |
|
|
1057
|
+
|---|---|
|
|
1058
|
+
| `fal-ai/veo3` | VEO 3 standard |
|
|
1059
|
+
| `fal-ai/veo3/fast` | Fast variant |
|
|
1060
|
+
| `fal-ai/veo3/image-to-video` | Image-to-video |
|
|
1061
|
+
| `fal-ai/veo3.1` | Latest version |
|
|
1062
|
+
| `fal-ai/veo3.1/reference-to-video` | Reference-guided |
|
|
1063
|
+
| `fal-ai/veo3.1/first-last-frame-to-video` | Frame interpolation |
|
|
1064
|
+
|
|
1065
|
+
**WAN (15+ variants):**
|
|
1066
|
+
`fal-ai/wan-pro/text-to-video`, `fal-ai/wan-pro/image-to-video`, `fal-ai/wan/v2.2-a14b/text-to-video`, `fal-ai/wan-vace-14b/depth`, `fal-ai/wan-vace-14b/inpainting`, `fal-ai/wan-vace-14b/pose`, `fal-ai/wan-effects`
|
|
1067
|
+
|
|
1068
|
+
**Pixverse (20+ variants):**
|
|
1069
|
+
`fal-ai/pixverse/v5.5/text-to-video`, `fal-ai/pixverse/v5.5/image-to-video`, `fal-ai/pixverse/v5.5/effects`, `fal-ai/pixverse/lipsync`, `fal-ai/pixverse/sound-effects`
|
|
1070
|
+
|
|
1071
|
+
**Minimax / Hailuo:**
|
|
1072
|
+
`fal-ai/minimax/hailuo-2.3/text-to-video/pro`, `fal-ai/minimax/hailuo-2.3/image-to-video/pro`, `fal-ai/minimax/video-01-director`, `fal-ai/minimax/video-01-live`
|
|
1073
|
+
|
|
1074
|
+
**Other Video Models:**
|
|
1075
|
+
| Provider | Models |
|
|
1076
|
+
|---|---|
|
|
1077
|
+
| Hunyuan | `fal-ai/hunyuan-video/text-to-video`, `image-to-video`, `video-to-video`, `foley` |
|
|
1078
|
+
| Pika | `fal-ai/pika/v2.2/text-to-video`, `pikascenes`, `pikaffects` |
|
|
1079
|
+
| LTX | `fal-ai/ltx-2/text-to-video`, `image-to-video`, `retake-video` |
|
|
1080
|
+
| Luma | `fal-ai/luma-dream-machine/ray-2`, `ray-2-flash`, `luma-photon` |
|
|
1081
|
+
| Vidu | `fal-ai/vidu/q2/text-to-video`, `image-to-video/pro` |
|
|
1082
|
+
| CogVideoX | `fal-ai/cogvideox-5b/text-to-video`, `video-to-video` |
|
|
1083
|
+
| Seedance | `fal-ai/bytedance/seedance/v1/text-to-video`, `image-to-video` |
|
|
1084
|
+
| Magi | `fal-ai/magi/text-to-video`, `extend-video` |
|
|
1085
|
+
|
|
1086
|
+
#### TTS / Speech Models (50+)
|
|
1087
|
+
|
|
1088
|
+
**Kokoro (9 languages, 20+ voices per language):**
|
|
1089
|
+
| Model | Language | Example Voices |
|
|
1090
|
+
|---|---|---|
|
|
1091
|
+
| `fal-ai/kokoro/american-english` | English (US) | af_heart, af_alloy, af_bella, af_nova, am_adam, am_echo, am_onyx |
|
|
1092
|
+
| `fal-ai/kokoro/british-english` | English (UK) | British voice set |
|
|
1093
|
+
| `fal-ai/kokoro/french` | French | French voice set |
|
|
1094
|
+
| `fal-ai/kokoro/japanese` | Japanese | Japanese voice set |
|
|
1095
|
+
| `fal-ai/kokoro/spanish` | Spanish | Spanish voice set |
|
|
1096
|
+
| `fal-ai/kokoro/mandarin-chinese` | Chinese | Mandarin voice set |
|
|
1097
|
+
| `fal-ai/kokoro/italian` | Italian | Italian voice set |
|
|
1098
|
+
| `fal-ai/kokoro/hindi` | Hindi | Hindi voice set |
|
|
1099
|
+
| `fal-ai/kokoro/brazilian-portuguese` | Portuguese | Portuguese voice set |
|
|
1100
|
+
|
|
1101
|
+
**ElevenLabs:**
|
|
1102
|
+
| Model | Description |
|
|
1103
|
+
|---|---|
|
|
1104
|
+
| `fal-ai/elevenlabs/tts/eleven-v3` | Professional quality |
|
|
1105
|
+
| `fal-ai/elevenlabs/tts/turbo-v2.5` | Faster inference |
|
|
1106
|
+
| `fal-ai/elevenlabs/tts/multilingual-v2` | Multi-language |
|
|
1107
|
+
| `fal-ai/elevenlabs/text-to-dialogue/eleven-v3` | Dialogue generation |
|
|
1108
|
+
| `fal-ai/elevenlabs/sound-effects/v2` | Sound effects |
|
|
1109
|
+
| `fal-ai/elevenlabs/speech-to-text` | Transcription |
|
|
1110
|
+
| `fal-ai/elevenlabs/audio-isolation` | Background removal |
|
|
1111
|
+
|
|
1112
|
+
**Other TTS:**
|
|
1113
|
+
`fal-ai/f5-tts` (voice cloning), `fal-ai/dia-tts`, `fal-ai/minimax/speech-2.6-turbo`, `fal-ai/minimax/speech-2.6-hd`, `fal-ai/chatterbox/text-to-speech`, `fal-ai/index-tts-2/text-to-speech`
|
|
1114
|
+
|
|
1115
|
+
#### FAL Client Capabilities
|
|
1116
|
+
|
|
1117
|
+
The `@fal-ai/client` provides additional features beyond what Noosphere surfaces:
|
|
1118
|
+
|
|
1119
|
+
- **Queue API** — Submit jobs, poll status, get results, cancel. Supports webhooks and priority levels
|
|
1120
|
+
- **Streaming API** — Real-time streaming responses via async iterators
|
|
1121
|
+
- **Realtime API** — WebSocket connections for interactive use (e.g., real-time image generation)
|
|
1122
|
+
- **Storage API** — File upload with configurable TTL (1h, 1d, 7d, 30d, 1y, never)
|
|
1123
|
+
- **Retry logic** — Configurable retries with exponential backoff and jitter
|
|
1124
|
+
- **Request middleware** — Custom request interceptors and proxy support
|
|
1125
|
+
|
|
1126
|
+
---
|
|
1127
|
+
|
|
1128
|
+
### Hugging Face — Open Source AI (30+ tasks)
|
|
1129
|
+
|
|
1130
|
+
**Provider ID:** `huggingface`
|
|
1131
|
+
**Modalities:** LLM, Image, TTS
|
|
1132
|
+
**Library:** `@huggingface/inference`
|
|
1133
|
+
|
|
1134
|
+
Access to the entire Hugging Face Hub ecosystem. Any model hosted on HuggingFace can be used by passing its ID directly.
|
|
1135
|
+
|
|
1136
|
+
#### Default Models
|
|
1137
|
+
|
|
1138
|
+
| Modality | Default Model | Description |
|
|
1139
|
+
|---|---|---|
|
|
1140
|
+
| LLM | `meta-llama/Llama-3.1-8B-Instruct` | Llama 3.1 8B |
|
|
1141
|
+
| Image | `stabilityai/stable-diffusion-xl-base-1.0` | SDXL Base |
|
|
1142
|
+
| TTS | `facebook/mms-tts-eng` | MMS TTS English |
|
|
1143
|
+
|
|
1144
|
+
Any HuggingFace model ID works — just pass it as the `model` parameter:
|
|
1145
|
+
|
|
1146
|
+
```typescript
|
|
1147
|
+
await ai.chat({
|
|
1148
|
+
provider: 'huggingface',
|
|
1149
|
+
model: 'mistralai/Mixtral-8x7B-v0.1',
|
|
1150
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
304
1151
|
});
|
|
305
1152
|
```
|
|
306
1153
|
|
|
307
|
-
|
|
1154
|
+
#### Full Library Capabilities
|
|
1155
|
+
|
|
1156
|
+
The `@huggingface/inference` library (v3.15.0) provides 30+ AI tasks, including capabilities not yet surfaced by Noosphere:
|
|
1157
|
+
|
|
1158
|
+
**Natural Language Processing:**
|
|
1159
|
+
| Task | Method | Description |
|
|
1160
|
+
|---|---|---|
|
|
1161
|
+
| Chat | `chatCompletion()` | OpenAI-compatible chat completions |
|
|
1162
|
+
| Chat Streaming | `chatCompletionStream()` | Token-by-token streaming |
|
|
1163
|
+
| Text Generation | `textGeneration()` | Raw text completion |
|
|
1164
|
+
| Summarization | `summarization()` | Text summarization |
|
|
1165
|
+
| Translation | `translation()` | Language translation |
|
|
1166
|
+
| Question Answering | `questionAnswering()` | Extract answers from context |
|
|
1167
|
+
| Text Classification | `textClassification()` | Sentiment, topic classification |
|
|
1168
|
+
| Zero-Shot Classification | `zeroShotClassification()` | Classify without training |
|
|
1169
|
+
| Token Classification | `tokenClassification()` | NER, POS tagging |
|
|
1170
|
+
| Sentence Similarity | `sentenceSimilarity()` | Semantic similarity scores |
|
|
1171
|
+
| Feature Extraction | `featureExtraction()` | Text embeddings |
|
|
1172
|
+
| Fill Mask | `fillMask()` | Fill in masked tokens |
|
|
1173
|
+
| Table QA | `tableQuestionAnswering()` | Answer questions about tables |
|
|
1174
|
+
|
|
1175
|
+
**Computer Vision:**
|
|
1176
|
+
| Task | Method | Description |
|
|
1177
|
+
|---|---|---|
|
|
1178
|
+
| Text-to-Image | `textToImage()` | Generate images from text |
|
|
1179
|
+
| Image-to-Image | `imageToImage()` | Transform/edit images |
|
|
1180
|
+
| Image Captioning | `imageToText()` | Describe images |
|
|
1181
|
+
| Classification | `imageClassification()` | Classify image content |
|
|
1182
|
+
| Object Detection | `objectDetection()` | Detect and locate objects |
|
|
1183
|
+
| Segmentation | `imageSegmentation()` | Pixel-level segmentation |
|
|
1184
|
+
| Zero-Shot Image | `zeroShotImageClassification()` | Classify without training |
|
|
1185
|
+
| Text-to-Video | `textToVideo()` | Generate videos |
|
|
1186
|
+
|
|
1187
|
+
**Audio:**
|
|
1188
|
+
| Task | Method | Description |
|
|
1189
|
+
|---|---|---|
|
|
1190
|
+
| Text-to-Speech | `textToSpeech()` | Generate speech |
|
|
1191
|
+
| Speech-to-Text | `automaticSpeechRecognition()` | Transcription |
|
|
1192
|
+
| Audio Classification | `audioClassification()` | Classify sounds |
|
|
1193
|
+
| Audio-to-Audio | `audioToAudio()` | Source separation, enhancement |
|
|
1194
|
+
|
|
1195
|
+
**Multimodal:**
|
|
1196
|
+
| Task | Method | Description |
|
|
1197
|
+
|---|---|---|
|
|
1198
|
+
| Visual QA | `visualQuestionAnswering()` | Answer questions about images |
|
|
1199
|
+
| Document QA | `documentQuestionAnswering()` | Answer questions about documents |
|
|
1200
|
+
|
|
1201
|
+
**Tabular:**
|
|
1202
|
+
| Task | Method | Description |
|
|
1203
|
+
|---|---|---|
|
|
1204
|
+
| Classification | `tabularClassification()` | Classify tabular data |
|
|
1205
|
+
| Regression | `tabularRegression()` | Predict continuous values |
|
|
1206
|
+
|
|
1207
|
+
#### HuggingFace Agentic Features
|
|
1208
|
+
|
|
1209
|
+
- **Tool/Function Calling:** Full support via `tools` parameter with `tool_choice` control (auto/none/required)
|
|
1210
|
+
- **JSON Schema Responses:** `response_format: { type: 'json_schema', json_schema: {...} }`
|
|
1211
|
+
- **Reasoning:** `reasoning_effort` parameter (none/minimal/low/medium/high/xhigh)
|
|
1212
|
+
- **Multimodal Input:** Images via `image_url` content chunks in chat messages
|
|
1213
|
+
- **17 Inference Providers:** Route through Groq, Together, Fireworks, Replicate, Cerebras, Cohere, and more
|
|
1214
|
+
|
|
1215
|
+
---
|
|
1216
|
+
|
|
1217
|
+
### ComfyUI — Local Image Generation
|
|
1218
|
+
|
|
1219
|
+
**Provider ID:** `comfyui`
|
|
1220
|
+
**Modalities:** Image, Video (planned)
|
|
1221
|
+
**Type:** Local
|
|
1222
|
+
**Default Port:** 8188
|
|
308
1223
|
|
|
309
|
-
|
|
1224
|
+
Connects to a local ComfyUI instance for Stable Diffusion workflows.
|
|
1225
|
+
|
|
1226
|
+
#### How It Works
|
|
1227
|
+
|
|
1228
|
+
1. Clones a built-in txt2img workflow template (KSampler + SDXL pipeline)
|
|
1229
|
+
2. Injects your parameters (prompt, dimensions, seed, steps, guidance)
|
|
1230
|
+
3. POSTs the workflow to ComfyUI's `/prompt` endpoint
|
|
1231
|
+
4. Polls `/history/{promptId}` every second until completion (max 5 minutes)
|
|
1232
|
+
5. Fetches the generated image from `/view`
|
|
1233
|
+
6. Returns a PNG buffer
|
|
1234
|
+
|
|
1235
|
+
#### Configuration
|
|
310
1236
|
|
|
311
1237
|
```typescript
|
|
312
1238
|
const ai = new Noosphere({
|
|
313
|
-
autoDetectLocal: true, // default
|
|
314
1239
|
local: {
|
|
315
|
-
comfyui: {
|
|
316
|
-
|
|
317
|
-
|
|
1240
|
+
comfyui: {
|
|
1241
|
+
enabled: true,
|
|
1242
|
+
host: 'http://localhost',
|
|
1243
|
+
port: 8188,
|
|
1244
|
+
},
|
|
318
1245
|
},
|
|
319
1246
|
});
|
|
320
1247
|
```
|
|
321
1248
|
|
|
322
|
-
|
|
1249
|
+
#### Default Workflow
|
|
1250
|
+
|
|
1251
|
+
- **Checkpoint:** `sd_xl_base_1.0.safetensors`
|
|
1252
|
+
- **Sampler:** euler with normal scheduler
|
|
1253
|
+
- **Default Steps:** 20
|
|
1254
|
+
- **Default CFG/Guidance:** 7
|
|
1255
|
+
- **Default Size:** 1024x1024
|
|
1256
|
+
- **Max Size:** 2048x2048
|
|
1257
|
+
- **Output:** PNG
|
|
1258
|
+
|
|
1259
|
+
#### Models Exposed
|
|
1260
|
+
|
|
1261
|
+
| Model ID | Modality | Description |
|
|
1262
|
+
|---|---|---|
|
|
1263
|
+
| `comfyui-txt2img` | Image | Text-to-image via workflow |
|
|
1264
|
+
| `comfyui-txt2vid` | Video | Planned (requires AnimateDiff workflow) |
|
|
1265
|
+
|
|
1266
|
+
---
|
|
1267
|
+
|
|
1268
|
+
### Local TTS — Piper & Kokoro
|
|
1269
|
+
|
|
1270
|
+
**Provider IDs:** `piper`, `kokoro`
|
|
1271
|
+
**Modality:** TTS
|
|
1272
|
+
**Type:** Local
|
|
1273
|
+
|
|
1274
|
+
Connects to local OpenAI-compatible TTS servers.
|
|
1275
|
+
|
|
1276
|
+
#### Supported Engines
|
|
1277
|
+
|
|
1278
|
+
| Engine | Default Port | Health Check | Voice Discovery |
|
|
1279
|
+
|---|---|---|---|
|
|
1280
|
+
| Piper | 5500 | `GET /health` | `GET /voices` |
|
|
1281
|
+
| Kokoro | 5501 | `GET /health` | `GET /v1/models` (fallback) |
|
|
1282
|
+
|
|
1283
|
+
#### API
|
|
1284
|
+
|
|
1285
|
+
Uses the OpenAI-compatible TTS endpoint:
|
|
1286
|
+
|
|
1287
|
+
```
|
|
1288
|
+
POST /v1/audio/speech
|
|
1289
|
+
{
|
|
1290
|
+
"model": "tts-1",
|
|
1291
|
+
"input": "Hello world",
|
|
1292
|
+
"voice": "default",
|
|
1293
|
+
"speed": 1.0,
|
|
1294
|
+
"response_format": "mp3"
|
|
1295
|
+
}
|
|
1296
|
+
```
|
|
1297
|
+
|
|
1298
|
+
Supports `mp3`, `wav`, and `ogg` formats. Returns audio as a Buffer.
|
|
1299
|
+
|
|
1300
|
+
---
|
|
1301
|
+
|
|
1302
|
+
## Architecture
|
|
1303
|
+
|
|
1304
|
+
### Provider Resolution (Local-First)
|
|
1305
|
+
|
|
1306
|
+
When you call a generation method without specifying a provider, Noosphere resolves one automatically:
|
|
1307
|
+
|
|
1308
|
+
1. If `model` is specified without `provider` → looks up model in registry cache
|
|
1309
|
+
2. If a `default` is configured for the modality → uses that
|
|
1310
|
+
3. Otherwise → **local providers first**, then cloud providers
|
|
1311
|
+
|
|
1312
|
+
```
|
|
1313
|
+
resolveProvider(modality):
|
|
1314
|
+
1. Check user-specified provider ID → return if found
|
|
1315
|
+
2. Check configured defaults → return if found
|
|
1316
|
+
3. Scan all providers:
|
|
1317
|
+
→ Return first LOCAL provider supporting this modality
|
|
1318
|
+
→ Fallback to first CLOUD provider
|
|
1319
|
+
4. Throw NO_PROVIDER error
|
|
1320
|
+
```
|
|
1321
|
+
|
|
1322
|
+
### Retry & Failover Logic
|
|
1323
|
+
|
|
1324
|
+
```
|
|
1325
|
+
executeWithRetry(modality, provider, fn):
|
|
1326
|
+
for attempt = 0..maxRetries:
|
|
1327
|
+
try: return fn()
|
|
1328
|
+
catch:
|
|
1329
|
+
if error is retryable AND attempts remain:
|
|
1330
|
+
wait backoffMs * 2^attempt (exponential backoff)
|
|
1331
|
+
retry same provider
|
|
1332
|
+
if error is NOT GENERATION_FAILED AND failover enabled:
|
|
1333
|
+
try each alternative provider for this modality
|
|
1334
|
+
throw last error
|
|
1335
|
+
```
|
|
1336
|
+
|
|
1337
|
+
**Retryable errors (same provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT`, `GENERATION_FAILED`
|
|
1338
|
+
|
|
1339
|
+
**Failover-eligible errors (cross-provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT` (NOT `GENERATION_FAILED`)
|
|
1340
|
+
|
|
1341
|
+
### Model Registry & Caching
|
|
1342
|
+
|
|
1343
|
+
- Models are fetched from providers via `listModels()` and cached in memory
|
|
1344
|
+
- Cache TTL is configurable (default: 60 minutes)
|
|
1345
|
+
- `syncModels()` forces a refresh of all provider model lists
|
|
1346
|
+
- Registry tracks model → provider mappings for fast resolution
|
|
1347
|
+
|
|
1348
|
+
### Usage Tracking
|
|
1349
|
+
|
|
1350
|
+
Every API call (success or failure) records a `UsageEvent`:
|
|
1351
|
+
|
|
1352
|
+
```typescript
|
|
1353
|
+
interface UsageEvent {
|
|
1354
|
+
modality: 'llm' | 'image' | 'video' | 'tts';
|
|
1355
|
+
provider: string;
|
|
1356
|
+
model: string;
|
|
1357
|
+
cost: number; // USD
|
|
1358
|
+
latencyMs: number;
|
|
1359
|
+
input?: number; // tokens or characters
|
|
1360
|
+
output?: number; // tokens
|
|
1361
|
+
unit?: string;
|
|
1362
|
+
timestamp: string; // ISO 8601
|
|
1363
|
+
success: boolean;
|
|
1364
|
+
error?: string; // error message if failed
|
|
1365
|
+
metadata?: Record<string, unknown>;
|
|
1366
|
+
}
|
|
1367
|
+
```
|
|
1368
|
+
|
|
1369
|
+
---
|
|
323
1370
|
|
|
324
|
-
|
|
1371
|
+
## Error Handling
|
|
1372
|
+
|
|
1373
|
+
All errors are instances of `NoosphereError`:
|
|
325
1374
|
|
|
326
1375
|
```typescript
|
|
327
|
-
|
|
1376
|
+
import { NoosphereError } from 'noosphere';
|
|
1377
|
+
|
|
1378
|
+
try {
|
|
1379
|
+
await ai.chat({ messages: [{ role: 'user', content: 'Hello' }] });
|
|
1380
|
+
} catch (err) {
|
|
1381
|
+
if (err instanceof NoosphereError) {
|
|
1382
|
+
console.log(err.code); // error code
|
|
1383
|
+
console.log(err.provider); // which provider failed
|
|
1384
|
+
console.log(err.modality); // which modality
|
|
1385
|
+
console.log(err.model); // which model (if known)
|
|
1386
|
+
console.log(err.cause); // underlying error
|
|
1387
|
+
console.log(err.isRetryable()); // whether retry might help
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
328
1390
|
```
|
|
329
1391
|
|
|
330
|
-
|
|
1392
|
+
### Error Codes
|
|
331
1393
|
|
|
332
|
-
|
|
|
333
|
-
|
|
334
|
-
|
|
|
335
|
-
|
|
|
336
|
-
|
|
|
337
|
-
|
|
|
338
|
-
|
|
|
1394
|
+
| Code | Description | Retryable | Failover |
|
|
1395
|
+
|---|---|---|---|
|
|
1396
|
+
| `PROVIDER_UNAVAILABLE` | Provider is down or unreachable | Yes | Yes |
|
|
1397
|
+
| `RATE_LIMITED` | API rate limit exceeded | Yes | Yes |
|
|
1398
|
+
| `TIMEOUT` | Request exceeded timeout | Yes | Yes |
|
|
1399
|
+
| `GENERATION_FAILED` | Generation error (bad prompt, model issue) | Yes | No |
|
|
1400
|
+
| `AUTH_FAILED` | Invalid or missing API key | No | No |
|
|
1401
|
+
| `MODEL_NOT_FOUND` | Requested model doesn't exist | No | No |
|
|
1402
|
+
| `INVALID_INPUT` | Bad parameters or unsupported operation | No | No |
|
|
1403
|
+
| `NO_PROVIDER` | No provider available for the requested modality | No | No |
|
|
1404
|
+
|
|
1405
|
+
---
|
|
1406
|
+
|
|
1407
|
+
## Custom Providers
|
|
1408
|
+
|
|
1409
|
+
Extend Noosphere with your own providers:
|
|
1410
|
+
|
|
1411
|
+
```typescript
|
|
1412
|
+
import type { NoosphereProvider, ModelInfo, ChatOptions, NoosphereResult, Modality } from 'noosphere';
|
|
1413
|
+
|
|
1414
|
+
const myProvider: NoosphereProvider = {
|
|
1415
|
+
// Required properties
|
|
1416
|
+
id: 'my-provider',
|
|
1417
|
+
name: 'My Custom Provider',
|
|
1418
|
+
modalities: ['llm', 'image'] as Modality[],
|
|
1419
|
+
isLocal: false,
|
|
1420
|
+
|
|
1421
|
+
// Required methods
|
|
1422
|
+
async ping() { return true; },
|
|
1423
|
+
async listModels(modality?: Modality): Promise<ModelInfo[]> {
|
|
1424
|
+
return [{
|
|
1425
|
+
id: 'my-model',
|
|
1426
|
+
provider: 'my-provider',
|
|
1427
|
+
name: 'My Model',
|
|
1428
|
+
modality: 'llm',
|
|
1429
|
+
local: false,
|
|
1430
|
+
cost: { price: 1.0, unit: 'per_1m_tokens' },
|
|
1431
|
+
capabilities: {
|
|
1432
|
+
contextWindow: 128000,
|
|
1433
|
+
maxTokens: 4096,
|
|
1434
|
+
supportsVision: false,
|
|
1435
|
+
supportsStreaming: true,
|
|
1436
|
+
},
|
|
1437
|
+
}];
|
|
1438
|
+
},
|
|
1439
|
+
|
|
1440
|
+
// Optional methods — implement per modality
|
|
1441
|
+
async chat(options: ChatOptions): Promise<NoosphereResult> {
|
|
1442
|
+
const start = Date.now();
|
|
1443
|
+
// ... your implementation
|
|
1444
|
+
return {
|
|
1445
|
+
content: 'Response text',
|
|
1446
|
+
provider: 'my-provider',
|
|
1447
|
+
model: 'my-model',
|
|
1448
|
+
modality: 'llm',
|
|
1449
|
+
latencyMs: Date.now() - start,
|
|
1450
|
+
usage: { cost: 0.001, input: 100, output: 50, unit: 'tokens' },
|
|
1451
|
+
};
|
|
1452
|
+
},
|
|
1453
|
+
|
|
1454
|
+
// stream?(options): NoosphereStream
|
|
1455
|
+
// image?(options): Promise<NoosphereResult>
|
|
1456
|
+
// video?(options): Promise<NoosphereResult>
|
|
1457
|
+
// speak?(options): Promise<NoosphereResult>
|
|
1458
|
+
// dispose?(): Promise<void>
|
|
1459
|
+
};
|
|
1460
|
+
|
|
1461
|
+
ai.registerProvider(myProvider);
|
|
1462
|
+
```
|
|
1463
|
+
|
|
1464
|
+
---
|
|
1465
|
+
|
|
1466
|
+
## Provider Summary
|
|
1467
|
+
|
|
1468
|
+
| Provider | ID | Modalities | Type | Models | Library |
|
|
1469
|
+
|---|---|---|---|---|---|
|
|
1470
|
+
| Pi-AI Gateway | `pi-ai` | LLM | Cloud | 246+ | `@mariozechner/pi-ai` |
|
|
1471
|
+
| FAL.ai | `fal` | Image, Video, TTS | Cloud | 867+ | `@fal-ai/client` |
|
|
1472
|
+
| Hugging Face | `huggingface` | LLM, Image, TTS | Cloud | Unlimited (any HF model) | `@huggingface/inference` |
|
|
1473
|
+
| ComfyUI | `comfyui` | Image | Local | SDXL workflows | Direct HTTP |
|
|
1474
|
+
| Piper TTS | `piper` | TTS | Local | Piper voices | Direct HTTP |
|
|
1475
|
+
| Kokoro TTS | `kokoro` | TTS | Local | Kokoro voices | Direct HTTP |
|
|
339
1476
|
|
|
340
1477
|
## Requirements
|
|
341
1478
|
|