noosphere 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +820 -122
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,13 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
Unified AI creation engine — text, image, video, and audio generation across all providers through a single interface.
|
|
4
4
|
|
|
5
|
+
One import. Every model. Every modality.
|
|
6
|
+
|
|
5
7
|
## Features
|
|
6
8
|
|
|
7
|
-
- **
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
9
|
+
- **4 modalities** — LLM chat, image generation, video generation, and text-to-speech
|
|
10
|
+
- **246+ LLM models** — via Pi-AI gateway (OpenAI, Anthropic, Google, Groq, Mistral, xAI, Cerebras, OpenRouter)
|
|
11
|
+
- **867+ media endpoints** — via FAL (Flux, SDXL, Kling, Sora 2, VEO 3, Kokoro, ElevenLabs, and hundreds more)
|
|
12
|
+
- **30+ HuggingFace tasks** — LLM, image, TTS, translation, summarization, classification, and more
|
|
13
|
+
- **Local-first architecture** — Auto-detects ComfyUI, Ollama, Piper, and Kokoro on your machine
|
|
14
|
+
- **Agentic capabilities** — Tool use, function calling, reasoning/thinking, vision, and agent loops via Pi-AI
|
|
10
15
|
- **Failover & retry** — Automatic retries with exponential backoff and cross-provider failover
|
|
11
|
-
- **Usage tracking** —
|
|
16
|
+
- **Usage tracking** — Real-time cost, latency, and token tracking across all providers
|
|
12
17
|
- **TypeScript-first** — Full type definitions with ESM and CommonJS support
|
|
13
18
|
|
|
14
19
|
## Install
|
|
@@ -56,7 +61,7 @@ const audio = await ai.speak({
|
|
|
56
61
|
|
|
57
62
|
## Configuration
|
|
58
63
|
|
|
59
|
-
API keys are resolved from the constructor or environment variables:
|
|
64
|
+
API keys are resolved from the constructor config or environment variables (config takes priority):
|
|
60
65
|
|
|
61
66
|
```typescript
|
|
62
67
|
const ai = new Noosphere({
|
|
@@ -80,47 +85,118 @@ Or set environment variables:
|
|
|
80
85
|
|---|---|
|
|
81
86
|
| `OPENAI_API_KEY` | OpenAI |
|
|
82
87
|
| `ANTHROPIC_API_KEY` | Anthropic |
|
|
83
|
-
| `GEMINI_API_KEY` | Google |
|
|
84
|
-
| `FAL_KEY` | FAL |
|
|
88
|
+
| `GEMINI_API_KEY` | Google Gemini |
|
|
89
|
+
| `FAL_KEY` | FAL.ai |
|
|
85
90
|
| `HUGGINGFACE_TOKEN` | Hugging Face |
|
|
86
91
|
| `GROQ_API_KEY` | Groq |
|
|
87
92
|
| `MISTRAL_API_KEY` | Mistral |
|
|
88
|
-
| `XAI_API_KEY` | xAI |
|
|
93
|
+
| `XAI_API_KEY` | xAI (Grok) |
|
|
89
94
|
| `OPENROUTER_API_KEY` | OpenRouter |
|
|
90
95
|
|
|
91
|
-
|
|
96
|
+
### Full Configuration Reference
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
const ai = new Noosphere({
|
|
100
|
+
// API keys (or use env vars above)
|
|
101
|
+
keys: { /* ... */ },
|
|
102
|
+
|
|
103
|
+
// Default models per modality
|
|
104
|
+
defaults: {
|
|
105
|
+
llm: { provider: 'pi-ai', model: 'claude-sonnet-4-20250514' },
|
|
106
|
+
image: { provider: 'fal', model: 'fal-ai/flux/schnell' },
|
|
107
|
+
video: { provider: 'fal', model: 'fal-ai/kling-video/v2/master/text-to-video' },
|
|
108
|
+
tts: { provider: 'fal', model: 'fal-ai/kokoro/american-english' },
|
|
109
|
+
},
|
|
110
|
+
|
|
111
|
+
// Local service configuration
|
|
112
|
+
autoDetectLocal: true, // env: NOOSPHERE_AUTO_DETECT_LOCAL
|
|
113
|
+
local: {
|
|
114
|
+
ollama: { enabled: true, host: 'http://localhost', port: 11434 },
|
|
115
|
+
comfyui: { enabled: true, host: 'http://localhost', port: 8188 },
|
|
116
|
+
piper: { enabled: true, host: 'http://localhost', port: 5500 },
|
|
117
|
+
kokoro: { enabled: true, host: 'http://localhost', port: 5501 },
|
|
118
|
+
custom: [], // additional LocalServiceConfig[]
|
|
119
|
+
},
|
|
120
|
+
|
|
121
|
+
// Retry & failover
|
|
122
|
+
retry: {
|
|
123
|
+
maxRetries: 2, // default: 2
|
|
124
|
+
backoffMs: 1000, // default: 1000 (exponential: 1s, 2s, 4s...)
|
|
125
|
+
failover: true, // default: true — try other providers on failure
|
|
126
|
+
retryableErrors: ['PROVIDER_UNAVAILABLE', 'RATE_LIMITED', 'TIMEOUT'],
|
|
127
|
+
},
|
|
128
|
+
|
|
129
|
+
// Timeouts per modality (ms)
|
|
130
|
+
timeout: {
|
|
131
|
+
llm: 30000, // 30s
|
|
132
|
+
image: 120000, // 2min
|
|
133
|
+
video: 300000, // 5min
|
|
134
|
+
tts: 60000, // 1min
|
|
135
|
+
},
|
|
136
|
+
|
|
137
|
+
// Model discovery cache (minutes)
|
|
138
|
+
discoveryCacheTTL: 60, // env: NOOSPHERE_DISCOVERY_CACHE_TTL
|
|
139
|
+
|
|
140
|
+
// Real-time usage callback
|
|
141
|
+
onUsage: (event) => {
|
|
142
|
+
console.log(`${event.provider}/${event.model}: $${event.cost} (${event.latencyMs}ms)`);
|
|
143
|
+
},
|
|
144
|
+
});
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Local Service Environment Variables
|
|
148
|
+
|
|
149
|
+
| Variable | Default | Description |
|
|
150
|
+
|---|---|---|
|
|
151
|
+
| `OLLAMA_HOST` | `http://localhost` | Ollama server host |
|
|
152
|
+
| `OLLAMA_PORT` | `11434` | Ollama server port |
|
|
153
|
+
| `COMFYUI_HOST` | `http://localhost` | ComfyUI server host |
|
|
154
|
+
| `COMFYUI_PORT` | `8188` | ComfyUI server port |
|
|
155
|
+
| `PIPER_HOST` | `http://localhost` | Piper TTS server host |
|
|
156
|
+
| `PIPER_PORT` | `5500` | Piper TTS server port |
|
|
157
|
+
| `KOKORO_HOST` | `http://localhost` | Kokoro TTS server host |
|
|
158
|
+
| `KOKORO_PORT` | `5501` | Kokoro TTS server port |
|
|
159
|
+
| `NOOSPHERE_AUTO_DETECT_LOCAL` | `true` | Enable/disable local service auto-detection |
|
|
160
|
+
| `NOOSPHERE_DISCOVERY_CACHE_TTL` | `60` | Model cache TTL in minutes |
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## API Reference
|
|
92
165
|
|
|
93
166
|
### `new Noosphere(config?)`
|
|
94
167
|
|
|
95
|
-
Creates a new instance. Providers are initialized lazily on first
|
|
168
|
+
Creates a new instance. Providers are initialized lazily on first API call. Auto-detects local services via HTTP pings (2s timeout each).
|
|
96
169
|
|
|
97
|
-
### Generation
|
|
170
|
+
### Generation Methods
|
|
98
171
|
|
|
99
172
|
#### `ai.chat(options): Promise<NoosphereResult>`
|
|
100
173
|
|
|
101
|
-
Generate text with
|
|
174
|
+
Generate text with any LLM. Supports 246+ models across 8 providers.
|
|
102
175
|
|
|
103
176
|
```typescript
|
|
104
177
|
const result = await ai.chat({
|
|
105
|
-
provider: 'anthropic',
|
|
106
|
-
model: 'claude-sonnet-4-20250514',
|
|
178
|
+
provider: 'anthropic', // optional — auto-resolved if omitted
|
|
179
|
+
model: 'claude-sonnet-4-20250514', // optional — uses default or first available
|
|
107
180
|
messages: [
|
|
108
181
|
{ role: 'system', content: 'You are helpful.' },
|
|
109
182
|
{ role: 'user', content: 'Explain quantum computing' },
|
|
110
183
|
],
|
|
111
|
-
temperature: 0.7,
|
|
112
|
-
maxTokens: 1024,
|
|
113
|
-
jsonMode: false,
|
|
184
|
+
temperature: 0.7, // optional (0-2)
|
|
185
|
+
maxTokens: 1024, // optional
|
|
186
|
+
jsonMode: false, // optional
|
|
114
187
|
});
|
|
115
188
|
|
|
116
|
-
console.log(result.content);
|
|
117
|
-
console.log(result.thinking);
|
|
118
|
-
console.log(result.usage.cost);
|
|
189
|
+
console.log(result.content); // response text
|
|
190
|
+
console.log(result.thinking); // reasoning output (Claude, GPT-5, o3, Gemini, Grok-4)
|
|
191
|
+
console.log(result.usage.cost); // cost in USD
|
|
192
|
+
console.log(result.usage.input); // input tokens
|
|
193
|
+
console.log(result.usage.output); // output tokens
|
|
194
|
+
console.log(result.latencyMs); // response time in ms
|
|
119
195
|
```
|
|
120
196
|
|
|
121
197
|
#### `ai.stream(options): NoosphereStream`
|
|
122
198
|
|
|
123
|
-
Stream LLM responses.
|
|
199
|
+
Stream LLM responses token-by-token. Same options as `chat()`.
|
|
124
200
|
|
|
125
201
|
```typescript
|
|
126
202
|
const stream = ai.stream({
|
|
@@ -128,67 +204,95 @@ const stream = ai.stream({
|
|
|
128
204
|
});
|
|
129
205
|
|
|
130
206
|
for await (const event of stream) {
|
|
131
|
-
|
|
132
|
-
|
|
207
|
+
switch (event.type) {
|
|
208
|
+
case 'text_delta':
|
|
209
|
+
process.stdout.write(event.delta!);
|
|
210
|
+
break;
|
|
211
|
+
case 'thinking_delta':
|
|
212
|
+
console.log('[thinking]', event.delta);
|
|
213
|
+
break;
|
|
214
|
+
case 'done':
|
|
215
|
+
console.log('\n\nUsage:', event.result!.usage);
|
|
216
|
+
break;
|
|
217
|
+
case 'error':
|
|
218
|
+
console.error(event.error);
|
|
219
|
+
break;
|
|
220
|
+
}
|
|
133
221
|
}
|
|
134
222
|
|
|
135
|
-
// Or
|
|
223
|
+
// Or consume the full result
|
|
136
224
|
const result = await stream.result();
|
|
225
|
+
|
|
226
|
+
// Abort at any time
|
|
227
|
+
stream.abort();
|
|
137
228
|
```
|
|
138
229
|
|
|
139
230
|
#### `ai.image(options): Promise<NoosphereResult>`
|
|
140
231
|
|
|
141
|
-
Generate images.
|
|
232
|
+
Generate images. Supports 200+ image models via FAL, HuggingFace, and ComfyUI.
|
|
142
233
|
|
|
143
234
|
```typescript
|
|
144
235
|
const result = await ai.image({
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
236
|
+
provider: 'fal', // optional
|
|
237
|
+
model: 'fal-ai/flux-2-pro', // optional
|
|
238
|
+
prompt: 'A futuristic cityscape at sunset',
|
|
239
|
+
negativePrompt: 'blurry, low quality', // optional
|
|
240
|
+
width: 1024, // optional
|
|
241
|
+
height: 768, // optional
|
|
242
|
+
seed: 42, // optional — reproducible results
|
|
243
|
+
steps: 30, // optional — inference steps (more = higher quality)
|
|
244
|
+
guidanceScale: 7.5, // optional — prompt adherence (higher = stricter)
|
|
152
245
|
});
|
|
153
246
|
|
|
154
|
-
console.log(result.url);
|
|
155
|
-
console.log(result.
|
|
247
|
+
console.log(result.url); // image URL (FAL)
|
|
248
|
+
console.log(result.buffer); // image Buffer (HuggingFace, ComfyUI)
|
|
249
|
+
console.log(result.media?.width); // actual dimensions
|
|
250
|
+
console.log(result.media?.height);
|
|
251
|
+
console.log(result.media?.format); // 'png'
|
|
156
252
|
```
|
|
157
253
|
|
|
158
254
|
#### `ai.video(options): Promise<NoosphereResult>`
|
|
159
255
|
|
|
160
|
-
Generate videos.
|
|
256
|
+
Generate videos. Supports 150+ video models via FAL (Kling, Sora 2, VEO 3, WAN, Pixverse, and more).
|
|
161
257
|
|
|
162
258
|
```typescript
|
|
163
259
|
const result = await ai.video({
|
|
260
|
+
provider: 'fal',
|
|
261
|
+
model: 'fal-ai/kling-video/v2/master/text-to-video',
|
|
164
262
|
prompt: 'A bird flying through clouds',
|
|
165
|
-
imageUrl: 'https://...',
|
|
166
|
-
duration: 5,
|
|
167
|
-
fps: 24,
|
|
168
|
-
width: 1280,
|
|
169
|
-
height: 720,
|
|
263
|
+
imageUrl: 'https://...', // optional — image-to-video
|
|
264
|
+
duration: 5, // optional — seconds
|
|
265
|
+
fps: 24, // optional
|
|
266
|
+
width: 1280, // optional
|
|
267
|
+
height: 720, // optional
|
|
170
268
|
});
|
|
171
269
|
|
|
172
|
-
console.log(result.url);
|
|
270
|
+
console.log(result.url); // video URL
|
|
271
|
+
console.log(result.media?.duration); // actual duration
|
|
272
|
+
console.log(result.media?.fps); // frames per second
|
|
273
|
+
console.log(result.media?.format); // 'mp4'
|
|
173
274
|
```
|
|
174
275
|
|
|
175
276
|
#### `ai.speak(options): Promise<NoosphereResult>`
|
|
176
277
|
|
|
177
|
-
Text-to-speech synthesis.
|
|
278
|
+
Text-to-speech synthesis. Supports 50+ TTS models via FAL, HuggingFace, Piper, and Kokoro.
|
|
178
279
|
|
|
179
280
|
```typescript
|
|
180
281
|
const result = await ai.speak({
|
|
282
|
+
provider: 'fal',
|
|
283
|
+
model: 'fal-ai/kokoro/american-english',
|
|
181
284
|
text: 'Hello world',
|
|
182
|
-
voice: '
|
|
183
|
-
language: 'en',
|
|
184
|
-
speed: 1.0,
|
|
185
|
-
format: 'mp3',
|
|
285
|
+
voice: 'af_heart', // optional — voice ID
|
|
286
|
+
language: 'en', // optional
|
|
287
|
+
speed: 1.0, // optional
|
|
288
|
+
format: 'mp3', // optional — 'mp3' | 'wav' | 'ogg'
|
|
186
289
|
});
|
|
187
290
|
|
|
188
|
-
|
|
291
|
+
console.log(result.buffer); // audio Buffer
|
|
292
|
+
console.log(result.url); // audio URL (FAL)
|
|
189
293
|
```
|
|
190
294
|
|
|
191
|
-
### Discovery
|
|
295
|
+
### Discovery Methods
|
|
192
296
|
|
|
193
297
|
#### `ai.getProviders(modality?): Promise<ProviderInfo[]>`
|
|
194
298
|
|
|
@@ -196,15 +300,16 @@ List available providers, optionally filtered by modality.
|
|
|
196
300
|
|
|
197
301
|
```typescript
|
|
198
302
|
const providers = await ai.getProviders('llm');
|
|
199
|
-
// [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount:
|
|
303
|
+
// [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount: 246 }]
|
|
200
304
|
```
|
|
201
305
|
|
|
202
306
|
#### `ai.getModels(modality?): Promise<ModelInfo[]>`
|
|
203
307
|
|
|
204
|
-
List all available models.
|
|
308
|
+
List all available models with full metadata.
|
|
205
309
|
|
|
206
310
|
```typescript
|
|
207
311
|
const models = await ai.getModels('image');
|
|
312
|
+
// Returns ModelInfo[] with id, provider, name, modality, local, cost, capabilities
|
|
208
313
|
```
|
|
209
314
|
|
|
210
315
|
#### `ai.getModel(provider, modelId): Promise<ModelInfo | null>`
|
|
@@ -213,58 +318,618 @@ Get details about a specific model.
|
|
|
213
318
|
|
|
214
319
|
#### `ai.syncModels(): Promise<SyncResult>`
|
|
215
320
|
|
|
216
|
-
Refresh model lists from all providers.
|
|
321
|
+
Refresh model lists from all providers. Returns sync count, per-provider breakdown, and any errors.
|
|
217
322
|
|
|
218
323
|
### Usage Tracking
|
|
219
324
|
|
|
220
325
|
#### `ai.getUsage(options?): UsageSummary`
|
|
221
326
|
|
|
222
|
-
Get aggregated usage statistics.
|
|
327
|
+
Get aggregated usage statistics with optional filtering.
|
|
223
328
|
|
|
224
329
|
```typescript
|
|
225
|
-
const usage = ai.getUsage({
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
330
|
+
const usage = ai.getUsage({
|
|
331
|
+
since: '2024-01-01', // optional — ISO date or Date object
|
|
332
|
+
until: '2024-12-31', // optional
|
|
333
|
+
provider: 'openai', // optional — filter by provider
|
|
334
|
+
modality: 'llm', // optional — filter by modality
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
console.log(usage.totalCost); // total USD spent
|
|
338
|
+
console.log(usage.totalRequests); // number of requests
|
|
339
|
+
console.log(usage.byProvider); // { openai: 2.50, anthropic: 1.20, fal: 0.30 }
|
|
340
|
+
console.log(usage.byModality); // { llm: 3.00, image: 0.70, video: 0.30, tts: 0.00 }
|
|
230
341
|
```
|
|
231
342
|
|
|
232
|
-
|
|
343
|
+
### Lifecycle
|
|
344
|
+
|
|
345
|
+
#### `ai.registerProvider(provider): void`
|
|
346
|
+
|
|
347
|
+
Register a custom provider (see [Custom Providers](#custom-providers)).
|
|
348
|
+
|
|
349
|
+
#### `ai.dispose(): Promise<void>`
|
|
350
|
+
|
|
351
|
+
Cleanup all provider resources, clear model cache, and reset usage tracker.
|
|
352
|
+
|
|
353
|
+
### NoosphereResult
|
|
354
|
+
|
|
355
|
+
Every generation method returns a `NoosphereResult`:
|
|
233
356
|
|
|
234
357
|
```typescript
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
358
|
+
interface NoosphereResult {
|
|
359
|
+
content?: string; // LLM response text
|
|
360
|
+
thinking?: string; // reasoning/thinking output (supported models)
|
|
361
|
+
url?: string; // media URL (images, videos, audio from cloud providers)
|
|
362
|
+
buffer?: Buffer; // media binary data (local providers, HuggingFace)
|
|
363
|
+
provider: string; // which provider handled the request
|
|
364
|
+
model: string; // which model was used
|
|
365
|
+
modality: Modality; // 'llm' | 'image' | 'video' | 'tts'
|
|
366
|
+
latencyMs: number; // request duration in milliseconds
|
|
367
|
+
usage: {
|
|
368
|
+
cost: number; // cost in USD
|
|
369
|
+
input?: number; // input tokens/characters
|
|
370
|
+
output?: number; // output tokens
|
|
371
|
+
unit?: string; // 'tokens' | 'characters' | 'per_image' | 'per_second' | 'free'
|
|
372
|
+
};
|
|
373
|
+
media?: {
|
|
374
|
+
width?: number; // image/video width
|
|
375
|
+
height?: number; // image/video height
|
|
376
|
+
duration?: number; // video/audio duration in seconds
|
|
377
|
+
format?: string; // 'png' | 'mp4' | 'mp3' | 'wav'
|
|
378
|
+
fps?: number; // video frames per second
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
---
|
|
384
|
+
|
|
385
|
+
## Providers In Depth
|
|
386
|
+
|
|
387
|
+
### Pi-AI — LLM Gateway (246+ models)
|
|
388
|
+
|
|
389
|
+
**Provider ID:** `pi-ai`
|
|
390
|
+
**Modalities:** LLM (chat + streaming)
|
|
391
|
+
**Library:** `@mariozechner/pi-ai`
|
|
392
|
+
|
|
393
|
+
A unified gateway that routes to 8 LLM providers through 4 different API protocols:
|
|
394
|
+
|
|
395
|
+
| API Protocol | Providers |
|
|
396
|
+
|---|---|
|
|
397
|
+
| `anthropic-messages` | Anthropic |
|
|
398
|
+
| `google-generative-ai` | Google |
|
|
399
|
+
| `openai-responses` | OpenAI (reasoning models) |
|
|
400
|
+
| `openai-completions` | OpenAI, xAI, Groq, Cerebras, Zai, OpenRouter |
|
|
401
|
+
|
|
402
|
+
#### Anthropic Models (19)
|
|
403
|
+
|
|
404
|
+
| Model | Context | Reasoning | Vision | Input Cost | Output Cost |
|
|
405
|
+
|---|---|---|---|---|---|
|
|
406
|
+
| `claude-opus-4-0` | 200k | Yes | Yes | $15/M | $75/M |
|
|
407
|
+
| `claude-opus-4-1` | 200k | Yes | Yes | $15/M | $75/M |
|
|
408
|
+
| `claude-sonnet-4-20250514` | 200k | Yes | Yes | $3/M | $15/M |
|
|
409
|
+
| `claude-sonnet-4-5-20250929` | 200k | Yes | Yes | $3/M | $15/M |
|
|
410
|
+
| `claude-3-7-sonnet-20250219` | 200k | Yes | Yes | $3/M | $15/M |
|
|
411
|
+
| `claude-3-5-sonnet-20241022` | 200k | No | Yes | $3/M | $15/M |
|
|
412
|
+
| `claude-haiku-4-5-20251001` | 200k | No | Yes | $0.80/M | $4/M |
|
|
413
|
+
| `claude-3-5-haiku-20241022` | 200k | No | Yes | $0.80/M | $4/M |
|
|
414
|
+
| `claude-3-haiku-20240307` | 200k | No | Yes | $0.25/M | $1.25/M |
|
|
415
|
+
| *...and 10 more variants* | | | | | |
|
|
416
|
+
|
|
417
|
+
#### OpenAI Models (24)
|
|
418
|
+
|
|
419
|
+
| Model | Context | Reasoning | Vision | Input Cost | Output Cost |
|
|
420
|
+
|---|---|---|---|---|---|
|
|
421
|
+
| `gpt-5` | 200k | Yes | Yes | $10/M | $30/M |
|
|
422
|
+
| `gpt-5-mini` | 200k | Yes | Yes | $2.50/M | $10/M |
|
|
423
|
+
| `gpt-4.1` | 128k | No | Yes | $2/M | $8/M |
|
|
424
|
+
| `gpt-4.1-mini` | 128k | No | Yes | $0.40/M | $1.60/M |
|
|
425
|
+
| `gpt-4.1-nano` | 128k | No | Yes | $0.10/M | $0.40/M |
|
|
426
|
+
| `gpt-4o` | 128k | No | Yes | $2.50/M | $10/M |
|
|
427
|
+
| `gpt-4o-mini` | 128k | No | Yes | $0.15/M | $0.60/M |
|
|
428
|
+
| `o3-pro` | 200k | Yes | Yes | $20/M | $80/M |
|
|
429
|
+
| `o3-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
|
|
430
|
+
| `o4-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
|
|
431
|
+
| `codex-mini-latest` | 200k | Yes | No | $1.50/M | $6/M |
|
|
432
|
+
| *...and 13 more variants* | | | | | |
|
|
433
|
+
|
|
434
|
+
#### Google Gemini Models (19)
|
|
435
|
+
|
|
436
|
+
| Model | Context | Reasoning | Vision | Cost |
|
|
437
|
+
|---|---|---|---|---|
|
|
438
|
+
| `gemini-2.5-flash` | 1M | Yes | Yes | $0.15-0.60/M |
|
|
439
|
+
| `gemini-2.5-pro` | 1M | Yes | Yes | $1.25-10/M |
|
|
440
|
+
| `gemini-2.0-flash` | 1M | No | Yes | $0.10-0.40/M |
|
|
441
|
+
| `gemini-2.0-flash-lite` | 1M | No | Yes | $0.025-0.10/M |
|
|
442
|
+
| `gemini-1.5-flash` | 1M | No | Yes | $0.075-0.30/M |
|
|
443
|
+
| `gemini-1.5-pro` | 2M | No | Yes | $1.25-5/M |
|
|
444
|
+
| *...and 13 more variants* | | | | |
|
|
445
|
+
|
|
446
|
+
#### xAI Grok Models (20)
|
|
447
|
+
|
|
448
|
+
| Model | Context | Reasoning | Vision | Input Cost |
|
|
449
|
+
|---|---|---|---|---|
|
|
450
|
+
| `grok-4` | 256k | Yes | Yes | $5/M |
|
|
451
|
+
| `grok-4-fast` | 256k | Yes | Yes | $3/M |
|
|
452
|
+
| `grok-3` | 131k | No | Yes | $3/M |
|
|
453
|
+
| `grok-3-fast` | 131k | No | Yes | $5/M |
|
|
454
|
+
| `grok-3-mini-fast-latest` | 131k | Yes | No | $0.30/M |
|
|
455
|
+
| `grok-2-vision` | 32k | No | Yes | $2/M |
|
|
456
|
+
| *...and 14 more variants* | | | | |
|
|
457
|
+
|
|
458
|
+
#### Groq Models (15)
|
|
459
|
+
|
|
460
|
+
| Model | Context | Cost |
|
|
461
|
+
|---|---|---|
|
|
462
|
+
| `llama-3.3-70b-versatile` | 128k | $0.59/M |
|
|
463
|
+
| `llama-3.1-8b-instant` | 128k | $0.05/M |
|
|
464
|
+
| `mistral-saba-24b` | 32k | $0.40/M |
|
|
465
|
+
| `qwen-qwq-32b` | 128k | $0.29/M |
|
|
466
|
+
| `deepseek-r1-distill-llama-70b` | 128k | $0.75/M |
|
|
467
|
+
| *...and 10 more* | | |
|
|
468
|
+
|
|
469
|
+
#### Cerebras Models (3)
|
|
470
|
+
|
|
471
|
+
`gpt-oss-120b`, `qwen-3-235b-a22b-instruct-2507`, `qwen-3-coder-480b`
|
|
472
|
+
|
|
473
|
+
#### Zai Models (5)
|
|
474
|
+
|
|
475
|
+
`glm-4.6`, `glm-4.5`, `glm-4.5-flash`, `glm-4.5v`, `glm-4.5-air`
|
|
476
|
+
|
|
477
|
+
#### OpenRouter (141 models)
|
|
478
|
+
|
|
479
|
+
Aggregator providing access to hundreds of additional models including Llama, Deepseek, Mistral, Qwen, and many more. Full list available via `ai.getModels('llm')`.
|
|
480
|
+
|
|
481
|
+
#### Agentic Capabilities (via Pi-AI library)
|
|
482
|
+
|
|
483
|
+
The underlying `@mariozechner/pi-ai` library exposes powerful agentic features. While Noosphere currently surfaces chat and streaming, the library provides:
|
|
484
|
+
|
|
485
|
+
**Tool Use / Function Calling:**
|
|
486
|
+
```typescript
|
|
487
|
+
// Supported across Anthropic, OpenAI, Google, xAI, Groq
|
|
488
|
+
// Tool definitions use TypeBox schemas for runtime validation
|
|
489
|
+
interface Tool<TParameters extends TSchema = TSchema> {
|
|
490
|
+
name: string;
|
|
491
|
+
description: string;
|
|
492
|
+
parameters: TParameters; // TypeBox schema — validated at runtime with AJV
|
|
493
|
+
}
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**Reasoning / Thinking:**
|
|
497
|
+
- **Anthropic:** `thinkingEnabled`, `thinkingBudgetTokens` — Claude Opus/Sonnet extended thinking
|
|
498
|
+
- **OpenAI:** `reasoningEffort` (minimal/low/medium/high) — o1/o3/o4/GPT-5 reasoning
|
|
499
|
+
- **Google:** `thinking.enabled`, `thinking.budgetTokens` — Gemini 2.5 thinking
|
|
500
|
+
- **xAI:** Grok-4 native reasoning
|
|
501
|
+
- Thinking blocks are automatically extracted and streamed as separate `thinking_delta` events
|
|
502
|
+
|
|
503
|
+
**Vision / Multimodal Input:**
|
|
504
|
+
```typescript
|
|
505
|
+
// Send images alongside text to vision-capable models
|
|
506
|
+
{
|
|
507
|
+
role: "user",
|
|
508
|
+
content: [
|
|
509
|
+
{ type: "text", text: "What's in this image?" },
|
|
510
|
+
{ type: "image", data: base64String, mimeType: "image/png" }
|
|
511
|
+
]
|
|
512
|
+
}
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
**Agent Loop:**
|
|
516
|
+
```typescript
|
|
517
|
+
// Built-in agentic execution loop with automatic tool calling
|
|
518
|
+
import { agentLoop } from '@mariozechner/pi-ai';
|
|
519
|
+
|
|
520
|
+
const events = agentLoop(prompt, context, {
|
|
521
|
+
tools: [myTool],
|
|
522
|
+
model: getModel('anthropic', 'claude-sonnet-4-20250514'),
|
|
239
523
|
});
|
|
524
|
+
|
|
525
|
+
for await (const event of events) {
|
|
526
|
+
// event.type: agent_start → turn_start → message_start →
|
|
527
|
+
// message_update → tool_execution_start → tool_execution_end →
|
|
528
|
+
// message_end → turn_end → agent_end
|
|
529
|
+
}
|
|
240
530
|
```
|
|
241
531
|
|
|
242
|
-
|
|
532
|
+
**Cost Tracking per Model:**
|
|
533
|
+
```typescript
|
|
534
|
+
// Costs tracked per 1M tokens with cache-aware pricing
|
|
535
|
+
{
|
|
536
|
+
input: number, // cost per 1M input tokens
|
|
537
|
+
output: number, // cost per 1M output tokens
|
|
538
|
+
cacheRead: number, // prompt cache hit cost
|
|
539
|
+
cacheWrite: number, // prompt cache write cost
|
|
540
|
+
}
|
|
541
|
+
```
|
|
542
|
+
|
|
543
|
+
---
|
|
544
|
+
|
|
545
|
+
### FAL — Media Generation (867+ endpoints)
|
|
546
|
+
|
|
547
|
+
**Provider ID:** `fal`
|
|
548
|
+
**Modalities:** Image, Video, TTS
|
|
549
|
+
**Library:** `@fal-ai/client`
|
|
550
|
+
|
|
551
|
+
The largest media generation provider with dynamic pricing fetched at runtime from `https://api.fal.ai/v1/models/pricing`.
|
|
552
|
+
|
|
553
|
+
#### Image Models (200+)
|
|
554
|
+
|
|
555
|
+
**FLUX Family (20+ variants):**
|
|
556
|
+
| Model | Description |
|
|
557
|
+
|---|---|
|
|
558
|
+
| `fal-ai/flux/schnell` | Fast generation (default) |
|
|
559
|
+
| `fal-ai/flux/dev` | Higher quality |
|
|
560
|
+
| `fal-ai/flux-2` | Next generation |
|
|
561
|
+
| `fal-ai/flux-2-pro` | Professional quality |
|
|
562
|
+
| `fal-ai/flux-2-flex` | Flexible variant |
|
|
563
|
+
| `fal-ai/flux-2/edit` | Image editing |
|
|
564
|
+
| `fal-ai/flux-2/lora` | LoRA fine-tuning |
|
|
565
|
+
| `fal-ai/flux-pro/v1.1-ultra` | Ultra high quality |
|
|
566
|
+
| `fal-ai/flux-pro/kontext` | Context-aware generation |
|
|
567
|
+
| `fal-ai/flux-lora` | Custom style training |
|
|
568
|
+
| `fal-ai/flux-vision-upscaler` | AI upscaling |
|
|
569
|
+
| `fal-ai/flux-krea-trainer` | Model training |
|
|
570
|
+
| `fal-ai/flux-lora-fast-training` | Fast fine-tuning |
|
|
571
|
+
| `fal-ai/flux-lora-portrait-trainer` | Portrait specialist |
|
|
572
|
+
|
|
573
|
+
**Stable Diffusion:**
|
|
574
|
+
`fal-ai/stable-diffusion-v15`, `fal-ai/stable-diffusion-v35-large`, `fal-ai/stable-diffusion-v35-medium`, `fal-ai/stable-diffusion-v3-medium`
|
|
575
|
+
|
|
576
|
+
**Other Image Models:**
|
|
577
|
+
| Model | Description |
|
|
578
|
+
|---|---|
|
|
579
|
+
| `fal-ai/recraft/v3/text-to-image` | Artistic generation |
|
|
580
|
+
| `fal-ai/ideogram/v2`, `v2a`, `v3` | Ideogram series |
|
|
581
|
+
| `fal-ai/imagen3`, `fal-ai/imagen4/preview` | Google Imagen |
|
|
582
|
+
| `fal-ai/gpt-image-1` | GPT image generation |
|
|
583
|
+
| `fal-ai/gpt-image-1/edit-image` | GPT image editing |
|
|
584
|
+
| `fal-ai/reve/text-to-image` | Reve generation |
|
|
585
|
+
| `fal-ai/sana`, `fal-ai/sana/sprint` | Sana models |
|
|
586
|
+
| `fal-ai/pixart-sigma` | PixArt Sigma |
|
|
587
|
+
| `fal-ai/bria/text-to-image/base` | Bria AI |
|
|
588
|
+
|
|
589
|
+
**Pre-trained LoRA Styles:**
|
|
590
|
+
`fal-ai/flux-2-lora-gallery/sepia-vintage`, `virtual-tryon`, `satellite-view-style`, `realism`, `multiple-angles`, `hdr-style`, `face-to-full-portrait`, `digital-comic-art`, `ballpoint-pen-sketch`, `apartment-staging`, `add-background`
|
|
591
|
+
|
|
592
|
+
**Image Editing/Enhancement (30+ tools):**
|
|
593
|
+
`fal-ai/image-editing/age-progression`, `baby-version`, `background-change`, `hair-change`, `expression-change`, `object-removal`, `photo-restoration`, `style-transfer`, and many more.
|
|
594
|
+
|
|
595
|
+
#### Video Models (150+)
|
|
596
|
+
|
|
597
|
+
**Kling Video (20+ variants):**
|
|
598
|
+
| Model | Description |
|
|
599
|
+
|---|---|
|
|
600
|
+
| `fal-ai/kling-video/v2/master/text-to-video` | Default text-to-video |
|
|
601
|
+
| `fal-ai/kling-video/v2/master/image-to-video` | Image-to-video |
|
|
602
|
+
| `fal-ai/kling-video/v2.5-turbo/pro/text-to-video` | Turbo pro |
|
|
603
|
+
| `fal-ai/kling-video/o1/image-to-video` | O1 quality |
|
|
604
|
+
| `fal-ai/kling-video/o1/video-to-video/edit` | Video editing |
|
|
605
|
+
| `fal-ai/kling-video/lipsync/audio-to-video` | Lip sync |
|
|
606
|
+
| `fal-ai/kling-video/video-to-audio` | Audio extraction |
|
|
607
|
+
|
|
608
|
+
**Sora 2 (OpenAI):**
|
|
609
|
+
| Model | Description |
|
|
610
|
+
|---|---|
|
|
611
|
+
| `fal-ai/sora-2/text-to-video` | Text-to-video |
|
|
612
|
+
| `fal-ai/sora-2/text-to-video/pro` | Pro quality |
|
|
613
|
+
| `fal-ai/sora-2/image-to-video` | Image-to-video |
|
|
614
|
+
| `fal-ai/sora-2/video-to-video/remix` | Video remixing |
|
|
615
|
+
|
|
616
|
+
**VEO 3 (Google):**
|
|
617
|
+
| Model | Description |
|
|
618
|
+
|---|---|
|
|
619
|
+
| `fal-ai/veo3` | VEO 3 standard |
|
|
620
|
+
| `fal-ai/veo3/fast` | Fast variant |
|
|
621
|
+
| `fal-ai/veo3/image-to-video` | Image-to-video |
|
|
622
|
+
| `fal-ai/veo3.1` | Latest version |
|
|
623
|
+
| `fal-ai/veo3.1/reference-to-video` | Reference-guided |
|
|
624
|
+
| `fal-ai/veo3.1/first-last-frame-to-video` | Frame interpolation |
|
|
625
|
+
|
|
626
|
+
**WAN (15+ variants):**
|
|
627
|
+
`fal-ai/wan-pro/text-to-video`, `fal-ai/wan-pro/image-to-video`, `fal-ai/wan/v2.2-a14b/text-to-video`, `fal-ai/wan-vace-14b/depth`, `fal-ai/wan-vace-14b/inpainting`, `fal-ai/wan-vace-14b/pose`, `fal-ai/wan-effects`
|
|
628
|
+
|
|
629
|
+
**Pixverse (20+ variants):**
|
|
630
|
+
`fal-ai/pixverse/v5.5/text-to-video`, `fal-ai/pixverse/v5.5/image-to-video`, `fal-ai/pixverse/v5.5/effects`, `fal-ai/pixverse/lipsync`, `fal-ai/pixverse/sound-effects`
|
|
631
|
+
|
|
632
|
+
**Minimax / Hailuo:**
|
|
633
|
+
`fal-ai/minimax/hailuo-2.3/text-to-video/pro`, `fal-ai/minimax/hailuo-2.3/image-to-video/pro`, `fal-ai/minimax/video-01-director`, `fal-ai/minimax/video-01-live`
|
|
634
|
+
|
|
635
|
+
**Other Video Models:**
|
|
636
|
+
| Provider | Models |
|
|
637
|
+
|---|---|
|
|
638
|
+
| Hunyuan | `fal-ai/hunyuan-video/text-to-video`, `image-to-video`, `video-to-video`, `foley` |
|
|
639
|
+
| Pika | `fal-ai/pika/v2.2/text-to-video`, `pikascenes`, `pikaffects` |
|
|
640
|
+
| LTX | `fal-ai/ltx-2/text-to-video`, `image-to-video`, `retake-video` |
|
|
641
|
+
| Luma | `fal-ai/luma-dream-machine/ray-2`, `ray-2-flash`, `luma-photon` |
|
|
642
|
+
| Vidu | `fal-ai/vidu/q2/text-to-video`, `image-to-video/pro` |
|
|
643
|
+
| CogVideoX | `fal-ai/cogvideox-5b/text-to-video`, `video-to-video` |
|
|
644
|
+
| Seedance | `fal-ai/bytedance/seedance/v1/text-to-video`, `image-to-video` |
|
|
645
|
+
| Magi | `fal-ai/magi/text-to-video`, `extend-video` |
|
|
646
|
+
|
|
647
|
+
#### TTS / Speech Models (50+)
|
|
648
|
+
|
|
649
|
+
**Kokoro (9 languages, 20+ voices per language):**
|
|
650
|
+
| Model | Language | Example Voices |
|
|
651
|
+
|---|---|---|
|
|
652
|
+
| `fal-ai/kokoro/american-english` | English (US) | af_heart, af_alloy, af_bella, af_nova, am_adam, am_echo, am_onyx |
|
|
653
|
+
| `fal-ai/kokoro/british-english` | English (UK) | British voice set |
|
|
654
|
+
| `fal-ai/kokoro/french` | French | French voice set |
|
|
655
|
+
| `fal-ai/kokoro/japanese` | Japanese | Japanese voice set |
|
|
656
|
+
| `fal-ai/kokoro/spanish` | Spanish | Spanish voice set |
|
|
657
|
+
| `fal-ai/kokoro/mandarin-chinese` | Chinese | Mandarin voice set |
|
|
658
|
+
| `fal-ai/kokoro/italian` | Italian | Italian voice set |
|
|
659
|
+
| `fal-ai/kokoro/hindi` | Hindi | Hindi voice set |
|
|
660
|
+
| `fal-ai/kokoro/brazilian-portuguese` | Portuguese | Portuguese voice set |
|
|
661
|
+
|
|
662
|
+
**ElevenLabs:**
|
|
663
|
+
| Model | Description |
|
|
664
|
+
|---|---|
|
|
665
|
+
| `fal-ai/elevenlabs/tts/eleven-v3` | Professional quality |
|
|
666
|
+
| `fal-ai/elevenlabs/tts/turbo-v2.5` | Faster inference |
|
|
667
|
+
| `fal-ai/elevenlabs/tts/multilingual-v2` | Multi-language |
|
|
668
|
+
| `fal-ai/elevenlabs/text-to-dialogue/eleven-v3` | Dialogue generation |
|
|
669
|
+
| `fal-ai/elevenlabs/sound-effects/v2` | Sound effects |
|
|
670
|
+
| `fal-ai/elevenlabs/speech-to-text` | Transcription |
|
|
671
|
+
| `fal-ai/elevenlabs/audio-isolation` | Background removal |
|
|
672
|
+
|
|
673
|
+
**Other TTS:**
|
|
674
|
+
`fal-ai/f5-tts` (voice cloning), `fal-ai/dia-tts`, `fal-ai/minimax/speech-2.6-turbo`, `fal-ai/minimax/speech-2.6-hd`, `fal-ai/chatterbox/text-to-speech`, `fal-ai/index-tts-2/text-to-speech`
|
|
675
|
+
|
|
676
|
+
#### FAL Client Capabilities
|
|
677
|
+
|
|
678
|
+
The `@fal-ai/client` provides additional features beyond what Noosphere surfaces:
|
|
679
|
+
|
|
680
|
+
- **Queue API** — Submit jobs, poll status, get results, cancel. Supports webhooks and priority levels
|
|
681
|
+
- **Streaming API** — Real-time streaming responses via async iterators
|
|
682
|
+
- **Realtime API** — WebSocket connections for interactive use (e.g., real-time image generation)
|
|
683
|
+
- **Storage API** — File upload with configurable TTL (1h, 1d, 7d, 30d, 1y, never)
|
|
684
|
+
- **Retry logic** — Configurable retries with exponential backoff and jitter
|
|
685
|
+
- **Request middleware** — Custom request interceptors and proxy support
|
|
686
|
+
|
|
687
|
+
---
|
|
688
|
+
|
|
689
|
+
### Hugging Face — Open Source AI (30+ tasks)
|
|
243
690
|
|
|
244
|
-
|
|
691
|
+
**Provider ID:** `huggingface`
|
|
692
|
+
**Modalities:** LLM, Image, TTS
|
|
693
|
+
**Library:** `@huggingface/inference`
|
|
694
|
+
|
|
695
|
+
Access to the entire Hugging Face Hub ecosystem. Any model hosted on HuggingFace can be used by passing its ID directly.
|
|
696
|
+
|
|
697
|
+
#### Default Models
|
|
698
|
+
|
|
699
|
+
| Modality | Default Model | Description |
|
|
700
|
+
|---|---|---|
|
|
701
|
+
| LLM | `meta-llama/Llama-3.1-8B-Instruct` | Llama 3.1 8B |
|
|
702
|
+
| Image | `stabilityai/stable-diffusion-xl-base-1.0` | SDXL Base |
|
|
703
|
+
| TTS | `facebook/mms-tts-eng` | MMS TTS English |
|
|
704
|
+
|
|
705
|
+
Any HuggingFace model ID works — just pass it as the `model` parameter:
|
|
245
706
|
|
|
246
707
|
```typescript
|
|
247
|
-
|
|
708
|
+
await ai.chat({
|
|
709
|
+
provider: 'huggingface',
|
|
710
|
+
model: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
711
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
712
|
+
});
|
|
713
|
+
```
|
|
248
714
|
|
|
249
|
-
|
|
250
|
-
id: 'my-provider',
|
|
251
|
-
name: 'My Provider',
|
|
252
|
-
modalities: ['llm'],
|
|
253
|
-
isLocal: false,
|
|
715
|
+
#### Full Library Capabilities
|
|
254
716
|
|
|
255
|
-
|
|
256
|
-
|
|
717
|
+
The `@huggingface/inference` library (v3.15.0) provides 30+ AI tasks, including capabilities not yet surfaced by Noosphere:
|
|
718
|
+
|
|
719
|
+
**Natural Language Processing:**
|
|
720
|
+
| Task | Method | Description |
|
|
721
|
+
|---|---|---|
|
|
722
|
+
| Chat | `chatCompletion()` | OpenAI-compatible chat completions |
|
|
723
|
+
| Chat Streaming | `chatCompletionStream()` | Token-by-token streaming |
|
|
724
|
+
| Text Generation | `textGeneration()` | Raw text completion |
|
|
725
|
+
| Summarization | `summarization()` | Text summarization |
|
|
726
|
+
| Translation | `translation()` | Language translation |
|
|
727
|
+
| Question Answering | `questionAnswering()` | Extract answers from context |
|
|
728
|
+
| Text Classification | `textClassification()` | Sentiment, topic classification |
|
|
729
|
+
| Zero-Shot Classification | `zeroShotClassification()` | Classify without training |
|
|
730
|
+
| Token Classification | `tokenClassification()` | NER, POS tagging |
|
|
731
|
+
| Sentence Similarity | `sentenceSimilarity()` | Semantic similarity scores |
|
|
732
|
+
| Feature Extraction | `featureExtraction()` | Text embeddings |
|
|
733
|
+
| Fill Mask | `fillMask()` | Fill in masked tokens |
|
|
734
|
+
| Table QA | `tableQuestionAnswering()` | Answer questions about tables |
|
|
735
|
+
|
|
736
|
+
**Computer Vision:**
|
|
737
|
+
| Task | Method | Description |
|
|
738
|
+
|---|---|---|
|
|
739
|
+
| Text-to-Image | `textToImage()` | Generate images from text |
|
|
740
|
+
| Image-to-Image | `imageToImage()` | Transform/edit images |
|
|
741
|
+
| Image Captioning | `imageToText()` | Describe images |
|
|
742
|
+
| Classification | `imageClassification()` | Classify image content |
|
|
743
|
+
| Object Detection | `objectDetection()` | Detect and locate objects |
|
|
744
|
+
| Segmentation | `imageSegmentation()` | Pixel-level segmentation |
|
|
745
|
+
| Zero-Shot Image | `zeroShotImageClassification()` | Classify without training |
|
|
746
|
+
| Text-to-Video | `textToVideo()` | Generate videos |
|
|
747
|
+
|
|
748
|
+
**Audio:**
|
|
749
|
+
| Task | Method | Description |
|
|
750
|
+
|---|---|---|
|
|
751
|
+
| Text-to-Speech | `textToSpeech()` | Generate speech |
|
|
752
|
+
| Speech-to-Text | `automaticSpeechRecognition()` | Transcription |
|
|
753
|
+
| Audio Classification | `audioClassification()` | Classify sounds |
|
|
754
|
+
| Audio-to-Audio | `audioToAudio()` | Source separation, enhancement |
|
|
755
|
+
|
|
756
|
+
**Multimodal:**
|
|
757
|
+
| Task | Method | Description |
|
|
758
|
+
|---|---|---|
|
|
759
|
+
| Visual QA | `visualQuestionAnswering()` | Answer questions about images |
|
|
760
|
+
| Document QA | `documentQuestionAnswering()` | Answer questions about documents |
|
|
761
|
+
|
|
762
|
+
**Tabular:**
|
|
763
|
+
| Task | Method | Description |
|
|
764
|
+
|---|---|---|
|
|
765
|
+
| Classification | `tabularClassification()` | Classify tabular data |
|
|
766
|
+
| Regression | `tabularRegression()` | Predict continuous values |
|
|
767
|
+
|
|
768
|
+
#### HuggingFace Agentic Features
|
|
769
|
+
|
|
770
|
+
- **Tool/Function Calling:** Full support via `tools` parameter with `tool_choice` control (auto/none/required)
|
|
771
|
+
- **JSON Schema Responses:** `response_format: { type: 'json_schema', json_schema: {...} }`
|
|
772
|
+
- **Reasoning:** `reasoning_effort` parameter (none/minimal/low/medium/high/xhigh)
|
|
773
|
+
- **Multimodal Input:** Images via `image_url` content chunks in chat messages
|
|
774
|
+
- **17 Inference Providers:** Route through Groq, Together, Fireworks, Replicate, Cerebras, Cohere, and more
|
|
775
|
+
|
|
776
|
+
---
|
|
777
|
+
|
|
778
|
+
### ComfyUI — Local Image Generation
|
|
779
|
+
|
|
780
|
+
**Provider ID:** `comfyui`
|
|
781
|
+
**Modalities:** Image, Video (planned)
|
|
782
|
+
**Type:** Local
|
|
783
|
+
**Default Port:** 8188
|
|
784
|
+
|
|
785
|
+
Connects to a local ComfyUI instance for Stable Diffusion workflows.
|
|
786
|
+
|
|
787
|
+
#### How It Works
|
|
257
788
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
789
|
+
1. Clones a built-in txt2img workflow template (KSampler + SDXL pipeline)
|
|
790
|
+
2. Injects your parameters (prompt, dimensions, seed, steps, guidance)
|
|
791
|
+
3. POSTs the workflow to ComfyUI's `/prompt` endpoint
|
|
792
|
+
4. Polls `/history/{promptId}` every second until completion (max 5 minutes)
|
|
793
|
+
5. Fetches the generated image from `/view`
|
|
794
|
+
6. Returns a PNG buffer
|
|
795
|
+
|
|
796
|
+
#### Configuration
|
|
797
|
+
|
|
798
|
+
```typescript
|
|
799
|
+
const ai = new Noosphere({
|
|
800
|
+
local: {
|
|
801
|
+
comfyui: {
|
|
802
|
+
enabled: true,
|
|
803
|
+
host: 'http://localhost',
|
|
804
|
+
port: 8188,
|
|
805
|
+
},
|
|
261
806
|
},
|
|
262
|
-
};
|
|
807
|
+
});
|
|
808
|
+
```
|
|
263
809
|
|
|
264
|
-
|
|
810
|
+
#### Default Workflow
|
|
811
|
+
|
|
812
|
+
- **Checkpoint:** `sd_xl_base_1.0.safetensors`
|
|
813
|
+
- **Sampler:** euler with normal scheduler
|
|
814
|
+
- **Default Steps:** 20
|
|
815
|
+
- **Default CFG/Guidance:** 7
|
|
816
|
+
- **Default Size:** 1024x1024
|
|
817
|
+
- **Max Size:** 2048x2048
|
|
818
|
+
- **Output:** PNG
|
|
819
|
+
|
|
820
|
+
#### Models Exposed
|
|
821
|
+
|
|
822
|
+
| Model ID | Modality | Description |
|
|
823
|
+
|---|---|---|
|
|
824
|
+
| `comfyui-txt2img` | Image | Text-to-image via workflow |
|
|
825
|
+
| `comfyui-txt2vid` | Video | Planned (requires AnimateDiff workflow) |
|
|
826
|
+
|
|
827
|
+
---
|
|
828
|
+
|
|
829
|
+
### Local TTS — Piper & Kokoro
|
|
830
|
+
|
|
831
|
+
**Provider IDs:** `piper`, `kokoro`
|
|
832
|
+
**Modality:** TTS
|
|
833
|
+
**Type:** Local
|
|
834
|
+
|
|
835
|
+
Connects to local OpenAI-compatible TTS servers.
|
|
836
|
+
|
|
837
|
+
#### Supported Engines
|
|
838
|
+
|
|
839
|
+
| Engine | Default Port | Health Check | Voice Discovery |
|
|
840
|
+
|---|---|---|---|
|
|
841
|
+
| Piper | 5500 | `GET /health` | `GET /voices` |
|
|
842
|
+
| Kokoro | 5501 | `GET /health` | `GET /v1/models` (fallback) |
|
|
843
|
+
|
|
844
|
+
#### API
|
|
845
|
+
|
|
846
|
+
Uses the OpenAI-compatible TTS endpoint:
|
|
847
|
+
|
|
848
|
+
```
|
|
849
|
+
POST /v1/audio/speech
|
|
850
|
+
{
|
|
851
|
+
"model": "tts-1",
|
|
852
|
+
"input": "Hello world",
|
|
853
|
+
"voice": "default",
|
|
854
|
+
"speed": 1.0,
|
|
855
|
+
"response_format": "mp3"
|
|
856
|
+
}
|
|
857
|
+
```
|
|
858
|
+
|
|
859
|
+
Supports `mp3`, `wav`, and `ogg` formats. Returns audio as a Buffer.
|
|
860
|
+
|
|
861
|
+
---
|
|
862
|
+
|
|
863
|
+
## Architecture
|
|
864
|
+
|
|
865
|
+
### Provider Resolution (Local-First)
|
|
866
|
+
|
|
867
|
+
When you call a generation method without specifying a provider, Noosphere resolves one automatically:
|
|
868
|
+
|
|
869
|
+
1. If `model` is specified without `provider` → looks up model in registry cache
|
|
870
|
+
2. If a `default` is configured for the modality → uses that
|
|
871
|
+
3. Otherwise → **local providers first**, then cloud providers
|
|
872
|
+
|
|
873
|
+
```
|
|
874
|
+
resolveProvider(modality):
|
|
875
|
+
1. Check user-specified provider ID → return if found
|
|
876
|
+
2. Check configured defaults → return if found
|
|
877
|
+
3. Scan all providers:
|
|
878
|
+
→ Return first LOCAL provider supporting this modality
|
|
879
|
+
→ Fallback to first CLOUD provider
|
|
880
|
+
4. Throw NO_PROVIDER error
|
|
881
|
+
```
|
|
882
|
+
|
|
883
|
+
### Retry & Failover Logic
|
|
884
|
+
|
|
885
|
+
```
|
|
886
|
+
executeWithRetry(modality, provider, fn):
|
|
887
|
+
for attempt = 0..maxRetries:
|
|
888
|
+
try: return fn()
|
|
889
|
+
catch:
|
|
890
|
+
if error is retryable AND attempts remain:
|
|
891
|
+
wait backoffMs * 2^attempt (exponential backoff)
|
|
892
|
+
retry same provider
|
|
893
|
+
if error is NOT GENERATION_FAILED AND failover enabled:
|
|
894
|
+
try each alternative provider for this modality
|
|
895
|
+
throw last error
|
|
896
|
+
```
|
|
897
|
+
|
|
898
|
+
**Retryable errors (same provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT`, `GENERATION_FAILED`
|
|
899
|
+
|
|
900
|
+
**Failover-eligible errors (cross-provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT` (NOT `GENERATION_FAILED`)
|
|
901
|
+
|
|
902
|
+
### Model Registry & Caching
|
|
903
|
+
|
|
904
|
+
- Models are fetched from providers via `listModels()` and cached in memory
|
|
905
|
+
- Cache TTL is configurable (default: 60 minutes)
|
|
906
|
+
- `syncModels()` forces a refresh of all provider model lists
|
|
907
|
+
- Registry tracks model → provider mappings for fast resolution
|
|
908
|
+
|
|
909
|
+
### Usage Tracking
|
|
910
|
+
|
|
911
|
+
Every API call (success or failure) records a `UsageEvent`:
|
|
912
|
+
|
|
913
|
+
```typescript
|
|
914
|
+
interface UsageEvent {
|
|
915
|
+
modality: 'llm' | 'image' | 'video' | 'tts';
|
|
916
|
+
provider: string;
|
|
917
|
+
model: string;
|
|
918
|
+
cost: number; // USD
|
|
919
|
+
latencyMs: number;
|
|
920
|
+
input?: number; // tokens or characters
|
|
921
|
+
output?: number; // tokens
|
|
922
|
+
unit?: string;
|
|
923
|
+
timestamp: string; // ISO 8601
|
|
924
|
+
success: boolean;
|
|
925
|
+
error?: string; // error message if failed
|
|
926
|
+
metadata?: Record<string, unknown>;
|
|
927
|
+
}
|
|
265
928
|
```
|
|
266
929
|
|
|
267
|
-
|
|
930
|
+
---
|
|
931
|
+
|
|
932
|
+
## Error Handling
|
|
268
933
|
|
|
269
934
|
All errors are instances of `NoosphereError`:
|
|
270
935
|
|
|
@@ -275,67 +940,100 @@ try {
|
|
|
275
940
|
await ai.chat({ messages: [{ role: 'user', content: 'Hello' }] });
|
|
276
941
|
} catch (err) {
|
|
277
942
|
if (err instanceof NoosphereError) {
|
|
278
|
-
console.log(err.code);
|
|
279
|
-
console.log(err.provider);
|
|
280
|
-
console.log(err.modality);
|
|
281
|
-
console.log(err.
|
|
943
|
+
console.log(err.code); // error code
|
|
944
|
+
console.log(err.provider); // which provider failed
|
|
945
|
+
console.log(err.modality); // which modality
|
|
946
|
+
console.log(err.model); // which model (if known)
|
|
947
|
+
console.log(err.cause); // underlying error
|
|
948
|
+
console.log(err.isRetryable()); // whether retry might help
|
|
282
949
|
}
|
|
283
950
|
}
|
|
284
951
|
```
|
|
285
952
|
|
|
286
|
-
Error
|
|
953
|
+
### Error Codes
|
|
287
954
|
|
|
288
|
-
|
|
955
|
+
| Code | Description | Retryable | Failover |
|
|
956
|
+
|---|---|---|---|
|
|
957
|
+
| `PROVIDER_UNAVAILABLE` | Provider is down or unreachable | Yes | Yes |
|
|
958
|
+
| `RATE_LIMITED` | API rate limit exceeded | Yes | Yes |
|
|
959
|
+
| `TIMEOUT` | Request exceeded timeout | Yes | Yes |
|
|
960
|
+
| `GENERATION_FAILED` | Generation error (bad prompt, model issue) | Yes | No |
|
|
961
|
+
| `AUTH_FAILED` | Invalid or missing API key | No | No |
|
|
962
|
+
| `MODEL_NOT_FOUND` | Requested model doesn't exist | No | No |
|
|
963
|
+
| `INVALID_INPUT` | Bad parameters or unsupported operation | No | No |
|
|
964
|
+
| `NO_PROVIDER` | No provider available for the requested modality | No | No |
|
|
289
965
|
|
|
290
|
-
|
|
291
|
-
const ai = new Noosphere({
|
|
292
|
-
retry: {
|
|
293
|
-
maxRetries: 3, // default: 2
|
|
294
|
-
backoffMs: 2000, // default: 1000
|
|
295
|
-
failover: true, // default: true — try other providers on failure
|
|
296
|
-
retryableErrors: ['RATE_LIMITED', 'TIMEOUT', 'PROVIDER_UNAVAILABLE'],
|
|
297
|
-
},
|
|
298
|
-
timeout: {
|
|
299
|
-
llm: 30000, // 30s (default)
|
|
300
|
-
image: 120000, // 2min (default)
|
|
301
|
-
video: 300000, // 5min (default)
|
|
302
|
-
tts: 60000, // 1min (default)
|
|
303
|
-
},
|
|
304
|
-
});
|
|
305
|
-
```
|
|
966
|
+
---
|
|
306
967
|
|
|
307
|
-
|
|
968
|
+
## Custom Providers
|
|
308
969
|
|
|
309
|
-
Noosphere
|
|
970
|
+
Extend Noosphere with your own providers:
|
|
310
971
|
|
|
311
972
|
```typescript
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
973
|
+
import type { NoosphereProvider, ModelInfo, ChatOptions, NoosphereResult, Modality } from 'noosphere';
|
|
974
|
+
|
|
975
|
+
const myProvider: NoosphereProvider = {
|
|
976
|
+
// Required properties
|
|
977
|
+
id: 'my-provider',
|
|
978
|
+
name: 'My Custom Provider',
|
|
979
|
+
modalities: ['llm', 'image'] as Modality[],
|
|
980
|
+
isLocal: false,
|
|
981
|
+
|
|
982
|
+
// Required methods
|
|
983
|
+
async ping() { return true; },
|
|
984
|
+
async listModels(modality?: Modality): Promise<ModelInfo[]> {
|
|
985
|
+
return [{
|
|
986
|
+
id: 'my-model',
|
|
987
|
+
provider: 'my-provider',
|
|
988
|
+
name: 'My Model',
|
|
989
|
+
modality: 'llm',
|
|
990
|
+
local: false,
|
|
991
|
+
cost: { price: 1.0, unit: 'per_1m_tokens' },
|
|
992
|
+
capabilities: {
|
|
993
|
+
contextWindow: 128000,
|
|
994
|
+
maxTokens: 4096,
|
|
995
|
+
supportsVision: false,
|
|
996
|
+
supportsStreaming: true,
|
|
997
|
+
},
|
|
998
|
+
}];
|
|
318
999
|
},
|
|
319
|
-
});
|
|
320
|
-
```
|
|
321
1000
|
|
|
322
|
-
|
|
1001
|
+
// Optional methods — implement per modality
|
|
1002
|
+
async chat(options: ChatOptions): Promise<NoosphereResult> {
|
|
1003
|
+
const start = Date.now();
|
|
1004
|
+
// ... your implementation
|
|
1005
|
+
return {
|
|
1006
|
+
content: 'Response text',
|
|
1007
|
+
provider: 'my-provider',
|
|
1008
|
+
model: 'my-model',
|
|
1009
|
+
modality: 'llm',
|
|
1010
|
+
latencyMs: Date.now() - start,
|
|
1011
|
+
usage: { cost: 0.001, input: 100, output: 50, unit: 'tokens' },
|
|
1012
|
+
};
|
|
1013
|
+
},
|
|
323
1014
|
|
|
324
|
-
|
|
1015
|
+
// stream?(options): NoosphereStream
|
|
1016
|
+
// image?(options): Promise<NoosphereResult>
|
|
1017
|
+
// video?(options): Promise<NoosphereResult>
|
|
1018
|
+
// speak?(options): Promise<NoosphereResult>
|
|
1019
|
+
// dispose?(): Promise<void>
|
|
1020
|
+
};
|
|
325
1021
|
|
|
326
|
-
|
|
327
|
-
await ai.dispose();
|
|
1022
|
+
ai.registerProvider(myProvider);
|
|
328
1023
|
```
|
|
329
1024
|
|
|
330
|
-
|
|
1025
|
+
---
|
|
331
1026
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
|
335
|
-
|
|
336
|
-
|
|
|
337
|
-
|
|
|
338
|
-
|
|
|
1027
|
+
## Provider Summary
|
|
1028
|
+
|
|
1029
|
+
| Provider | ID | Modalities | Type | Models | Library |
|
|
1030
|
+
|---|---|---|---|---|---|
|
|
1031
|
+
| Pi-AI Gateway | `pi-ai` | LLM | Cloud | 246+ | `@mariozechner/pi-ai` |
|
|
1032
|
+
| FAL.ai | `fal` | Image, Video, TTS | Cloud | 867+ | `@fal-ai/client` |
|
|
1033
|
+
| Hugging Face | `huggingface` | LLM, Image, TTS | Cloud | Unlimited (any HF model) | `@huggingface/inference` |
|
|
1034
|
+
| ComfyUI | `comfyui` | Image | Local | SDXL workflows | Direct HTTP |
|
|
1035
|
+
| Piper TTS | `piper` | TTS | Local | Piper voices | Direct HTTP |
|
|
1036
|
+
| Kokoro TTS | `kokoro` | TTS | Local | Kokoro voices | Direct HTTP |
|
|
339
1037
|
|
|
340
1038
|
## Requirements
|
|
341
1039
|
|