noosphere 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +1255 -118
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -2,13 +2,18 @@
2
2
 
3
3
  Unified AI creation engine — text, image, video, and audio generation across all providers through a single interface.
4
4
 
5
+ One import. Every model. Every modality.
6
+
5
7
  ## Features
6
8
 
7
- - **Multi-modal** — LLM chat, image generation, video generation, and text-to-speech
8
- - **Multi-provider** — OpenAI, Anthropic, Google, Groq, Mistral, xAI, OpenRouter, FAL, Hugging Face
9
- - **Local-first** — Auto-detects ComfyUI, Ollama, Piper, and Kokoro running on your machine
9
+ - **4 modalities** — LLM chat, image generation, video generation, and text-to-speech
10
+ - **246+ LLM models** — via Pi-AI gateway (OpenAI, Anthropic, Google, Groq, Mistral, xAI, Cerebras, OpenRouter)
11
+ - **867+ media endpoints** — via FAL (Flux, SDXL, Kling, Sora 2, VEO 3, Kokoro, ElevenLabs, and hundreds more)
12
+ - **30+ HuggingFace tasks** — LLM, image, TTS, translation, summarization, classification, and more
13
+ - **Local-first architecture** — Auto-detects ComfyUI, Ollama, Piper, and Kokoro on your machine
14
+ - **Agentic capabilities** — Tool use, function calling, reasoning/thinking, vision, and agent loops via Pi-AI
10
15
  - **Failover & retry** — Automatic retries with exponential backoff and cross-provider failover
11
- - **Usage tracking** — Track costs, latency, and token counts across all providers
16
+ - **Usage tracking** — Real-time cost, latency, and token tracking across all providers
12
17
  - **TypeScript-first** — Full type definitions with ESM and CommonJS support
13
18
 
14
19
  ## Install
@@ -56,7 +61,7 @@ const audio = await ai.speak({
56
61
 
57
62
  ## Configuration
58
63
 
59
- API keys are resolved from the constructor or environment variables:
64
+ API keys are resolved from the constructor config or environment variables (config takes priority):
60
65
 
61
66
  ```typescript
62
67
  const ai = new Noosphere({
@@ -80,47 +85,118 @@ Or set environment variables:
80
85
  |---|---|
81
86
  | `OPENAI_API_KEY` | OpenAI |
82
87
  | `ANTHROPIC_API_KEY` | Anthropic |
83
- | `GEMINI_API_KEY` | Google |
84
- | `FAL_KEY` | FAL |
88
+ | `GEMINI_API_KEY` | Google Gemini |
89
+ | `FAL_KEY` | FAL.ai |
85
90
  | `HUGGINGFACE_TOKEN` | Hugging Face |
86
91
  | `GROQ_API_KEY` | Groq |
87
92
  | `MISTRAL_API_KEY` | Mistral |
88
- | `XAI_API_KEY` | xAI |
93
+ | `XAI_API_KEY` | xAI (Grok) |
89
94
  | `OPENROUTER_API_KEY` | OpenRouter |
90
95
 
91
- ## API
96
+ ### Full Configuration Reference
97
+
98
+ ```typescript
99
+ const ai = new Noosphere({
100
+ // API keys (or use env vars above)
101
+ keys: { /* ... */ },
102
+
103
+ // Default models per modality
104
+ defaults: {
105
+ llm: { provider: 'pi-ai', model: 'claude-sonnet-4-20250514' },
106
+ image: { provider: 'fal', model: 'fal-ai/flux/schnell' },
107
+ video: { provider: 'fal', model: 'fal-ai/kling-video/v2/master/text-to-video' },
108
+ tts: { provider: 'fal', model: 'fal-ai/kokoro/american-english' },
109
+ },
110
+
111
+ // Local service configuration
112
+ autoDetectLocal: true, // env: NOOSPHERE_AUTO_DETECT_LOCAL
113
+ local: {
114
+ ollama: { enabled: true, host: 'http://localhost', port: 11434 },
115
+ comfyui: { enabled: true, host: 'http://localhost', port: 8188 },
116
+ piper: { enabled: true, host: 'http://localhost', port: 5500 },
117
+ kokoro: { enabled: true, host: 'http://localhost', port: 5501 },
118
+ custom: [], // additional LocalServiceConfig[]
119
+ },
120
+
121
+ // Retry & failover
122
+ retry: {
123
+ maxRetries: 2, // default: 2
124
+ backoffMs: 1000, // default: 1000 (exponential: 1s, 2s, 4s...)
125
+ failover: true, // default: true — try other providers on failure
126
+ retryableErrors: ['PROVIDER_UNAVAILABLE', 'RATE_LIMITED', 'TIMEOUT'],
127
+ },
128
+
129
+ // Timeouts per modality (ms)
130
+ timeout: {
131
+ llm: 30000, // 30s
132
+ image: 120000, // 2min
133
+ video: 300000, // 5min
134
+ tts: 60000, // 1min
135
+ },
136
+
137
+ // Model discovery cache (minutes)
138
+ discoveryCacheTTL: 60, // env: NOOSPHERE_DISCOVERY_CACHE_TTL
139
+
140
+ // Real-time usage callback
141
+ onUsage: (event) => {
142
+ console.log(`${event.provider}/${event.model}: $${event.cost} (${event.latencyMs}ms)`);
143
+ },
144
+ });
145
+ ```
146
+
147
+ ### Local Service Environment Variables
148
+
149
+ | Variable | Default | Description |
150
+ |---|---|---|
151
+ | `OLLAMA_HOST` | `http://localhost` | Ollama server host |
152
+ | `OLLAMA_PORT` | `11434` | Ollama server port |
153
+ | `COMFYUI_HOST` | `http://localhost` | ComfyUI server host |
154
+ | `COMFYUI_PORT` | `8188` | ComfyUI server port |
155
+ | `PIPER_HOST` | `http://localhost` | Piper TTS server host |
156
+ | `PIPER_PORT` | `5500` | Piper TTS server port |
157
+ | `KOKORO_HOST` | `http://localhost` | Kokoro TTS server host |
158
+ | `KOKORO_PORT` | `5501` | Kokoro TTS server port |
159
+ | `NOOSPHERE_AUTO_DETECT_LOCAL` | `true` | Enable/disable local service auto-detection |
160
+ | `NOOSPHERE_DISCOVERY_CACHE_TTL` | `60` | Model cache TTL in minutes |
161
+
162
+ ---
163
+
164
+ ## API Reference
92
165
 
93
166
  ### `new Noosphere(config?)`
94
167
 
95
- Creates a new instance. Providers are initialized lazily on first use.
168
+ Creates a new instance. Providers are initialized lazily on first API call. Auto-detects local services via HTTP pings (2s timeout each).
96
169
 
97
- ### Generation
170
+ ### Generation Methods
98
171
 
99
172
  #### `ai.chat(options): Promise<NoosphereResult>`
100
173
 
101
- Generate text with an LLM.
174
+ Generate text with any LLM. Supports 246+ models across 8 providers.
102
175
 
103
176
  ```typescript
104
177
  const result = await ai.chat({
105
- provider: 'anthropic', // optional — auto-resolved if omitted
106
- model: 'claude-sonnet-4-20250514', // optional
178
+ provider: 'anthropic', // optional — auto-resolved if omitted
179
+ model: 'claude-sonnet-4-20250514', // optional — uses default or first available
107
180
  messages: [
108
181
  { role: 'system', content: 'You are helpful.' },
109
182
  { role: 'user', content: 'Explain quantum computing' },
110
183
  ],
111
- temperature: 0.7, // optional
112
- maxTokens: 1024, // optional
113
- jsonMode: false, // optional
184
+ temperature: 0.7, // optional (0-2)
185
+ maxTokens: 1024, // optional
186
+ jsonMode: false, // optional
114
187
  });
115
188
 
116
- console.log(result.content); // response text
117
- console.log(result.thinking); // reasoning (if supported)
118
- console.log(result.usage.cost); // cost in USD
189
+ console.log(result.content); // response text
190
+ console.log(result.thinking); // reasoning output (Claude, GPT-5, o3, Gemini, Grok-4)
191
+ console.log(result.usage.cost); // cost in USD
192
+ console.log(result.usage.input); // input tokens
193
+ console.log(result.usage.output); // output tokens
194
+ console.log(result.latencyMs); // response time in ms
119
195
  ```
120
196
 
121
197
  #### `ai.stream(options): NoosphereStream`
122
198
 
123
- Stream LLM responses.
199
+ Stream LLM responses token-by-token. Same options as `chat()`.
124
200
 
125
201
  ```typescript
126
202
  const stream = ai.stream({
@@ -128,67 +204,95 @@ const stream = ai.stream({
128
204
  });
129
205
 
130
206
  for await (const event of stream) {
131
- if (event.type === 'text_delta') process.stdout.write(event.delta!);
132
- if (event.type === 'thinking_delta') console.log('[thinking]', event.delta);
207
+ switch (event.type) {
208
+ case 'text_delta':
209
+ process.stdout.write(event.delta!);
210
+ break;
211
+ case 'thinking_delta':
212
+ console.log('[thinking]', event.delta);
213
+ break;
214
+ case 'done':
215
+ console.log('\n\nUsage:', event.result!.usage);
216
+ break;
217
+ case 'error':
218
+ console.error(event.error);
219
+ break;
220
+ }
133
221
  }
134
222
 
135
- // Or get the full result after streaming
223
+ // Or consume the full result
136
224
  const result = await stream.result();
225
+
226
+ // Abort at any time
227
+ stream.abort();
137
228
  ```
138
229
 
139
230
  #### `ai.image(options): Promise<NoosphereResult>`
140
231
 
141
- Generate images.
232
+ Generate images. Supports 200+ image models via FAL, HuggingFace, and ComfyUI.
142
233
 
143
234
  ```typescript
144
235
  const result = await ai.image({
145
- prompt: 'A futuristic cityscape',
146
- negativePrompt: 'blurry, low quality', // optional
147
- width: 1024, // optional
148
- height: 768, // optional
149
- seed: 42, // optional
150
- steps: 30, // optional
151
- guidanceScale: 7.5, // optional
236
+ provider: 'fal', // optional
237
+ model: 'fal-ai/flux-2-pro', // optional
238
+ prompt: 'A futuristic cityscape at sunset',
239
+ negativePrompt: 'blurry, low quality', // optional
240
+ width: 1024, // optional
241
+ height: 768, // optional
242
+ seed: 42, // optional — reproducible results
243
+ steps: 30, // optional — inference steps (more = higher quality)
244
+ guidanceScale: 7.5, // optional — prompt adherence (higher = stricter)
152
245
  });
153
246
 
154
- console.log(result.url); // image URL
155
- console.log(result.media?.width); // dimensions
247
+ console.log(result.url); // image URL (FAL)
248
+ console.log(result.buffer); // image Buffer (HuggingFace, ComfyUI)
249
+ console.log(result.media?.width); // actual dimensions
250
+ console.log(result.media?.height);
251
+ console.log(result.media?.format); // 'png'
156
252
  ```
157
253
 
158
254
  #### `ai.video(options): Promise<NoosphereResult>`
159
255
 
160
- Generate videos.
256
+ Generate videos. Supports 150+ video models via FAL (Kling, Sora 2, VEO 3, WAN, Pixverse, and more).
161
257
 
162
258
  ```typescript
163
259
  const result = await ai.video({
260
+ provider: 'fal',
261
+ model: 'fal-ai/kling-video/v2/master/text-to-video',
164
262
  prompt: 'A bird flying through clouds',
165
- imageUrl: 'https://...', // optional — image-to-video
166
- duration: 5, // optional — seconds
167
- fps: 24, // optional
168
- width: 1280, // optional
169
- height: 720, // optional
263
+ imageUrl: 'https://...', // optional — image-to-video
264
+ duration: 5, // optional — seconds
265
+ fps: 24, // optional
266
+ width: 1280, // optional
267
+ height: 720, // optional
170
268
  });
171
269
 
172
- console.log(result.url);
270
+ console.log(result.url); // video URL
271
+ console.log(result.media?.duration); // actual duration
272
+ console.log(result.media?.fps); // frames per second
273
+ console.log(result.media?.format); // 'mp4'
173
274
  ```
174
275
 
175
276
  #### `ai.speak(options): Promise<NoosphereResult>`
176
277
 
177
- Text-to-speech synthesis.
278
+ Text-to-speech synthesis. Supports 50+ TTS models via FAL, HuggingFace, Piper, and Kokoro.
178
279
 
179
280
  ```typescript
180
281
  const result = await ai.speak({
282
+ provider: 'fal',
283
+ model: 'fal-ai/kokoro/american-english',
181
284
  text: 'Hello world',
182
- voice: 'alloy', // optional
183
- language: 'en', // optional
184
- speed: 1.0, // optional
185
- format: 'mp3', // optional — 'mp3' | 'wav' | 'ogg'
285
+ voice: 'af_heart', // optional — voice ID
286
+ language: 'en', // optional
287
+ speed: 1.0, // optional
288
+ format: 'mp3', // optional — 'mp3' | 'wav' | 'ogg'
186
289
  });
187
290
 
188
- // result.buffer contains the audio data
291
+ console.log(result.buffer); // audio Buffer
292
+ console.log(result.url); // audio URL (FAL)
189
293
  ```
190
294
 
191
- ### Discovery
295
+ ### Discovery Methods
192
296
 
193
297
  #### `ai.getProviders(modality?): Promise<ProviderInfo[]>`
194
298
 
@@ -196,15 +300,16 @@ List available providers, optionally filtered by modality.
196
300
 
197
301
  ```typescript
198
302
  const providers = await ai.getProviders('llm');
199
- // [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount: 42 }]
303
+ // [{ id: 'pi-ai', name: 'Pi-AI', modalities: ['llm'], local: false, status: 'online', modelCount: 246 }]
200
304
  ```
201
305
 
202
306
  #### `ai.getModels(modality?): Promise<ModelInfo[]>`
203
307
 
204
- List all available models.
308
+ List all available models with full metadata.
205
309
 
206
310
  ```typescript
207
311
  const models = await ai.getModels('image');
312
+ // Returns ModelInfo[] with id, provider, name, modality, local, cost, capabilities
208
313
  ```
209
314
 
210
315
  #### `ai.getModel(provider, modelId): Promise<ModelInfo | null>`
@@ -213,129 +318,1161 @@ Get details about a specific model.
213
318
 
214
319
  #### `ai.syncModels(): Promise<SyncResult>`
215
320
 
216
- Refresh model lists from all providers.
321
+ Refresh model lists from all providers. Returns sync count, per-provider breakdown, and any errors.
217
322
 
218
323
  ### Usage Tracking
219
324
 
220
325
  #### `ai.getUsage(options?): UsageSummary`
221
326
 
222
- Get aggregated usage statistics.
327
+ Get aggregated usage statistics with optional filtering.
223
328
 
224
329
  ```typescript
225
- const usage = ai.getUsage({ since: '2024-01-01', provider: 'openai' });
226
- console.log(usage.totalCost); // total USD spent
227
- console.log(usage.totalRequests); // number of requests
228
- console.log(usage.byProvider); // { openai: 2.50, anthropic: 1.20 }
229
- console.log(usage.byModality); // { llm: 3.00, image: 0.70 }
330
+ const usage = ai.getUsage({
331
+ since: '2024-01-01', // optional ISO date or Date object
332
+ until: '2024-12-31', // optional
333
+ provider: 'openai', // optional filter by provider
334
+ modality: 'llm', // optional filter by modality
335
+ });
336
+
337
+ console.log(usage.totalCost); // total USD spent
338
+ console.log(usage.totalRequests); // number of requests
339
+ console.log(usage.byProvider); // { openai: 2.50, anthropic: 1.20, fal: 0.30 }
340
+ console.log(usage.byModality); // { llm: 3.00, image: 0.70, video: 0.30, tts: 0.00 }
230
341
  ```
231
342
 
232
- Real-time usage callback:
343
+ ### Lifecycle
344
+
345
+ #### `ai.registerProvider(provider): void`
346
+
347
+ Register a custom provider (see [Custom Providers](#custom-providers)).
348
+
349
+ #### `ai.dispose(): Promise<void>`
350
+
351
+ Cleanup all provider resources, clear model cache, and reset usage tracker.
352
+
353
+ ### NoosphereResult
354
+
355
+ Every generation method returns a `NoosphereResult`:
233
356
 
234
357
  ```typescript
235
- const ai = new Noosphere({
236
- onUsage: (event) => {
237
- console.log(`${event.provider}/${event.model}: $${event.cost} (${event.latencyMs}ms)`);
358
+ interface NoosphereResult {
359
+ content?: string; // LLM response text
360
+ thinking?: string; // reasoning/thinking output (supported models)
361
+ url?: string; // media URL (images, videos, audio from cloud providers)
362
+ buffer?: Buffer; // media binary data (local providers, HuggingFace)
363
+ provider: string; // which provider handled the request
364
+ model: string; // which model was used
365
+ modality: Modality; // 'llm' | 'image' | 'video' | 'tts'
366
+ latencyMs: number; // request duration in milliseconds
367
+ usage: {
368
+ cost: number; // cost in USD
369
+ input?: number; // input tokens/characters
370
+ output?: number; // output tokens
371
+ unit?: string; // 'tokens' | 'characters' | 'per_image' | 'per_second' | 'free'
372
+ };
373
+ media?: {
374
+ width?: number; // image/video width
375
+ height?: number; // image/video height
376
+ duration?: number; // video/audio duration in seconds
377
+ format?: string; // 'png' | 'mp4' | 'mp3' | 'wav'
378
+ fps?: number; // video frames per second
379
+ };
380
+ }
381
+ ```
382
+
383
+ ---
384
+
385
+ ## Providers In Depth
386
+
387
+ ### Pi-AI — LLM Gateway (246+ models)
388
+
389
+ **Provider ID:** `pi-ai`
390
+ **Modalities:** LLM (chat + streaming)
391
+ **Library:** `@mariozechner/pi-ai`
392
+
393
+ A unified gateway that routes to 8 LLM providers through 4 different API protocols:
394
+
395
+ | API Protocol | Providers |
396
+ |---|---|
397
+ | `anthropic-messages` | Anthropic |
398
+ | `google-generative-ai` | Google |
399
+ | `openai-responses` | OpenAI (reasoning models) |
400
+ | `openai-completions` | OpenAI, xAI, Groq, Cerebras, Zai, OpenRouter |
401
+
402
+ #### Anthropic Models (19)
403
+
404
+ | Model | Context | Reasoning | Vision | Input Cost | Output Cost |
405
+ |---|---|---|---|---|---|
406
+ | `claude-opus-4-0` | 200k | Yes | Yes | $15/M | $75/M |
407
+ | `claude-opus-4-1` | 200k | Yes | Yes | $15/M | $75/M |
408
+ | `claude-sonnet-4-20250514` | 200k | Yes | Yes | $3/M | $15/M |
409
+ | `claude-sonnet-4-5-20250929` | 200k | Yes | Yes | $3/M | $15/M |
410
+ | `claude-3-7-sonnet-20250219` | 200k | Yes | Yes | $3/M | $15/M |
411
+ | `claude-3-5-sonnet-20241022` | 200k | No | Yes | $3/M | $15/M |
412
+ | `claude-haiku-4-5-20251001` | 200k | No | Yes | $0.80/M | $4/M |
413
+ | `claude-3-5-haiku-20241022` | 200k | No | Yes | $0.80/M | $4/M |
414
+ | `claude-3-haiku-20240307` | 200k | No | Yes | $0.25/M | $1.25/M |
415
+ | *...and 10 more variants* | | | | | |
416
+
417
+ #### OpenAI Models (24)
418
+
419
+ | Model | Context | Reasoning | Vision | Input Cost | Output Cost |
420
+ |---|---|---|---|---|---|
421
+ | `gpt-5` | 200k | Yes | Yes | $10/M | $30/M |
422
+ | `gpt-5-mini` | 200k | Yes | Yes | $2.50/M | $10/M |
423
+ | `gpt-4.1` | 128k | No | Yes | $2/M | $8/M |
424
+ | `gpt-4.1-mini` | 128k | No | Yes | $0.40/M | $1.60/M |
425
+ | `gpt-4.1-nano` | 128k | No | Yes | $0.10/M | $0.40/M |
426
+ | `gpt-4o` | 128k | No | Yes | $2.50/M | $10/M |
427
+ | `gpt-4o-mini` | 128k | No | Yes | $0.15/M | $0.60/M |
428
+ | `o3-pro` | 200k | Yes | Yes | $20/M | $80/M |
429
+ | `o3-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
430
+ | `o4-mini` | 200k | Yes | Yes | $1.10/M | $4.40/M |
431
+ | `codex-mini-latest` | 200k | Yes | No | $1.50/M | $6/M |
432
+ | *...and 13 more variants* | | | | | |
433
+
434
+ #### Google Gemini Models (19)
435
+
436
+ | Model | Context | Reasoning | Vision | Cost |
437
+ |---|---|---|---|---|
438
+ | `gemini-2.5-flash` | 1M | Yes | Yes | $0.15-0.60/M |
439
+ | `gemini-2.5-pro` | 1M | Yes | Yes | $1.25-10/M |
440
+ | `gemini-2.0-flash` | 1M | No | Yes | $0.10-0.40/M |
441
+ | `gemini-2.0-flash-lite` | 1M | No | Yes | $0.025-0.10/M |
442
+ | `gemini-1.5-flash` | 1M | No | Yes | $0.075-0.30/M |
443
+ | `gemini-1.5-pro` | 2M | No | Yes | $1.25-5/M |
444
+ | *...and 13 more variants* | | | | |
445
+
446
+ #### xAI Grok Models (20)
447
+
448
+ | Model | Context | Reasoning | Vision | Input Cost |
449
+ |---|---|---|---|---|
450
+ | `grok-4` | 256k | Yes | Yes | $5/M |
451
+ | `grok-4-fast` | 256k | Yes | Yes | $3/M |
452
+ | `grok-3` | 131k | No | Yes | $3/M |
453
+ | `grok-3-fast` | 131k | No | Yes | $5/M |
454
+ | `grok-3-mini-fast-latest` | 131k | Yes | No | $0.30/M |
455
+ | `grok-2-vision` | 32k | No | Yes | $2/M |
456
+ | *...and 14 more variants* | | | | |
457
+
458
+ #### Groq Models (15)
459
+
460
+ | Model | Context | Cost |
461
+ |---|---|---|
462
+ | `llama-3.3-70b-versatile` | 128k | $0.59/M |
463
+ | `llama-3.1-8b-instant` | 128k | $0.05/M |
464
+ | `mistral-saba-24b` | 32k | $0.40/M |
465
+ | `qwen-qwq-32b` | 128k | $0.29/M |
466
+ | `deepseek-r1-distill-llama-70b` | 128k | $0.75/M |
467
+ | *...and 10 more* | | |
468
+
469
+ #### Cerebras Models (3)
470
+
471
+ `gpt-oss-120b`, `qwen-3-235b-a22b-instruct-2507`, `qwen-3-coder-480b`
472
+
473
+ #### Zai Models (5)
474
+
475
+ `glm-4.6`, `glm-4.5`, `glm-4.5-flash`, `glm-4.5v`, `glm-4.5-air`
476
+
477
+ #### OpenRouter (141 models)
478
+
479
+ Aggregator providing access to hundreds of additional models including Llama, Deepseek, Mistral, Qwen, and many more. Full list available via `ai.getModels('llm')`.
480
+
481
+ #### The Pi-AI Engine — Deep Dive
482
+
483
+ Noosphere's LLM provider is powered by `@mariozechner/pi-ai`, part of the **Pi mono-repo** by Mario Zechner (badlogic). Pi is NOT a wrapper like LangChain or Mastra — it's a **micro-framework for agentic AI** (~15K LOC, 4 npm packages) that was built from scratch as a minimalist alternative to Claude Code.
484
+
485
+ Pi consists of 4 packages in 3 tiers:
486
+
487
+ ```
488
+ TIER 1 — FOUNDATION
489
+ @mariozechner/pi-ai LLM API: stream(), complete(), model registry
490
+ 0 internal deps, talks to 20+ providers
491
+
492
+ TIER 2 — INFRASTRUCTURE
493
+ @mariozechner/pi-agent-core Agent loop, tool execution, lifecycle events
494
+ Depends on pi-ai
495
+
496
+ @mariozechner/pi-tui Terminal UI with differential rendering
497
+ Standalone, 0 internal deps
498
+
499
+ TIER 3 — APPLICATION
500
+ @mariozechner/pi-coding-agent CLI + SDK: sessions, compaction, extensions
501
+ Depends on all above
502
+ ```
503
+
504
+ Noosphere uses `@mariozechner/pi-ai` (Tier 1) directly for LLM access. But the full Pi ecosystem provides capabilities that can be layered on top.
505
+
506
+ ---
507
+
508
+ #### How Pi Keeps 200+ Models Updated
509
+
510
+ Pi does NOT hardcode models. It has an **auto-generation pipeline** that runs at build time:
511
+
512
+ ```
513
+ STEP 1: FETCH (3 sources in parallel)
514
+ ┌──────────────────┐ ┌──────────────────┐ ┌───────────────┐
515
+ │ models.dev │ │ OpenRouter │ │ Vercel AI │
516
+ │ /api.json │ │ /v1/models │ │ Gateway │
517
+ │ │ │ │ │ /v1/models │
518
+ │ Context windows │ │ Pricing ($/M) │ │ Capability │
519
+ │ Capabilities │ │ Availability │ │ tags │
520
+ │ Tool support │ │ Provider routing │ │ │
521
+ └────────┬─────────┘ └────────┬─────────┘ └──────┬────────┘
522
+ └─────────┬───────────┴────────────────────┘
523
+
524
+ STEP 2: MERGE & DEDUPLICATE
525
+ Priority: models.dev > OpenRouter > Vercel
526
+ Key: provider + modelId
527
+
528
+
529
+ STEP 3: FILTER
530
+ ✅ tool_call === true
531
+ ✅ streaming supported
532
+ ✅ system messages supported
533
+ ✅ not deprecated
534
+
535
+
536
+ STEP 4: NORMALIZE
537
+ Costs → $/million tokens
538
+ API type → one of 4 protocols
539
+ Input modes → ["text"] or ["text","image"]
540
+
541
+
542
+ STEP 5: PATCH (manual corrections)
543
+ Claude Opus: cache pricing fix
544
+ GPT-5.4: context window override
545
+ Kimi K2.5: hardcoded pricing
546
+
547
+
548
+ STEP 6: GENERATE TypeScript
549
+ → models.generated.ts (~330KB)
550
+ → 200+ models with full type safety
551
+ ```
552
+
553
+ Each generated model entry looks like:
554
+
555
+ ```typescript
556
+ {
557
+ id: "claude-opus-4-6",
558
+ name: "Claude Opus 4.6",
559
+ api: "anthropic-messages",
560
+ provider: "anthropic",
561
+ baseUrl: "https://api.anthropic.com",
562
+ reasoning: true,
563
+ input: ["text", "image"],
564
+ cost: {
565
+ input: 15, // $15/M tokens
566
+ output: 75, // $75/M tokens
567
+ cacheRead: 1.5, // prompt cache hit
568
+ cacheWrite: 18.75, // prompt cache write
238
569
  },
239
- });
570
+ contextWindow: 200_000,
571
+ maxTokens: 32_000,
572
+ } satisfies Model<"anthropic-messages">
240
573
  ```
241
574
 
242
- ### Custom Providers
575
+ When a new model is released (e.g., Gemini 3.0), it appears in models.dev/OpenRouter → the script captures it → a new Pi version is published → Noosphere updates its dependency.
576
+
577
+ ---
243
578
 
244
- Register your own provider by implementing the `NoosphereProvider` interface:
579
+ #### 4 API Protocols How Pi Talks to Every Provider
580
+
581
+ Pi abstracts all LLM providers into 4 wire protocols. Each protocol handles the differences in request format, streaming format, auth headers, and response parsing:
582
+
583
+ | Protocol | Providers | Key Differences |
584
+ |---|---|---|
585
+ | `anthropic-messages` | Anthropic, AWS Bedrock | `system` as top-level field, content as `[{type:"text", text:"..."}]` blocks, `x-api-key` auth, `anthropic-beta` headers |
586
+ | `openai-completions` | OpenAI, xAI, Groq, Cerebras, OpenRouter, Ollama, vLLM | `system` as message with `role:"system"`, content as string, `Authorization: Bearer` auth, `tool_calls` array |
587
+ | `openai-responses` | OpenAI (reasoning models) | New Responses API with server-side context, `store: true`, reasoning summaries |
588
+ | `google-generative-ai` | Google Gemini, Vertex AI | `systemInstruction.parts[{text}]`, role `"model"` instead of `"assistant"`, `functionCall` instead of `tool_calls`, `thinkingConfig` |
589
+
590
+ The core function `streamSimple()` detects which protocol to use based on `model.api` and handles all the formatting/parsing transparently:
245
591
 
246
592
  ```typescript
247
- import type { NoosphereProvider } from 'noosphere';
593
+ // What happens inside Pi when you call Noosphere's chat():
594
+ async function* streamSimple(
595
+ model: Model, // includes model.api to determine protocol
596
+ context: Context, // { systemPrompt, messages, tools }
597
+ options?: StreamOptions // { signal, onPayload, thinkingLevel, ... }
598
+ ): AsyncIterable<AssistantMessageEvent> {
599
+ // 1. Format request according to model.api protocol
600
+ // 2. Open SSE/WebSocket stream
601
+ // 3. Parse provider-specific chunks
602
+ // 4. Emit normalized events:
603
+ // → text_delta, thinking_delta, tool_call, message_end
604
+ }
605
+ ```
248
606
 
249
- const myProvider: NoosphereProvider = {
250
- id: 'my-provider',
251
- name: 'My Provider',
252
- modalities: ['llm'],
253
- isLocal: false,
607
+ ---
254
608
 
255
- async ping() { return true; },
256
- async listModels() { return [/* ... */]; },
609
+ #### Agentic Capabilities
257
610
 
258
- async chat(options) {
259
- // your implementation
260
- return { content: '...', provider: 'my-provider', model: '...', modality: 'llm', latencyMs: 100, usage: { cost: 0 } };
261
- },
611
+ These are the capabilities exposed through the Pi-AI engine:
612
+
613
+ ##### 1. Tool Use / Function Calling
614
+
615
+ Full structured tool calling supported across **all major providers**. Tool definitions use TypeBox schemas with runtime validation via AJV:
616
+
617
+ ```typescript
618
+ import { type Tool, StringEnum } from '@mariozechner/pi-ai';
619
+ import { Type } from '@sinclair/typebox';
620
+
621
+ // Define a tool with typed parameters
622
+ const searchTool: Tool = {
623
+ name: 'web_search',
624
+ description: 'Search the web for information',
625
+ parameters: Type.Object({
626
+ query: Type.String({ description: 'Search query' }),
627
+ maxResults: Type.Optional(Type.Number({ default: 5 })),
628
+ type: StringEnum(['web', 'images', 'news'], { description: 'Search type' }),
629
+ }),
262
630
  };
263
631
 
264
- ai.registerProvider(myProvider);
632
+ // Pass tools in context — Pi handles the rest
633
+ const context = {
634
+ systemPrompt: 'You are a helpful assistant.',
635
+ messages: [{ role: 'user', content: 'Search for recent AI news' }],
636
+ tools: [searchTool],
637
+ };
265
638
  ```
266
639
 
267
- ### Error Handling
640
+ **How tool calling works internally:**
268
641
 
269
- All errors are instances of `NoosphereError`:
642
+ ```
643
+ User prompt → LLM → "I need to call web_search"
644
+
645
+
646
+ Pi validates arguments with AJV
647
+ against the TypeBox schema
648
+
649
+ ┌─────┴─────┐
650
+ │ Valid? │
651
+ ├─Yes───────┤
652
+ │ Execute │
653
+ │ tool │
654
+ ├───────────┤
655
+ │ No │
656
+ │ Return │
657
+ │ validation│
658
+ │ error to │
659
+ │ LLM │
660
+ └───────────┘
661
+
662
+
663
+ Tool result → back into context → LLM continues
664
+ ```
665
+
666
+ **Provider-specific tool_choice control:**
667
+ - **Anthropic:** `"auto" | "any" | "none" | { type: "tool", name: "specific_tool" }`
668
+ - **OpenAI:** `"auto" | "none" | "required" | { type: "function", function: { name: "..." } }`
669
+ - **Google:** `"auto" | "none" | "any"`
670
+
671
+ **Partial JSON streaming:** Pi parses tool call arguments incrementally as they stream, using a partial JSON parser. This means you can see tool arguments being built in real-time, not just after the tool call completes.
672
+
673
+ ##### 2. Reasoning / Extended Thinking
674
+
675
+ Pi provides **unified thinking support** across all providers that support it. Thinking blocks are automatically extracted, separated from regular text, and streamed as distinct events:
676
+
677
+ | Provider | Models | Control Parameters | How It Works |
678
+ |---|---|---|---|
679
+ | **Anthropic** | Claude Opus, Sonnet 4+ | `thinkingEnabled: boolean`, `thinkingBudgetTokens: number` | Extended thinking blocks in response, separate `thinking` content type |
680
+ | **OpenAI** | o1, o3, o4, GPT-5 | `reasoningEffort: "minimal" \| "low" \| "medium" \| "high"` | Reasoning via Responses API, `reasoningSummary: "auto" \| "detailed" \| "concise"` |
681
+ | **Google** | Gemini 2.5 Flash/Pro | `thinking.enabled: boolean`, `thinking.budgetTokens: number` | Thinking via `thinkingConfig`, mapped to effort levels |
682
+ | **xAI** | Grok-4, Grok-3-mini | Native reasoning | Automatic when model supports it |
683
+
684
+ **Cross-provider thinking portability:** When switching models mid-conversation, Pi converts thinking blocks between formats. Anthropic thinking blocks become `<thinking>` tagged text when sent to OpenAI/Google, and vice versa.
270
685
 
271
686
  ```typescript
272
- import { NoosphereError } from 'noosphere';
687
+ // Thinking is automatically extracted in Noosphere responses:
688
+ const result = await ai.chat({
689
+ model: 'claude-opus-4-6',
690
+ messages: [{ role: 'user', content: 'Solve this step by step: 15! / 13!' }],
691
+ });
273
692
 
274
- try {
275
- await ai.chat({ messages: [{ role: 'user', content: 'Hello' }] });
276
- } catch (err) {
277
- if (err instanceof NoosphereError) {
278
- console.log(err.code); // 'RATE_LIMITED' | 'TIMEOUT' | 'AUTH_FAILED' | ...
279
- console.log(err.provider); // which provider failed
280
- console.log(err.modality); // 'llm' | 'image' | 'video' | 'tts'
281
- console.log(err.isRetryable());
693
+ console.log(result.thinking); // "Let me work through this... 15! = 15 × 14 × 13!..."
694
+ console.log(result.content); // "15! / 13! = 15 × 14 = 210"
695
+
696
+ // During streaming, thinking arrives as separate events:
697
+ const stream = ai.stream({ messages: [...] });
698
+ for await (const event of stream) {
699
+ if (event.type === 'thinking_delta') console.log('[THINKING]', event.delta);
700
+ if (event.type === 'text_delta') console.log('[RESPONSE]', event.delta);
701
+ }
702
+ ```
703
+
704
+ ##### 3. Vision / Multimodal Input
705
+
706
+ Models with `input: ["text", "image"]` accept images alongside text. Pi handles the encoding and format differences per provider:
707
+
708
+ ```typescript
709
+ // Send images to vision-capable models
710
+ const messages = [{
711
+ role: 'user',
712
+ content: [
713
+ { type: 'text', text: 'What is in this image?' },
714
+ { type: 'image', data: base64PngString, mimeType: 'image/png' },
715
+ ],
716
+ }];
717
+
718
+ // Supported MIME types: image/png, image/jpeg, image/gif, image/webp
719
+ // Images are silently ignored when sent to non-vision models
720
+ ```
721
+
722
+ **Vision-capable models include:** All Claude models, all GPT-4o/GPT-5 models, Gemini models, Grok-2-vision, Grok-4, and select Groq models.
723
+
724
+ ##### 4. Agent Loop — Autonomous Tool Execution
725
+
726
+ The `@mariozechner/pi-agent-core` package provides a complete agent loop that automatically cycles through `prompt → LLM → tool call → result → repeat` until the task is done:
727
+
728
+ ```typescript
729
+ import { agentLoop } from '@mariozechner/pi-ai';
730
+
731
+ const events = agentLoop(userMessage, agentContext, {
732
+ model: getModel('anthropic', 'claude-opus-4-6'),
733
+ tools: [searchTool, readFileTool, writeFileTool],
734
+ signal: abortController.signal,
735
+ });
736
+
737
+ for await (const event of events) {
738
+ switch (event.type) {
739
+ case 'agent_start': // Agent begins
740
+ case 'turn_start': // New LLM turn begins
741
+ case 'message_start': // LLM starts responding
742
+ case 'message_update': // Text/thinking delta received
743
+ case 'tool_execution_start': // About to execute a tool
744
+ case 'tool_execution_end': // Tool finished, result available
745
+ case 'message_end': // LLM finished this message
746
+ case 'turn_end': // Turn complete (may loop if tools were called)
747
+ case 'agent_end': // All done, final messages available
282
748
  }
283
749
  }
284
750
  ```
285
751
 
286
- Error codes: `PROVIDER_UNAVAILABLE`, `MODEL_NOT_FOUND`, `AUTH_FAILED`, `RATE_LIMITED`, `TIMEOUT`, `GENERATION_FAILED`, `INVALID_INPUT`, `NO_PROVIDER`
752
+ **The agent loop state machine:**
287
753
 
288
- ### Retry & Failover
754
+ ```
755
+ [User sends prompt]
756
+
757
+
758
+ ┌─[Build Context]──▶ [Check Queues]──▶ [Stream LLM]◄── streamFn()
759
+ │                                           │
760
+ │                                     ┌─────┴──────┐
761
+ │                                     │            │
762
+ │                                   text       tool_call
763
+ │                                     │            │
764
+ │                                     ▼            ▼
765
+ │                                  [Done]   [Execute Tool]
766
+ │                                                  │
767
+ │                                             tool result
768
+ │                                                  │
769
+ └──────────────────────────────────────────────────┘
770
+                    (loops back to Stream LLM)
771
+ ```
772
+
773
+ **Key design decisions:**
774
+ - Tools execute **sequentially** by default (parallelism can be added on top)
775
+ - The `streamFn` is **injectable** — you can wrap it with middleware to modify requests per-provider
776
+ - Tool arguments are **validated at runtime** using TypeBox + AJV before execution
777
+ - Aborted/failed responses preserve partial content and usage data
778
+ - Tool results are automatically added to the conversation context
779
+
780
+ ##### 5. The `streamFn` Pattern — Injectable Middleware
781
+
782
+ This is Pi's most powerful architectural feature. The `streamFn` is the function that actually talks to the LLM, and it can be **wrapped with middleware** like Express.js request handlers:
289
783
 
290
784
  ```typescript
291
- const ai = new Noosphere({
292
- retry: {
293
- maxRetries: 3, // default: 2
294
- backoffMs: 2000, // default: 1000
295
- failover: true, // default: true try other providers on failure
296
- retryableErrors: ['RATE_LIMITED', 'TIMEOUT', 'PROVIDER_UNAVAILABLE'],
785
+ import type { StreamFn } from '@mariozechner/pi-agent-core';
786
+ import { streamSimple } from '@mariozechner/pi-ai';
787
+
788
+ // Start with Pi's base streaming function
789
+ let fn: StreamFn = streamSimple;
790
+
791
+ // Wrap it with middleware that modifies requests per-provider
792
+ fn = createMyCustomWrapper(fn, {
793
+ // Add custom headers for Anthropic
794
+ onPayload: (payload) => {
795
+ if (model.provider === 'anthropic') {
796
+ payload.headers['anthropic-beta'] = 'fine-grained-tool-streaming-2025-05-14';
797
+ }
297
798
  },
298
- timeout: {
299
- llm: 30000, // 30s (default)
300
- image: 120000, // 2min (default)
301
- video: 300000, // 5min (default)
302
- tts: 60000, // 1min (default)
799
+ });
800
+
801
+ // Each wrapper calls the previous one, forming a chain:
802
+ // request wrapper3 → wrapper2 → wrapper1 → streamSimple → API
803
+ ```
804
+
805
+ This pattern is what allows projects like OpenClaw to stack **16 provider-specific wrappers** on top of Pi's base streaming — adding beta headers for Anthropic, WebSocket transport for OpenAI, thinking sanitization for Google, reasoning effort headers for OpenRouter, and more — without modifying Pi's source code.
806
+
807
+ ##### 6. Session Management (via pi-coding-agent)
808
+
809
+ The `@mariozechner/pi-coding-agent` package provides persistent session management with JSONL-based storage:
810
+
811
+ ```typescript
812
+ import { createAgentSession, SessionManager } from '@mariozechner/pi-coding-agent';
813
+
814
+ // Create a session with full persistence
815
+ const session = await createAgentSession({
816
+ model: 'claude-opus-4-6',
817
+ tools: myTools,
818
+ sessionManager, // handles JSONL persistence
819
+ });
820
+
821
+ const result = await session.run('Build a REST API');
822
+ // Session is automatically saved to:
823
+ // ~/.pi/agent/sessions/session_abc123.jsonl
824
+ ```
825
+
826
+ **Session file format (append-only JSONL):**
827
+ ```jsonl
828
+ {"role":"user","content":"Build a REST API","timestamp":1710000000}
829
+ {"role":"assistant","content":"I'll create...","model":"claude-opus-4-6","usage":{...}}
830
+ {"role":"toolResult","toolCallId":"tc_001","toolName":"bash","content":"OK"}
831
+ {"type":"compaction","summary":"The user asked to build...","preservedMessages":[...]}
832
+ ```
833
+
834
+ **Session operations:**
835
+ - `create()` — new session
836
+ - `open(id)` — restore existing session
837
+ - `continueRecent()` — continue the most recent session
838
+ - `forkFrom(id)` — create a branch (new JSONL referencing parent)
839
+ - `inMemory()` — RAM-only session (for SDK/testing)
840
+
841
+ ##### 7. Context Compaction — Automatic Context Window Management
842
+
843
+ When the conversation approaches the model's context window limit, Pi automatically **compacts** the history:
844
+
845
+ ```
846
+ 1. DETECT: Calculate inputTokens + outputTokens vs model.contextWindow
847
+ 2. TRIGGER: Proactively before overflow, or as recovery after overflow error
848
+ 3. SUMMARIZE: Send history to LLM with a compaction prompt
849
+ 4. WRITE: Append compaction entry to JSONL:
850
+ {"type":"compaction","summary":"...","preservedMessages":[last N messages]}
851
+ 5. CONTINUE: Context is now summary + recent messages instead of full history
852
+ ```
853
+
854
+ The JSONL file is **never rewritten** — compaction entries are appended, maintaining a complete audit trail.
855
+
856
+ ##### 8. Cost Tracking — Cache-Aware Pricing
857
+
858
+ Pi tracks costs per-request with cache-aware pricing for providers that support prompt caching:
859
+
860
+ ```typescript
861
+ // Every model has 4 cost dimensions:
862
+ {
863
+ input: 15, // $15 per 1M input tokens
864
+ output: 75, // $75 per 1M output tokens
865
+ cacheRead: 1.5, // $1.50 per 1M cached prompt tokens (read)
866
+ cacheWrite: 18.75, // $18.75 per 1M cached prompt tokens (write)
867
+ }
868
+
869
+ // Usage tracking on every response:
870
+ {
871
+ input: 1500, // tokens consumed as input
872
+ output: 800, // tokens generated
873
+ cacheRead: 5000, // prompt cache hits
874
+ cacheWrite: 1500, // prompt cache writes
875
+ cost: {
876
+ total: 0.118, // total cost in USD
877
+ input: 0.0225,
878
+ output: 0.06,
879
+ cacheRead: 0.0075,
880
+ cacheWrite: 0.028,
303
881
  },
882
+ }
883
+ ```
884
+
885
+ **Anthropic and OpenAI** support prompt caching. For providers without caching, `cacheRead` and `cacheWrite` are always 0.
886
+
887
+ ##### 9. Extension System (via pi-coding-agent)
888
+
889
+ Pi supports a plugin system where extensions can register tools, commands, and lifecycle hooks:
890
+
891
+ ```typescript
892
+ // Extensions are TypeScript modules loaded at runtime via jiti
893
+ export default function(api: ExtensionAPI) {
894
+ // Register a custom tool
895
+ api.registerTool('my_tool', {
896
+ description: 'Does something useful',
897
+ parameters: { /* TypeBox schema */ },
898
+ execute: async (args) => 'result',
899
+ });
900
+
901
+ // Register a slash command
902
+ api.registerCommand('/mycommand', {
903
+ handler: async (args) => { /* ... */ },
904
+ description: 'Custom command',
905
+ });
906
+
907
+ // Hook into the agent lifecycle
908
+ api.on('before_agent_start', async (context) => {
909
+ context.systemPrompt += '\nExtra instructions';
910
+ });
911
+
912
+ api.on('tool_execution_end', async (event) => {
913
+ // Post-process tool results
914
+ });
915
+ }
916
+ ```
917
+
918
+ **Resource discovery chain (priority):**
919
+ 1. Project `.pi/` directory (highest)
920
+ 2. User `~/.pi/agent/`
921
+ 3. npm packages with Pi metadata
922
+ 4. Built-in defaults
923
+
924
+ ##### 10. The Anti-MCP Philosophy — Why Pi Uses CLI Instead
925
+
926
+ Pi explicitly **rejects MCP** (Model Context Protocol). Mario Zechner's argument, backed by benchmarks:
927
+
928
+ **The token cost problem:**
929
+
930
+ | Approach | Tools | Tokens Consumed | % of Claude's Context |
931
+ |---|---|---|---|
932
+ | Playwright MCP | 21 tools | 13,700 tokens | 6.8% |
933
+ | Chrome DevTools MCP | 26 tools | 18,000 tokens | 9.0% |
934
+ | Pi CLI + README | N/A | 225 tokens | ~0.1% |
935
+
936
+ That's a **60-80x reduction** in token consumption. With 5 MCP servers, you lose ~55,000 tokens before doing any work.
937
+
938
+ **Benchmark results (120 evaluations):**
939
+
940
+ | Approach | Avg Cost | Success Rate |
941
+ |---|---|---|
942
+ | CLI (tmux) | $0.37 | 100% |
943
+ | CLI (terminalcp) | $0.39 | 100% |
944
+ | MCP (terminalcp) | $0.48 | 100% |
945
+
946
+ Same success rate, MCP costs **30% more**.
947
+
948
+ **Pi's alternative: Progressive Disclosure via CLI tools + READMEs**
949
+
950
+ Instead of loading all tool definitions upfront, Pi's agent has `bash` as a built-in tool and discovers CLI tools only when needed:
951
+
952
+ ```
953
+ MCP approach: Pi approach:
954
+ ───────────── ──────────
955
+ Session start → Session start →
956
+ Load 21 Playwright tools Load 4 tools: read, write, edit, bash
957
+ Load 26 Chrome DevTools tools (225 tokens)
958
+ Load N more MCP tools
959
+ (~55,000 tokens wasted)
960
+
961
+ When browser needed: When browser needed:
962
+ Tools already loaded Agent reads SKILL.md (225 tokens)
963
+ (but context is polluted) Runs: browser-start.js
964
+ Runs: browser-nav.js https://...
965
+ Runs: browser-screenshot.js
966
+
967
+ When browser NOT needed: When browser NOT needed:
968
+ Tools still consume context 0 tokens wasted
969
+ ```
970
+
971
+ **The 4 built-in tools** (what Pi argues is sufficient):
972
+
973
+ | Tool | What It Does | Why It's Enough |
974
+ |---|---|---|
975
+ | `read` | Read files (text + images) | Supports offset/limit for large files |
976
+ | `write` | Create/overwrite files | Creates directories automatically |
977
+ | `edit` | Replace text (oldText→newText) | Surgical edits, like a diff |
978
+ | `bash` | Execute any shell command | **bash can do everything else** — replaces MCP entirely |
979
+
980
+ The key insight: `bash` replaces MCP. Any CLI tool, API call, database query, or system operation can be invoked through bash. The agent reads the tool's README only when it needs it, paying tokens on-demand instead of upfront.
981
+
982
+ ---
983
+
984
+ ### FAL — Media Generation (867+ endpoints)
985
+
986
+ **Provider ID:** `fal`
987
+ **Modalities:** Image, Video, TTS
988
+ **Library:** `@fal-ai/client`
989
+
990
+ The largest media generation provider with dynamic pricing fetched at runtime from `https://api.fal.ai/v1/models/pricing`.
991
+
992
+ #### Image Models (200+)
993
+
994
+ **FLUX Family (20+ variants):**
995
+ | Model | Description |
996
+ |---|---|
997
+ | `fal-ai/flux/schnell` | Fast generation (default) |
998
+ | `fal-ai/flux/dev` | Higher quality |
999
+ | `fal-ai/flux-2` | Next generation |
1000
+ | `fal-ai/flux-2-pro` | Professional quality |
1001
+ | `fal-ai/flux-2-flex` | Flexible variant |
1002
+ | `fal-ai/flux-2/edit` | Image editing |
1003
+ | `fal-ai/flux-2/lora` | LoRA fine-tuning |
1004
+ | `fal-ai/flux-pro/v1.1-ultra` | Ultra high quality |
1005
+ | `fal-ai/flux-pro/kontext` | Context-aware generation |
1006
+ | `fal-ai/flux-lora` | Custom style training |
1007
+ | `fal-ai/flux-vision-upscaler` | AI upscaling |
1008
+ | `fal-ai/flux-krea-trainer` | Model training |
1009
+ | `fal-ai/flux-lora-fast-training` | Fast fine-tuning |
1010
+ | `fal-ai/flux-lora-portrait-trainer` | Portrait specialist |
1011
+
1012
+ **Stable Diffusion:**
1013
+ `fal-ai/stable-diffusion-v15`, `fal-ai/stable-diffusion-v35-large`, `fal-ai/stable-diffusion-v35-medium`, `fal-ai/stable-diffusion-v3-medium`
1014
+
1015
+ **Other Image Models:**
1016
+ | Model | Description |
1017
+ |---|---|
1018
+ | `fal-ai/recraft/v3/text-to-image` | Artistic generation |
1019
+ | `fal-ai/ideogram/v2`, `v2a`, `v3` | Ideogram series |
1020
+ | `fal-ai/imagen3`, `fal-ai/imagen4/preview` | Google Imagen |
1021
+ | `fal-ai/gpt-image-1` | GPT image generation |
1022
+ | `fal-ai/gpt-image-1/edit-image` | GPT image editing |
1023
+ | `fal-ai/reve/text-to-image` | Reve generation |
1024
+ | `fal-ai/sana`, `fal-ai/sana/sprint` | Sana models |
1025
+ | `fal-ai/pixart-sigma` | PixArt Sigma |
1026
+ | `fal-ai/bria/text-to-image/base` | Bria AI |
1027
+
1028
+ **Pre-trained LoRA Styles:**
1029
+ `fal-ai/flux-2-lora-gallery/sepia-vintage`, `virtual-tryon`, `satellite-view-style`, `realism`, `multiple-angles`, `hdr-style`, `face-to-full-portrait`, `digital-comic-art`, `ballpoint-pen-sketch`, `apartment-staging`, `add-background`
1030
+
1031
+ **Image Editing/Enhancement (30+ tools):**
1032
+ `fal-ai/image-editing/age-progression`, `baby-version`, `background-change`, `hair-change`, `expression-change`, `object-removal`, `photo-restoration`, `style-transfer`, and many more.
1033
+
1034
+ #### Video Models (150+)
1035
+
1036
+ **Kling Video (20+ variants):**
1037
+ | Model | Description |
1038
+ |---|---|
1039
+ | `fal-ai/kling-video/v2/master/text-to-video` | Default text-to-video |
1040
+ | `fal-ai/kling-video/v2/master/image-to-video` | Image-to-video |
1041
+ | `fal-ai/kling-video/v2.5-turbo/pro/text-to-video` | Turbo pro |
1042
+ | `fal-ai/kling-video/o1/image-to-video` | O1 quality |
1043
+ | `fal-ai/kling-video/o1/video-to-video/edit` | Video editing |
1044
+ | `fal-ai/kling-video/lipsync/audio-to-video` | Lip sync |
1045
+ | `fal-ai/kling-video/video-to-audio` | Audio extraction |
1046
+
1047
+ **Sora 2 (OpenAI):**
1048
+ | Model | Description |
1049
+ |---|---|
1050
+ | `fal-ai/sora-2/text-to-video` | Text-to-video |
1051
+ | `fal-ai/sora-2/text-to-video/pro` | Pro quality |
1052
+ | `fal-ai/sora-2/image-to-video` | Image-to-video |
1053
+ | `fal-ai/sora-2/video-to-video/remix` | Video remixing |
1054
+
1055
+ **VEO 3 (Google):**
1056
+ | Model | Description |
1057
+ |---|---|
1058
+ | `fal-ai/veo3` | VEO 3 standard |
1059
+ | `fal-ai/veo3/fast` | Fast variant |
1060
+ | `fal-ai/veo3/image-to-video` | Image-to-video |
1061
+ | `fal-ai/veo3.1` | Latest version |
1062
+ | `fal-ai/veo3.1/reference-to-video` | Reference-guided |
1063
+ | `fal-ai/veo3.1/first-last-frame-to-video` | Frame interpolation |
1064
+
1065
+ **WAN (15+ variants):**
1066
+ `fal-ai/wan-pro/text-to-video`, `fal-ai/wan-pro/image-to-video`, `fal-ai/wan/v2.2-a14b/text-to-video`, `fal-ai/wan-vace-14b/depth`, `fal-ai/wan-vace-14b/inpainting`, `fal-ai/wan-vace-14b/pose`, `fal-ai/wan-effects`
1067
+
1068
+ **Pixverse (20+ variants):**
1069
+ `fal-ai/pixverse/v5.5/text-to-video`, `fal-ai/pixverse/v5.5/image-to-video`, `fal-ai/pixverse/v5.5/effects`, `fal-ai/pixverse/lipsync`, `fal-ai/pixverse/sound-effects`
1070
+
1071
+ **Minimax / Hailuo:**
1072
+ `fal-ai/minimax/hailuo-2.3/text-to-video/pro`, `fal-ai/minimax/hailuo-2.3/image-to-video/pro`, `fal-ai/minimax/video-01-director`, `fal-ai/minimax/video-01-live`
1073
+
1074
+ **Other Video Models:**
1075
+ | Provider | Models |
1076
+ |---|---|
1077
+ | Hunyuan | `fal-ai/hunyuan-video/text-to-video`, `image-to-video`, `video-to-video`, `foley` |
1078
+ | Pika | `fal-ai/pika/v2.2/text-to-video`, `pikascenes`, `pikaffects` |
1079
+ | LTX | `fal-ai/ltx-2/text-to-video`, `image-to-video`, `retake-video` |
1080
+ | Luma | `fal-ai/luma-dream-machine/ray-2`, `ray-2-flash`, `luma-photon` |
1081
+ | Vidu | `fal-ai/vidu/q2/text-to-video`, `image-to-video/pro` |
1082
+ | CogVideoX | `fal-ai/cogvideox-5b/text-to-video`, `video-to-video` |
1083
+ | Seedance | `fal-ai/bytedance/seedance/v1/text-to-video`, `image-to-video` |
1084
+ | Magi | `fal-ai/magi/text-to-video`, `extend-video` |
1085
+
1086
+ #### TTS / Speech Models (50+)
1087
+
1088
+ **Kokoro (9 languages, 20+ voices per language):**
1089
+ | Model | Language | Example Voices |
1090
+ |---|---|---|
1091
+ | `fal-ai/kokoro/american-english` | English (US) | af_heart, af_alloy, af_bella, af_nova, am_adam, am_echo, am_onyx |
1092
+ | `fal-ai/kokoro/british-english` | English (UK) | British voice set |
1093
+ | `fal-ai/kokoro/french` | French | French voice set |
1094
+ | `fal-ai/kokoro/japanese` | Japanese | Japanese voice set |
1095
+ | `fal-ai/kokoro/spanish` | Spanish | Spanish voice set |
1096
+ | `fal-ai/kokoro/mandarin-chinese` | Chinese | Mandarin voice set |
1097
+ | `fal-ai/kokoro/italian` | Italian | Italian voice set |
1098
+ | `fal-ai/kokoro/hindi` | Hindi | Hindi voice set |
1099
+ | `fal-ai/kokoro/brazilian-portuguese` | Portuguese | Portuguese voice set |
1100
+
1101
+ **ElevenLabs:**
1102
+ | Model | Description |
1103
+ |---|---|
1104
+ | `fal-ai/elevenlabs/tts/eleven-v3` | Professional quality |
1105
+ | `fal-ai/elevenlabs/tts/turbo-v2.5` | Faster inference |
1106
+ | `fal-ai/elevenlabs/tts/multilingual-v2` | Multi-language |
1107
+ | `fal-ai/elevenlabs/text-to-dialogue/eleven-v3` | Dialogue generation |
1108
+ | `fal-ai/elevenlabs/sound-effects/v2` | Sound effects |
1109
+ | `fal-ai/elevenlabs/speech-to-text` | Transcription |
1110
+ | `fal-ai/elevenlabs/audio-isolation` | Background removal |
1111
+
1112
+ **Other TTS:**
1113
+ `fal-ai/f5-tts` (voice cloning), `fal-ai/dia-tts`, `fal-ai/minimax/speech-2.6-turbo`, `fal-ai/minimax/speech-2.6-hd`, `fal-ai/chatterbox/text-to-speech`, `fal-ai/index-tts-2/text-to-speech`
1114
+
1115
+ #### FAL Client Capabilities
1116
+
1117
+ The `@fal-ai/client` provides additional features beyond what Noosphere surfaces:
1118
+
1119
+ - **Queue API** — Submit jobs, poll status, get results, cancel. Supports webhooks and priority levels
1120
+ - **Streaming API** — Real-time streaming responses via async iterators
1121
+ - **Realtime API** — WebSocket connections for interactive use (e.g., real-time image generation)
1122
+ - **Storage API** — File upload with configurable TTL (1h, 1d, 7d, 30d, 1y, never)
1123
+ - **Retry logic** — Configurable retries with exponential backoff and jitter
1124
+ - **Request middleware** — Custom request interceptors and proxy support
1125
+
1126
+ ---
1127
+
1128
+ ### Hugging Face — Open Source AI (30+ tasks)
1129
+
1130
+ **Provider ID:** `huggingface`
1131
+ **Modalities:** LLM, Image, TTS
1132
+ **Library:** `@huggingface/inference`
1133
+
1134
+ Access to the entire Hugging Face Hub ecosystem. Any model hosted on HuggingFace can be used by passing its ID directly.
1135
+
1136
+ #### Default Models
1137
+
1138
+ | Modality | Default Model | Description |
1139
+ |---|---|---|
1140
+ | LLM | `meta-llama/Llama-3.1-8B-Instruct` | Llama 3.1 8B |
1141
+ | Image | `stabilityai/stable-diffusion-xl-base-1.0` | SDXL Base |
1142
+ | TTS | `facebook/mms-tts-eng` | MMS TTS English |
1143
+
1144
+ Any HuggingFace model ID works — just pass it as the `model` parameter:
1145
+
1146
+ ```typescript
1147
+ await ai.chat({
1148
+ provider: 'huggingface',
1149
+ model: 'mistralai/Mixtral-8x7B-v0.1',
1150
+ messages: [{ role: 'user', content: 'Hello' }],
304
1151
  });
305
1152
  ```
306
1153
 
307
- ### Local Services
1154
+ #### Full Library Capabilities
1155
+
1156
+ The `@huggingface/inference` library (v3.15.0) provides 30+ AI tasks, including capabilities not yet surfaced by Noosphere:
1157
+
1158
+ **Natural Language Processing:**
1159
+ | Task | Method | Description |
1160
+ |---|---|---|
1161
+ | Chat | `chatCompletion()` | OpenAI-compatible chat completions |
1162
+ | Chat Streaming | `chatCompletionStream()` | Token-by-token streaming |
1163
+ | Text Generation | `textGeneration()` | Raw text completion |
1164
+ | Summarization | `summarization()` | Text summarization |
1165
+ | Translation | `translation()` | Language translation |
1166
+ | Question Answering | `questionAnswering()` | Extract answers from context |
1167
+ | Text Classification | `textClassification()` | Sentiment, topic classification |
1168
+ | Zero-Shot Classification | `zeroShotClassification()` | Classify without training |
1169
+ | Token Classification | `tokenClassification()` | NER, POS tagging |
1170
+ | Sentence Similarity | `sentenceSimilarity()` | Semantic similarity scores |
1171
+ | Feature Extraction | `featureExtraction()` | Text embeddings |
1172
+ | Fill Mask | `fillMask()` | Fill in masked tokens |
1173
+ | Table QA | `tableQuestionAnswering()` | Answer questions about tables |
1174
+
1175
+ **Computer Vision:**
1176
+ | Task | Method | Description |
1177
+ |---|---|---|
1178
+ | Text-to-Image | `textToImage()` | Generate images from text |
1179
+ | Image-to-Image | `imageToImage()` | Transform/edit images |
1180
+ | Image Captioning | `imageToText()` | Describe images |
1181
+ | Classification | `imageClassification()` | Classify image content |
1182
+ | Object Detection | `objectDetection()` | Detect and locate objects |
1183
+ | Segmentation | `imageSegmentation()` | Pixel-level segmentation |
1184
+ | Zero-Shot Image | `zeroShotImageClassification()` | Classify without training |
1185
+ | Text-to-Video | `textToVideo()` | Generate videos |
1186
+
1187
+ **Audio:**
1188
+ | Task | Method | Description |
1189
+ |---|---|---|
1190
+ | Text-to-Speech | `textToSpeech()` | Generate speech |
1191
+ | Speech-to-Text | `automaticSpeechRecognition()` | Transcription |
1192
+ | Audio Classification | `audioClassification()` | Classify sounds |
1193
+ | Audio-to-Audio | `audioToAudio()` | Source separation, enhancement |
1194
+
1195
+ **Multimodal:**
1196
+ | Task | Method | Description |
1197
+ |---|---|---|
1198
+ | Visual QA | `visualQuestionAnswering()` | Answer questions about images |
1199
+ | Document QA | `documentQuestionAnswering()` | Answer questions about documents |
1200
+
1201
+ **Tabular:**
1202
+ | Task | Method | Description |
1203
+ |---|---|---|
1204
+ | Classification | `tabularClassification()` | Classify tabular data |
1205
+ | Regression | `tabularRegression()` | Predict continuous values |
1206
+
1207
+ #### HuggingFace Agentic Features
1208
+
1209
+ - **Tool/Function Calling:** Full support via `tools` parameter with `tool_choice` control (auto/none/required)
1210
+ - **JSON Schema Responses:** `response_format: { type: 'json_schema', json_schema: {...} }`
1211
+ - **Reasoning:** `reasoning_effort` parameter (none/minimal/low/medium/high/xhigh)
1212
+ - **Multimodal Input:** Images via `image_url` content chunks in chat messages
1213
+ - **17 Inference Providers:** Route through Groq, Together, Fireworks, Replicate, Cerebras, Cohere, and more
1214
+
1215
+ ---
1216
+
1217
+ ### ComfyUI — Local Image Generation
1218
+
1219
+ **Provider ID:** `comfyui`
1220
+ **Modalities:** Image, Video (planned)
1221
+ **Type:** Local
1222
+ **Default Port:** 8188
308
1223
 
309
- Noosphere auto-detects local AI services on startup. Configure via constructor or environment variables:
1224
+ Connects to a local ComfyUI instance for Stable Diffusion workflows.
1225
+
1226
+ #### How It Works
1227
+
1228
+ 1. Clones a built-in txt2img workflow template (KSampler + SDXL pipeline)
1229
+ 2. Injects your parameters (prompt, dimensions, seed, steps, guidance)
1230
+ 3. POSTs the workflow to ComfyUI's `/prompt` endpoint
1231
+ 4. Polls `/history/{promptId}` every second until completion (max 5 minutes)
1232
+ 5. Fetches the generated image from `/view`
1233
+ 6. Returns a PNG buffer
1234
+
1235
+ #### Configuration
310
1236
 
311
1237
  ```typescript
312
1238
  const ai = new Noosphere({
313
- autoDetectLocal: true, // default
314
1239
  local: {
315
- comfyui: { enabled: true, host: 'http://localhost', port: 8188 },
316
- piper: { enabled: true, host: 'http://localhost', port: 5500 },
317
- kokoro: { enabled: true, host: 'http://localhost', port: 5501 },
1240
+ comfyui: {
1241
+ enabled: true,
1242
+ host: 'http://localhost',
1243
+ port: 8188,
1244
+ },
318
1245
  },
319
1246
  });
320
1247
  ```
321
1248
 
322
- Environment variables: `COMFYUI_HOST`, `COMFYUI_PORT`, `PIPER_HOST`, `PIPER_PORT`, `KOKORO_HOST`, `KOKORO_PORT`, `NOOSPHERE_AUTO_DETECT_LOCAL`
1249
+ #### Default Workflow
1250
+
1251
+ - **Checkpoint:** `sd_xl_base_1.0.safetensors`
1252
+ - **Sampler:** euler with normal scheduler
1253
+ - **Default Steps:** 20
1254
+ - **Default CFG/Guidance:** 7
1255
+ - **Default Size:** 1024x1024
1256
+ - **Max Size:** 2048x2048
1257
+ - **Output:** PNG
1258
+
1259
+ #### Models Exposed
1260
+
1261
+ | Model ID | Modality | Description |
1262
+ |---|---|---|
1263
+ | `comfyui-txt2img` | Image | Text-to-image via workflow |
1264
+ | `comfyui-txt2vid` | Video | Planned (requires AnimateDiff workflow) |
1265
+
1266
+ ---
1267
+
1268
+ ### Local TTS — Piper & Kokoro
1269
+
1270
+ **Provider IDs:** `piper`, `kokoro`
1271
+ **Modality:** TTS
1272
+ **Type:** Local
1273
+
1274
+ Connects to local OpenAI-compatible TTS servers.
1275
+
1276
+ #### Supported Engines
1277
+
1278
+ | Engine | Default Port | Health Check | Voice Discovery |
1279
+ |---|---|---|---|
1280
+ | Piper | 5500 | `GET /health` | `GET /voices` |
1281
+ | Kokoro | 5501 | `GET /health` | `GET /v1/models` (fallback) |
1282
+
1283
+ #### API
1284
+
1285
+ Uses the OpenAI-compatible TTS endpoint:
1286
+
1287
+ ```
1288
+ POST /v1/audio/speech
1289
+ {
1290
+ "model": "tts-1",
1291
+ "input": "Hello world",
1292
+ "voice": "default",
1293
+ "speed": 1.0,
1294
+ "response_format": "mp3"
1295
+ }
1296
+ ```
1297
+
1298
+ Supports `mp3`, `wav`, and `ogg` formats. Returns audio as a Buffer.
1299
+
1300
+ ---
1301
+
1302
+ ## Architecture
1303
+
1304
+ ### Provider Resolution (Local-First)
1305
+
1306
+ When you call a generation method without specifying a provider, Noosphere resolves one automatically:
1307
+
1308
+ 1. If `model` is specified without `provider` → looks up model in registry cache
1309
+ 2. If a `default` is configured for the modality → uses that
1310
+ 3. Otherwise → **local providers first**, then cloud providers
1311
+
1312
+ ```
1313
+ resolveProvider(modality):
1314
+ 1. Check user-specified provider ID → return if found
1315
+ 2. Check configured defaults → return if found
1316
+ 3. Scan all providers:
1317
+ → Return first LOCAL provider supporting this modality
1318
+ → Fallback to first CLOUD provider
1319
+ 4. Throw NO_PROVIDER error
1320
+ ```
1321
+
1322
+ ### Retry & Failover Logic
1323
+
1324
+ ```
1325
+ executeWithRetry(modality, provider, fn):
1326
+ for attempt = 0..maxRetries:
1327
+ try: return fn()
1328
+ catch:
1329
+ if error is retryable AND attempts remain:
1330
+ wait backoffMs * 2^attempt (exponential backoff)
1331
+ retry same provider
1332
+ if error is NOT GENERATION_FAILED AND failover enabled:
1333
+ try each alternative provider for this modality
1334
+ throw last error
1335
+ ```
1336
+
1337
+ **Retryable errors (same provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT`, `GENERATION_FAILED`
1338
+
1339
+ **Failover-eligible errors (cross-provider):** `PROVIDER_UNAVAILABLE`, `RATE_LIMITED`, `TIMEOUT` (NOT `GENERATION_FAILED`)
1340
+
1341
+ ### Model Registry & Caching
1342
+
1343
+ - Models are fetched from providers via `listModels()` and cached in memory
1344
+ - Cache TTL is configurable (default: 60 minutes)
1345
+ - `syncModels()` forces a refresh of all provider model lists
1346
+ - Registry tracks model → provider mappings for fast resolution
1347
+
1348
+ ### Usage Tracking
1349
+
1350
+ Every API call (success or failure) records a `UsageEvent`:
1351
+
1352
+ ```typescript
1353
+ interface UsageEvent {
1354
+ modality: 'llm' | 'image' | 'video' | 'tts';
1355
+ provider: string;
1356
+ model: string;
1357
+ cost: number; // USD
1358
+ latencyMs: number;
1359
+ input?: number; // tokens or characters
1360
+ output?: number; // tokens
1361
+ unit?: string;
1362
+ timestamp: string; // ISO 8601
1363
+ success: boolean;
1364
+ error?: string; // error message if failed
1365
+ metadata?: Record<string, unknown>;
1366
+ }
1367
+ ```
1368
+
1369
+ ---
323
1370
 
324
- ### Cleanup
1371
+ ## Error Handling
1372
+
1373
+ All errors are instances of `NoosphereError`:
325
1374
 
326
1375
  ```typescript
327
- await ai.dispose();
1376
+ import { NoosphereError } from 'noosphere';
1377
+
1378
+ try {
1379
+ await ai.chat({ messages: [{ role: 'user', content: 'Hello' }] });
1380
+ } catch (err) {
1381
+ if (err instanceof NoosphereError) {
1382
+ console.log(err.code); // error code
1383
+ console.log(err.provider); // which provider failed
1384
+ console.log(err.modality); // which modality
1385
+ console.log(err.model); // which model (if known)
1386
+ console.log(err.cause); // underlying error
1387
+ console.log(err.isRetryable()); // whether retry might help
1388
+ }
1389
+ }
328
1390
  ```
329
1391
 
330
- ## Providers
1392
+ ### Error Codes
331
1393
 
332
- | Provider | Modalities | Type |
333
- |---|---|---|
334
- | Pi-AI (OpenAI, Anthropic, Google, Groq, Mistral, xAI, OpenRouter) | LLM | Cloud |
335
- | FAL | Image, Video, TTS | Cloud |
336
- | Hugging Face | LLM, Image, TTS | Cloud |
337
- | ComfyUI | Image, Video | Local |
338
- | Piper / Kokoro | TTS | Local |
1394
+ | Code | Description | Retryable | Failover |
1395
+ |---|---|---|---|
1396
+ | `PROVIDER_UNAVAILABLE` | Provider is down or unreachable | Yes | Yes |
1397
+ | `RATE_LIMITED` | API rate limit exceeded | Yes | Yes |
1398
+ | `TIMEOUT` | Request exceeded timeout | Yes | Yes |
1399
+ | `GENERATION_FAILED` | Generation error (bad prompt, model issue) | Yes | No |
1400
+ | `AUTH_FAILED` | Invalid or missing API key | No | No |
1401
+ | `MODEL_NOT_FOUND` | Requested model doesn't exist | No | No |
1402
+ | `INVALID_INPUT` | Bad parameters or unsupported operation | No | No |
1403
+ | `NO_PROVIDER` | No provider available for the requested modality | No | No |
1404
+
1405
+ ---
1406
+
1407
+ ## Custom Providers
1408
+
1409
+ Extend Noosphere with your own providers:
1410
+
1411
+ ```typescript
1412
+ import type { NoosphereProvider, ModelInfo, ChatOptions, NoosphereResult, Modality } from 'noosphere';
1413
+
1414
+ const myProvider: NoosphereProvider = {
1415
+ // Required properties
1416
+ id: 'my-provider',
1417
+ name: 'My Custom Provider',
1418
+ modalities: ['llm', 'image'] as Modality[],
1419
+ isLocal: false,
1420
+
1421
+ // Required methods
1422
+ async ping() { return true; },
1423
+ async listModels(modality?: Modality): Promise<ModelInfo[]> {
1424
+ return [{
1425
+ id: 'my-model',
1426
+ provider: 'my-provider',
1427
+ name: 'My Model',
1428
+ modality: 'llm',
1429
+ local: false,
1430
+ cost: { price: 1.0, unit: 'per_1m_tokens' },
1431
+ capabilities: {
1432
+ contextWindow: 128000,
1433
+ maxTokens: 4096,
1434
+ supportsVision: false,
1435
+ supportsStreaming: true,
1436
+ },
1437
+ }];
1438
+ },
1439
+
1440
+ // Optional methods — implement per modality
1441
+ async chat(options: ChatOptions): Promise<NoosphereResult> {
1442
+ const start = Date.now();
1443
+ // ... your implementation
1444
+ return {
1445
+ content: 'Response text',
1446
+ provider: 'my-provider',
1447
+ model: 'my-model',
1448
+ modality: 'llm',
1449
+ latencyMs: Date.now() - start,
1450
+ usage: { cost: 0.001, input: 100, output: 50, unit: 'tokens' },
1451
+ };
1452
+ },
1453
+
1454
+ // stream?(options): NoosphereStream
1455
+ // image?(options): Promise<NoosphereResult>
1456
+ // video?(options): Promise<NoosphereResult>
1457
+ // speak?(options): Promise<NoosphereResult>
1458
+ // dispose?(): Promise<void>
1459
+ };
1460
+
1461
+ ai.registerProvider(myProvider);
1462
+ ```
1463
+
1464
+ ---
1465
+
1466
+ ## Provider Summary
1467
+
1468
+ | Provider | ID | Modalities | Type | Models | Library |
1469
+ |---|---|---|---|---|---|
1470
+ | Pi-AI Gateway | `pi-ai` | LLM | Cloud | 246+ | `@mariozechner/pi-ai` |
1471
+ | FAL.ai | `fal` | Image, Video, TTS | Cloud | 867+ | `@fal-ai/client` |
1472
+ | Hugging Face | `huggingface` | LLM, Image, TTS | Cloud | Unlimited (any HF model) | `@huggingface/inference` |
1473
+ | ComfyUI | `comfyui` | Image | Local | SDXL workflows | Direct HTTP |
1474
+ | Piper TTS | `piper` | TTS | Local | Piper voices | Direct HTTP |
1475
+ | Kokoro TTS | `kokoro` | TTS | Local | Kokoro voices | Direct HTTP |
339
1476
 
340
1477
  ## Requirements
341
1478