universal-llm-client 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/CHANGELOG.md +139 -103
  2. package/LICENSE +21 -21
  3. package/README.md +591 -591
  4. package/dist/ai-model.js.map +1 -1
  5. package/dist/auditor.js.map +1 -1
  6. package/dist/client.js.map +1 -1
  7. package/dist/http.js.map +1 -1
  8. package/dist/index.d.ts +1 -1
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +1 -1
  11. package/dist/index.js.map +1 -1
  12. package/dist/interfaces.d.ts +20 -0
  13. package/dist/interfaces.d.ts.map +1 -1
  14. package/dist/interfaces.js.map +1 -1
  15. package/dist/mcp.js.map +1 -1
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/google.d.ts.map +1 -1
  18. package/dist/providers/google.js +2 -0
  19. package/dist/providers/google.js.map +1 -1
  20. package/dist/providers/index.js.map +1 -1
  21. package/dist/providers/ollama.js.map +1 -1
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/router.js.map +1 -1
  24. package/dist/stream-decoder.js.map +1 -1
  25. package/dist/structured-output.d.ts +24 -1
  26. package/dist/structured-output.d.ts.map +1 -1
  27. package/dist/structured-output.js +58 -5
  28. package/dist/structured-output.js.map +1 -1
  29. package/dist/tools.js.map +1 -1
  30. package/dist/zod-adapter.js.map +1 -1
  31. package/package.json +115 -116
  32. package/src/ai-model.ts +0 -350
  33. package/src/auditor.ts +0 -213
  34. package/src/client.ts +0 -402
  35. package/src/debug/debug-google-streaming.ts +0 -97
  36. package/src/debug/debug-tool-execution.ts +0 -86
  37. package/src/debug/test-lmstudio-tools.ts +0 -155
  38. package/src/demos/README.md +0 -47
  39. package/src/demos/basic/universal-llm-examples.ts +0 -161
  40. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  41. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  42. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  43. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  44. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  45. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  46. package/src/demos/model-alias-demo.ts +0 -0
  47. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  48. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  49. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  50. package/src/demos/tools/astrid-production-memory.ts +0 -558
  51. package/src/demos/tools/basic-translation-test.ts +0 -66
  52. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  53. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  54. package/src/demos/tools/clean-translation-test.ts +0 -119
  55. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  56. package/src/demos/tools/complete-rag-demo.ts +0 -369
  57. package/src/demos/tools/complete-tool-demo.ts +0 -132
  58. package/src/demos/tools/demo-tool-calling.ts +0 -124
  59. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  60. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  61. package/src/demos/tools/memory-integration-test.ts +0 -420
  62. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  63. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  64. package/src/demos/tools/production-tool-demo.ts +0 -245
  65. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  66. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  67. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  68. package/src/demos/tools/translation-integration-guide.ts +0 -346
  69. package/src/demos/tools/universal-memory-system.ts +0 -560
  70. package/src/http.ts +0 -247
  71. package/src/index.ts +0 -160
  72. package/src/interfaces.ts +0 -657
  73. package/src/mcp.ts +0 -345
  74. package/src/providers/anthropic.ts +0 -762
  75. package/src/providers/google.ts +0 -620
  76. package/src/providers/index.ts +0 -8
  77. package/src/providers/ollama.ts +0 -469
  78. package/src/providers/openai.ts +0 -392
  79. package/src/router.ts +0 -780
  80. package/src/stream-decoder.ts +0 -361
  81. package/src/structured-output.ts +0 -702
  82. package/src/test-scripts/test-advanced-tools.ts +0 -310
  83. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  84. package/src/test-scripts/test-google-streaming.ts +0 -63
  85. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  86. package/src/test-scripts/test-mcp-config.ts +0 -28
  87. package/src/test-scripts/test-mcp-connection.ts +0 -29
  88. package/src/test-scripts/test-system-message-positions.ts +0 -163
  89. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  90. package/src/test-scripts/test-tool-calling.ts +0 -231
  91. package/src/tests/ai-model.test.ts +0 -1614
  92. package/src/tests/auditor.test.ts +0 -224
  93. package/src/tests/http.test.ts +0 -200
  94. package/src/tests/interfaces.test.ts +0 -117
  95. package/src/tests/providers/google.test.ts +0 -660
  96. package/src/tests/providers/ollama.test.ts +0 -954
  97. package/src/tests/providers/openai.test.ts +0 -1122
  98. package/src/tests/router.test.ts +0 -254
  99. package/src/tests/stream-decoder.test.ts +0 -179
  100. package/src/tests/structured-output.test.ts +0 -1340
  101. package/src/tests/tools.test.ts +0 -175
  102. package/src/tools.ts +0 -246
  103. package/src/zod-adapter.ts +0 -72
package/README.md CHANGED
@@ -1,591 +1,591 @@
1
- # universal-llm-client
2
-
3
- A universal LLM client for JavaScript/TypeScript with **transparent provider failover**, streaming tool execution, pluggable reasoning strategies, and native observability.
4
-
5
- ```typescript
6
- import { AIModel } from 'universal-llm-client';
7
-
8
- const model = new AIModel({
9
- model: 'gemini-2.5-flash',
10
- providers: [
11
- { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
12
- { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY },
13
- { type: 'ollama' },
14
- ],
15
- });
16
-
17
- const response = await model.chat([
18
- { role: 'user', content: 'Hello!' },
19
- ]);
20
- ```
21
-
22
- > **One model, multiple backends.** If Google fails, it transparently fails over to OpenRouter, then to local Ollama. Your code never knows the difference.
23
-
24
- ---
25
-
26
- ## Features
27
-
28
- - 🔄 **Transparent Failover** — Priority-ordered provider chain with retries, health tracking, and cooldowns
29
- - 🛠️ **Tool Calling** — Register tools once, works across all providers. Autonomous multi-turn execution loop
30
- - 📋 **Structured Output** — Zod schema validation, JSON Schema support, streaming, and type-safe responses
31
- - 🌊 **Streaming** — First-class async generator streaming with pluggable decoder strategies
32
- - 🧠 **Reasoning** — Native `<think>` tag parsing, interleaved reasoning, and model thinking support
33
- - 🔍 **Observability** — Built-in auditor interface for logging, cost tracking, and behavioral analysis
34
- - 🌐 **Universal Runtime** — Node.js 22+, Bun, Deno, and modern browsers
35
- - 🤖 **MCP Native** — Bridge MCP servers to LLM tools with zero glue code
36
- - 📊 **Embeddings** — Single and batch embedding generation
37
-
38
- ## Supported Providers
39
-
40
- | Provider | Type | Notes |
41
- |---|---|---|
42
- | **Ollama** | `ollama` | Local or cloud models, NDJSON streaming, model pulling, vision/multimodal |
43
- | **OpenAI** | `openai` | GPT-4o, o3, etc. Also works with OpenRouter, Groq, LM Studio, vLLM |
44
- | **Google AI Studio** | `google` | Gemini models, system instructions, multimodal |
45
- | **Vertex AI** | `vertex` | Same as Google AI but with regional endpoints and Bearer tokens |
46
- | **LlamaCpp** | `llamacpp` | Local llama.cpp / llama-server instances |
47
-
48
- ---
49
-
50
- ## Installation
51
-
52
- ```bash
53
- bun add universal-llm-client
54
- # or
55
- npm install universal-llm-client
56
- ```
57
-
58
- **Optional**: For MCP integration:
59
- ```bash
60
- bun add @modelcontextprotocol/sdk
61
- ```
62
-
63
- ---
64
-
65
- ## Quick Start
66
-
67
- ### Basic Chat
68
-
69
- ```typescript
70
- import { AIModel } from 'universal-llm-client';
71
-
72
- const model = new AIModel({
73
- model: 'qwen3:4b',
74
- providers: [{ type: 'ollama' }],
75
- });
76
-
77
- const response = await model.chat([
78
- { role: 'system', content: 'You are a helpful assistant.' },
79
- { role: 'user', content: 'What is the capital of France?' },
80
- ]);
81
-
82
- console.log(response.message.content);
83
- // "The capital of France is Paris."
84
- ```
85
-
86
- ### Streaming
87
-
88
- ```typescript
89
- for await (const event of model.chatStream([
90
- { role: 'user', content: 'Write a haiku about code.' },
91
- ])) {
92
- if (event.type === 'text') {
93
- process.stdout.write(event.content);
94
- } else if (event.type === 'thinking') {
95
- // Model reasoning (when supported)
96
- console.log('[thinking]', event.content);
97
- }
98
- }
99
- ```
100
-
101
- ### Tool Calling
102
-
103
- ```typescript
104
- model.registerTool(
105
- 'get_weather',
106
- 'Get current weather for a location',
107
- {
108
- type: 'object',
109
- properties: {
110
- city: { type: 'string', description: 'City name' },
111
- },
112
- required: ['city'],
113
- },
114
- async (args) => {
115
- const { city } = args as { city: string };
116
- return { temperature: 22, condition: 'sunny', city };
117
- },
118
- );
119
-
120
- // Autonomous tool execution — the model calls tools and loops until done
121
- const response = await model.chatWithTools([
122
- { role: 'user', content: "What's the weather in Tokyo?" },
123
- ]);
124
-
125
- console.log(response.message.content);
126
- // "The weather in Tokyo is 22°C and sunny."
127
- console.log(response.toolTrace);
128
- // [{ name: 'get_weather', args: { city: 'Tokyo' }, result: {...}, duration: 5 }]
129
- ```
130
-
131
- ### Provider Failover
132
-
133
- ```typescript
134
- const model = new AIModel({
135
- model: 'gemini-2.5-flash',
136
- retries: 2, // retries per provider before failover
137
- timeout: 30000, // request timeout in ms
138
- providers: [
139
- { type: 'google', apiKey: process.env.GOOGLE_KEY, priority: 0 },
140
- { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY, priority: 1 },
141
- { type: 'ollama', url: 'http://localhost:11434', priority: 2 },
142
- ],
143
- });
144
-
145
- // If Google returns 500, retries twice, then seamlessly tries OpenRouter.
146
- // If OpenRouter also fails, falls back to local Ollama.
147
- // Your code sees a single response.
148
- const response = await model.chat([{ role: 'user', content: 'Hello' }]);
149
-
150
- // Check provider health at any time
151
- console.log(model.getProviderStatus());
152
- // [{ id: 'google-0', healthy: true }, { id: 'openai-1', healthy: true }, ...]
153
- ```
154
-
155
- ### Multimodal (Vision)
156
-
157
- ```typescript
158
- import { AIModel, multimodalMessage } from 'universal-llm-client';
159
-
160
- const model = new AIModel({
161
- model: 'gemini-2.5-flash',
162
- providers: [{ type: 'google', apiKey: process.env.GOOGLE_KEY }],
163
- });
164
-
165
- const response = await model.chat([
166
- multimodalMessage('What do you see in this image?', [
167
- 'https://example.com/photo.jpg',
168
- ]),
169
- ]);
170
- ```
171
-
172
- ### Embeddings
173
-
174
- ```typescript
175
- const embedModel = new AIModel({
176
- model: 'nomic-embed-text-v2-moe:latest',
177
- providers: [{ type: 'ollama' }],
178
- });
179
-
180
- const vector = await embedModel.embed('Hello world');
181
- // [0.006, 0.026, -0.009, ...]
182
-
183
- const vectors = await embedModel.embedArray(['Hello', 'World']);
184
- // [[0.006, ...], [0.012, ...]]
185
- ```
186
-
187
- ### Structured Output
188
-
189
- Get typed, validated JSON responses from any LLM using Zod schemas:
190
-
191
- ```typescript
192
- import { AIModel } from 'universal-llm-client';
193
- import { z } from 'zod';
194
-
195
- const model = new AIModel({
196
- model: 'gemini-2.5-flash',
197
- providers: [
198
- { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
199
- { type: 'ollama' },
200
- ],
201
- });
202
-
203
- // Define your schema
204
- const UserSchema = z.object({
205
- name: z.string(),
206
- age: z.number(),
207
- email: z.string().email(),
208
- interests: z.array(z.string()),
209
- });
210
-
211
- // Method 1: generateStructured (throws on validation failure)
212
- const user = await model.generateStructured(UserSchema, [
213
- { role: 'user', content: 'Generate a user profile for a software developer' },
214
- ]);
215
-
216
- console.log(user.name); // TypeScript knows this is string
217
- console.log(user.age); // TypeScript knows this is number
218
- console.log(user.email); // TypeScript knows this is string
219
- console.log(user.interests); // TypeScript knows this is string[]
220
- ```
221
-
222
- **Non-throwing variant:**
223
-
224
- ```typescript
225
- // Method 2: tryParseStructured (returns result object, never throws)
226
- const result = await model.tryParseStructured(UserSchema, messages);
227
-
228
- if (result.ok) {
229
- console.log('User:', result.value.name);
230
- } else {
231
- console.log('Error:', result.error.message);
232
- console.log('Raw LLM output:', result.rawOutput);
233
- }
234
- ```
235
-
236
- **Via chat options:**
237
-
238
- ```typescript
239
- // Method 3: chat with output parameter
240
- const response = await model.chat(messages, {
241
- output: { schema: UserSchema },
242
- });
243
-
244
- // response.structured is typed as { name: string, age: number, ... }
245
- if (response.structured) {
246
- console.log(response.structured.name);
247
- }
248
- ```
249
-
250
- **Streaming structured output:**
251
-
252
- ```typescript
253
- // Stream partial validated objects as JSON generates
254
- for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
255
- console.log('Partial:', partial);
256
- // Partial: { name: 'Alice' }
257
- // Partial: { name: 'Alice', age: 30 }
258
- // Partial: { name: 'Alice', age: 30, email: 'alice@example.com' }
259
- }
260
- ```
261
-
262
- **Raw JSON Schema (without Zod):**
263
-
264
- ```typescript
265
- const response = await model.chat(messages, {
266
- jsonSchema: {
267
- type: 'object',
268
- properties: {
269
- name: { type: 'string' },
270
- age: { type: 'number' },
271
- },
272
- required: ['name', 'age'],
273
- },
274
- name: 'Person', // Optional, used for LLM guidance
275
- });
276
- ```
277
-
278
- **Separate module import (tree-shaking):**
279
-
280
- ```typescript
281
- // Import only structured output types if you don't need the full client
282
- import {
283
- StructuredOutputError,
284
- type StructuredOutputResult,
285
- type StructuredOutputOptions,
286
- parseStructured,
287
- tryParseStructured,
288
- zodToJsonSchema,
289
- } from 'universal-llm-client/structured-output';
290
- ```
291
-
292
- **Vision with structured output:**
293
-
294
- ```typescript
295
- const ImageAnalysisSchema = z.object({
296
- objects: z.array(z.string()),
297
- scene: z.string(),
298
- mood: z.string(),
299
- });
300
-
301
- const response = await model.generateStructured(ImageAnalysisSchema, [
302
- multimodalMessage('Analyze this image', ['https://example.com/photo.jpg']),
303
- ]);
304
- ```
305
-
306
- **Provider compatibility:**
307
-
308
- | Provider | Method | Notes |
309
- |----------|--------|-------|
310
- | OpenAI | `response_format.json_schema` | Strict mode enabled |
311
- | Ollama | `format: { schema }` | Model must support grammar |
312
- | Google | `responseMimeType + responseSchema` | Some features stripped |
313
-
314
- ### Observability
315
-
316
- ```typescript
317
- import { AIModel, ConsoleAuditor, BufferedAuditor } from 'universal-llm-client';
318
-
319
- // Simple console logging
320
- const model = new AIModel({
321
- model: 'qwen3:4b',
322
- providers: [{ type: 'ollama' }],
323
- auditor: new ConsoleAuditor('[LLM]'),
324
- });
325
- // [LLM] REQUEST [ollama] (qwen3:4b) →
326
- // [LLM] RESPONSE [ollama] (qwen3:4b) 1200ms 68 tokens
327
-
328
- // Buffered for custom sinks (OpenTelemetry, DB, etc.)
329
- const auditor = new BufferedAuditor({
330
- maxBufferSize: 100,
331
- onFlush: async (events) => {
332
- await sendToOpenTelemetry(events);
333
- },
334
- });
335
- ```
336
-
337
- ### MCP Integration
338
-
339
- ```typescript
340
- import { AIModel, MCPToolBridge } from 'universal-llm-client';
341
-
342
- const model = new AIModel({
343
- model: 'qwen3:4b',
344
- providers: [{ type: 'ollama' }],
345
- });
346
-
347
- const mcp = new MCPToolBridge({
348
- servers: {
349
- filesystem: {
350
- command: 'npx',
351
- args: ['-y', '@modelcontextprotocol/server-filesystem', './'],
352
- },
353
- weather: {
354
- url: 'https://mcp.example.com/weather',
355
- },
356
- },
357
- });
358
-
359
- await mcp.connect();
360
- await mcp.registerTools(model);
361
-
362
- // MCP tools are now callable via chatWithTools
363
- const response = await model.chatWithTools([
364
- { role: 'user', content: 'List files in the current directory' },
365
- ]);
366
-
367
- await mcp.disconnect();
368
- ```
369
-
370
- ### Stream Decoders
371
-
372
- ```typescript
373
- import { AIModel, createDecoder } from 'universal-llm-client';
374
-
375
- // Passthrough — raw text, no parsing
376
- // Standard Chat — text + native reasoning + tool calls
377
- // Interleaved Reasoning — parses <think> and <progress> tags from text streams
378
-
379
- const decoder = createDecoder('interleaved-reasoning', (event) => {
380
- switch (event.type) {
381
- case 'text': console.log(event.content); break;
382
- case 'thinking': console.log('[think]', event.content); break;
383
- case 'progress': console.log('[progress]', event.content); break;
384
- case 'tool_call': console.log('[tool]', event.calls); break;
385
- }
386
- });
387
-
388
- decoder.push('<think>Let me analyze this</think>The answer is 42');
389
- decoder.flush();
390
-
391
- console.log(decoder.getCleanContent()); // "The answer is 42"
392
- console.log(decoder.getReasoning()); // "Let me analyze this"
393
- ```
394
-
395
- ---
396
-
397
- ## API Reference
398
-
399
- ### `AIModel`
400
-
401
- The universal client. One class, multiple backends.
402
-
403
- ```typescript
404
- new AIModel(config: AIModelConfig)
405
- ```
406
-
407
- **Config:**
408
-
409
- | Property | Type | Default | Description |
410
- |---|---|---|---|
411
- | `model` | `string` | — | Model name (e.g., `'gemini-2.5-flash'`) |
412
- | `providers` | `ProviderConfig[]` | — | Ordered list of provider backends |
413
- | `retries` | `number` | `2` | Retries per provider before failover |
414
- | `timeout` | `number` | `30000` | Request timeout in ms |
415
- | `auditor` | `Auditor` | `NoopAuditor` | Observability sink |
416
- | `thinking` | `boolean` | `false` | Enable model thinking/reasoning |
417
- | `debug` | `boolean` | `false` | Debug logging |
418
- | `defaultParameters` | `object` | — | Default parameters for all requests |
419
-
420
- **Provider Config:**
421
-
422
- | Property | Type | Description |
423
- |---|---|---|
424
- | `type` | `string` | `'ollama'`, `'openai'`, `'google'`, `'vertex'`, `'llamacpp'` |
425
- | `url` | `string` | Provider URL (has sensible defaults) |
426
- | `apiKey` | `string` | API key or Bearer token |
427
- | `priority` | `number` | Lower = tried first (defaults to array index) |
428
- | `model` | `string` | Override model name for this provider |
429
- | `region` | `string` | Vertex AI region (e.g., `'us-central1'`) |
430
- | `apiVersion` | `string` | API version (e.g., `'v1beta'`) |
431
-
432
- **Methods:**
433
-
434
- | Method | Returns | Description |
435
- |---|---|---|
436
- | `chat(messages, options?)` | `Promise<LLMChatResponse>` | Send chat request |
437
- | `chatWithTools(messages, options?)` | `Promise<LLMChatResponse>` | Chat with autonomous tool execution |
438
- | `chatStream(messages, options?)` | `AsyncGenerator<DecodedEvent>` | Stream chat response |
439
- | `generateStructured(schema, messages, options?)` | `Promise<T>` | Generate typed JSON validated against Zod schema |
440
- | `tryParseStructured(schema, messages, options?)` | `Promise<StructuredOutputResult<T>>` | Non-throwing variant returning result object |
441
- | `generateStructuredStream(schema, messages, options?)` | `AsyncGenerator<T, T>` | Stream partial validated objects as JSON generates |
442
- | `embed(text)` | `Promise<number[]>` | Generate single embedding |
443
- | `embedArray(texts)` | `Promise<number[][]>` | Generate batch embeddings |
444
- | `registerTool(name, desc, params, handler)` | `void` | Register a callable tool |
445
- | `registerTools(tools)` | `void` | Register multiple tools |
446
- | `getModels()` | `Promise<string[]>` | List available models |
447
- | `getModelInfo()` | `Promise<ModelMetadata>` | Get model metadata |
448
- | `getProviderStatus()` | `ProviderStatus[]` | Check provider health |
449
- | `setModel(name)` | `void` | Switch model at runtime |
450
- | `dispose()` | `Promise<void>` | Clean shutdown |
451
-
452
- ### Structured Output
453
-
454
- ```typescript
455
- import { z } from 'zod';
456
-
457
- // Define your schema
458
- const UserSchema = z.object({
459
- name: z.string(),
460
- age: z.number(),
461
- email: z.string().email(),
462
- });
463
-
464
- // Generate typed JSON
465
- const user = await model.generateStructured(UserSchema, messages);
466
- // TypeScript infers: { name: string; age: number; email: string }
467
-
468
- // Non-throwing variant
469
- const result = await model.tryParseStructured(UserSchema, messages);
470
- if (result.ok) {
471
- console.log(result.value.name); // Fully typed
472
- } else {
473
- console.log(result.error.message);
474
- }
475
-
476
- // Stream partial objects
477
- for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
478
- console.log(partial); // Partial validated objects
479
- }
480
- ```
481
-
482
- **Separate module import (tree-shaking):**
483
-
484
- ```typescript
485
- import {
486
- StructuredOutputError,
487
- type StructuredOutputResult,
488
- parseStructured,
489
- tryParseStructured,
490
- zodToJsonSchema,
491
- } from 'universal-llm-client/structured-output';
492
-
493
- // Use without importing the full client
494
- const schema = z.object({ name: z.string() });
495
- const jsonSchema = zodToJsonSchema(schema);
496
- ```
497
-
498
- ### `ToolBuilder` / `ToolExecutor`
499
-
500
- ```typescript
501
- import { ToolBuilder, ToolExecutor } from 'universal-llm-client';
502
-
503
- // Fluent builder
504
- const tool = new ToolBuilder('search')
505
- .description('Search the web')
506
- .addParameter('query', 'string', 'Search query', true)
507
- .addParameter('limit', 'number', 'Max results', false)
508
- .build();
509
-
510
- // Execution wrappers
511
- const safeHandler = ToolExecutor.compose(
512
- myHandler,
513
- h => ToolExecutor.withTimeout(h, 5000),
514
- h => ToolExecutor.safe(h),
515
- h => ToolExecutor.withValidation(h, ['query']),
516
- );
517
- ```
518
-
519
- ### Auditor Interface
520
-
521
- Implement custom observability by providing an `Auditor`:
522
-
523
- ```typescript
524
- interface Auditor {
525
- record(event: AuditEvent): void;
526
- flush?(): Promise<void>;
527
- }
528
- ```
529
-
530
- **Built-in implementations:**
531
- - `NoopAuditor` — Zero overhead (default)
532
- - `ConsoleAuditor` — Structured console logging
533
- - `BufferedAuditor` — Collects events for custom sinks
534
-
535
- ---
536
-
537
- ## Architecture
538
-
539
- ```
540
- universal-llm-client
541
- ├── AIModel ← Public API (the only class you import)
542
- ├── Router ← Internal failover engine
543
- ├── BaseLLMClient ← Abstract client with tool execution
544
- ├── Providers
545
- │ ├── OllamaClient
546
- │ ├── OpenAICompatibleClient (OpenAI, OpenRouter, Groq, LM Studio, vLLM, LlamaCpp)
547
- │ └── GoogleClient (AI Studio + Vertex AI)
548
- ├── StreamDecoder ← Pluggable reasoning strategies
549
- ├── Auditor ← Observability interface
550
- ├── MCPToolBridge ← MCP server integration
551
- └── HTTP Utilities ← Universal fetch-based transport
552
- ```
553
-
554
- ### Design Principles
555
-
556
- 1. **Single import** — `AIModel` is the only class users need
557
- 2. **Provider agnostic** — Same code works with any backend
558
- 3. **Transparent failover** — Health tracking and cooldowns happen behind the scenes
559
- 4. **Zero dependencies** — Core library depends only on native `fetch`
560
- 5. **Agent-ready** — Stateless, composable instances designed as foundation for agent frameworks
561
- 6. **Observable** — Every request, response, tool call, retry, and failover is auditable
562
-
563
- ---
564
-
565
- ## Runtime Support
566
-
567
- | Runtime | Version | Status |
568
- |---|---|---|
569
- | **Node.js** | 22+ | ✅ Full support |
570
- | **Bun** | 1.0+ | ✅ Full support |
571
- | **Deno** | 2.0+ | ✅ Full support |
572
- | **Browsers** | Modern | ✅ No stdio MCP, HTTP transport only |
573
-
574
- ---
575
-
576
- ## For Agent Framework Authors
577
-
578
- `AIModel` is designed as the transport layer for agentic systems:
579
-
580
- - **Stateless** — No conversation history stored. Your framework manages memory
581
- - **Composable** — Create separate instances for chat, embeddings, vision
582
- - **Tool tracing** — `chatWithTools()` returns full execution trace
583
- - **Context budget** — `getModelInfo()` exposes `contextLength`
584
- - **Auditor as system bus** — Inject custom sinks for cost tracking, behavioral scoring
585
- - **StreamDecoder as UI bridge** — Select decoder strategy per-call
586
-
587
- ---
588
-
589
- ## License
590
-
591
- MIT
1
+ # universal-llm-client
2
+
3
+ A universal LLM client for JavaScript/TypeScript with **transparent provider failover**, streaming tool execution, pluggable reasoning strategies, and native observability.
4
+
5
+ ```typescript
6
+ import { AIModel } from 'universal-llm-client';
7
+
8
+ const model = new AIModel({
9
+ model: 'gemini-2.5-flash',
10
+ providers: [
11
+ { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
12
+ { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY },
13
+ { type: 'ollama' },
14
+ ],
15
+ });
16
+
17
+ const response = await model.chat([
18
+ { role: 'user', content: 'Hello!' },
19
+ ]);
20
+ ```
21
+
22
+ > **One model, multiple backends.** If Google fails, it transparently fails over to OpenRouter, then to local Ollama. Your code never knows the difference.
23
+
24
+ ---
25
+
26
+ ## Features
27
+
28
+ - 🔄 **Transparent Failover** — Priority-ordered provider chain with retries, health tracking, and cooldowns
29
+ - 🛠️ **Tool Calling** — Register tools once, works across all providers. Autonomous multi-turn execution loop
30
+ - 📋 **Structured Output** — Zod schema validation, JSON Schema support, streaming, and type-safe responses
31
+ - 🌊 **Streaming** — First-class async generator streaming with pluggable decoder strategies
32
+ - 🧠 **Reasoning** — Native `<think>` tag parsing, interleaved reasoning, and model thinking support
33
+ - 🔍 **Observability** — Built-in auditor interface for logging, cost tracking, and behavioral analysis
34
+ - 🌐 **Universal Runtime** — Node.js 22+, Bun, Deno, and modern browsers
35
+ - 🤖 **MCP Native** — Bridge MCP servers to LLM tools with zero glue code
36
+ - 📊 **Embeddings** — Single and batch embedding generation
37
+
38
+ ## Supported Providers
39
+
40
+ | Provider | Type | Notes |
41
+ |---|---|---|
42
+ | **Ollama** | `ollama` | Local or cloud models, NDJSON streaming, model pulling, vision/multimodal |
43
+ | **OpenAI** | `openai` | GPT-4o, o3, etc. Also works with OpenRouter, Groq, LM Studio, vLLM |
44
+ | **Google AI Studio** | `google` | Gemini models, system instructions, multimodal |
45
+ | **Vertex AI** | `vertex` | Same as Google AI but with regional endpoints and Bearer tokens |
46
+ | **LlamaCpp** | `llamacpp` | Local llama.cpp / llama-server instances |
47
+
48
+ ---
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ bun add universal-llm-client
54
+ # or
55
+ npm install universal-llm-client
56
+ ```
57
+
58
+ **Optional**: For MCP integration:
59
+ ```bash
60
+ bun add @modelcontextprotocol/sdk
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Quick Start
66
+
67
+ ### Basic Chat
68
+
69
+ ```typescript
70
+ import { AIModel } from 'universal-llm-client';
71
+
72
+ const model = new AIModel({
73
+ model: 'qwen3:4b',
74
+ providers: [{ type: 'ollama' }],
75
+ });
76
+
77
+ const response = await model.chat([
78
+ { role: 'system', content: 'You are a helpful assistant.' },
79
+ { role: 'user', content: 'What is the capital of France?' },
80
+ ]);
81
+
82
+ console.log(response.message.content);
83
+ // "The capital of France is Paris."
84
+ ```
85
+
86
+ ### Streaming
87
+
88
+ ```typescript
89
+ for await (const event of model.chatStream([
90
+ { role: 'user', content: 'Write a haiku about code.' },
91
+ ])) {
92
+ if (event.type === 'text') {
93
+ process.stdout.write(event.content);
94
+ } else if (event.type === 'thinking') {
95
+ // Model reasoning (when supported)
96
+ console.log('[thinking]', event.content);
97
+ }
98
+ }
99
+ ```
100
+
101
+ ### Tool Calling
102
+
103
+ ```typescript
104
+ model.registerTool(
105
+ 'get_weather',
106
+ 'Get current weather for a location',
107
+ {
108
+ type: 'object',
109
+ properties: {
110
+ city: { type: 'string', description: 'City name' },
111
+ },
112
+ required: ['city'],
113
+ },
114
+ async (args) => {
115
+ const { city } = args as { city: string };
116
+ return { temperature: 22, condition: 'sunny', city };
117
+ },
118
+ );
119
+
120
+ // Autonomous tool execution — the model calls tools and loops until done
121
+ const response = await model.chatWithTools([
122
+ { role: 'user', content: "What's the weather in Tokyo?" },
123
+ ]);
124
+
125
+ console.log(response.message.content);
126
+ // "The weather in Tokyo is 22°C and sunny."
127
+ console.log(response.toolTrace);
128
+ // [{ name: 'get_weather', args: { city: 'Tokyo' }, result: {...}, duration: 5 }]
129
+ ```
130
+
131
+ ### Provider Failover
132
+
133
+ ```typescript
134
+ const model = new AIModel({
135
+ model: 'gemini-2.5-flash',
136
+ retries: 2, // retries per provider before failover
137
+ timeout: 30000, // request timeout in ms
138
+ providers: [
139
+ { type: 'google', apiKey: process.env.GOOGLE_KEY, priority: 0 },
140
+ { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY, priority: 1 },
141
+ { type: 'ollama', url: 'http://localhost:11434', priority: 2 },
142
+ ],
143
+ });
144
+
145
+ // If Google returns 500, retries twice, then seamlessly tries OpenRouter.
146
+ // If OpenRouter also fails, falls back to local Ollama.
147
+ // Your code sees a single response.
148
+ const response = await model.chat([{ role: 'user', content: 'Hello' }]);
149
+
150
+ // Check provider health at any time
151
+ console.log(model.getProviderStatus());
152
+ // [{ id: 'google-0', healthy: true }, { id: 'openai-1', healthy: true }, ...]
153
+ ```
154
+
155
+ ### Multimodal (Vision)
156
+
157
+ ```typescript
158
+ import { AIModel, multimodalMessage } from 'universal-llm-client';
159
+
160
+ const model = new AIModel({
161
+ model: 'gemini-2.5-flash',
162
+ providers: [{ type: 'google', apiKey: process.env.GOOGLE_KEY }],
163
+ });
164
+
165
+ const response = await model.chat([
166
+ multimodalMessage('What do you see in this image?', [
167
+ 'https://example.com/photo.jpg',
168
+ ]),
169
+ ]);
170
+ ```
171
+
172
+ ### Embeddings
173
+
174
+ ```typescript
175
+ const embedModel = new AIModel({
176
+ model: 'nomic-embed-text-v2-moe:latest',
177
+ providers: [{ type: 'ollama' }],
178
+ });
179
+
180
+ const vector = await embedModel.embed('Hello world');
181
+ // [0.006, 0.026, -0.009, ...]
182
+
183
+ const vectors = await embedModel.embedArray(['Hello', 'World']);
184
+ // [[0.006, ...], [0.012, ...]]
185
+ ```
186
+
187
+ ### Structured Output
188
+
189
+ Get typed, validated JSON responses from any LLM using Zod schemas:
190
+
191
+ ```typescript
192
+ import { AIModel } from 'universal-llm-client';
193
+ import { z } from 'zod';
194
+
195
+ const model = new AIModel({
196
+ model: 'gemini-2.5-flash',
197
+ providers: [
198
+ { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
199
+ { type: 'ollama' },
200
+ ],
201
+ });
202
+
203
+ // Define your schema
204
+ const UserSchema = z.object({
205
+ name: z.string(),
206
+ age: z.number(),
207
+ email: z.string().email(),
208
+ interests: z.array(z.string()),
209
+ });
210
+
211
+ // Method 1: generateStructured (throws on validation failure)
212
+ const user = await model.generateStructured(UserSchema, [
213
+ { role: 'user', content: 'Generate a user profile for a software developer' },
214
+ ]);
215
+
216
+ console.log(user.name); // TypeScript knows this is string
217
+ console.log(user.age); // TypeScript knows this is number
218
+ console.log(user.email); // TypeScript knows this is string
219
+ console.log(user.interests); // TypeScript knows this is string[]
220
+ ```
221
+
222
+ **Non-throwing variant:**
223
+
224
+ ```typescript
225
+ // Method 2: tryParseStructured (returns result object, never throws)
226
+ const result = await model.tryParseStructured(UserSchema, messages);
227
+
228
+ if (result.ok) {
229
+ console.log('User:', result.value.name);
230
+ } else {
231
+ console.log('Error:', result.error.message);
232
+ console.log('Raw LLM output:', result.rawOutput);
233
+ }
234
+ ```
235
+
236
+ **Via chat options:**
237
+
238
+ ```typescript
239
+ // Method 3: chat with output parameter
240
+ const response = await model.chat(messages, {
241
+ output: { schema: UserSchema },
242
+ });
243
+
244
+ // response.structured is typed as { name: string, age: number, ... }
245
+ if (response.structured) {
246
+ console.log(response.structured.name);
247
+ }
248
+ ```
249
+
250
+ **Streaming structured output:**
251
+
252
+ ```typescript
253
+ // Stream partial validated objects as the JSON is generated
254
+ for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
255
+ console.log('Partial:', partial);
256
+ // Partial: { name: 'Alice' }
257
+ // Partial: { name: 'Alice', age: 30 }
258
+ // Partial: { name: 'Alice', age: 30, email: 'alice@example.com' }
259
+ }
260
+ ```
261
+
262
+ **Raw JSON Schema (without Zod):**
263
+
264
+ ```typescript
265
+ const response = await model.chat(messages, {
266
+ jsonSchema: {
267
+ type: 'object',
268
+ properties: {
269
+ name: { type: 'string' },
270
+ age: { type: 'number' },
271
+ },
272
+ required: ['name', 'age'],
273
+ },
274
+ name: 'Person', // Optional, used for LLM guidance
275
+ });
276
+ ```
277
+
278
+ **Separate module import (tree-shaking):**
279
+
280
+ ```typescript
281
+ // Import only the structured-output helpers and types if you don't need the full client
282
+ import {
283
+ StructuredOutputError,
284
+ type StructuredOutputResult,
285
+ type StructuredOutputOptions,
286
+ parseStructured,
287
+ tryParseStructured,
288
+ zodToJsonSchema,
289
+ } from 'universal-llm-client/structured-output';
290
+ ```
291
+
292
+ **Vision with structured output:**
293
+
294
+ ```typescript
295
+ const ImageAnalysisSchema = z.object({
296
+ objects: z.array(z.string()),
297
+ scene: z.string(),
298
+ mood: z.string(),
299
+ });
300
+
301
+ const response = await model.generateStructured(ImageAnalysisSchema, [
302
+ multimodalMessage('Analyze this image', ['https://example.com/photo.jpg']),
303
+ ]);
304
+ ```
305
+
306
+ **Provider compatibility:**
307
+
308
+ | Provider | Method | Notes |
309
+ |----------|--------|-------|
310
+ | OpenAI | `response_format.json_schema` | Strict mode enabled |
311
+ | Ollama | `format: { schema }` | Model must support grammar |
312
+ | Google | `responseMimeType + responseSchema` | Unsupported schema features are stripped |
313
+
314
+ ### Observability
315
+
316
+ ```typescript
317
+ import { AIModel, ConsoleAuditor, BufferedAuditor } from 'universal-llm-client';
318
+
319
+ // Simple console logging
320
+ const model = new AIModel({
321
+ model: 'qwen3:4b',
322
+ providers: [{ type: 'ollama' }],
323
+ auditor: new ConsoleAuditor('[LLM]'),
324
+ });
325
+ // [LLM] REQUEST [ollama] (qwen3:4b) →
326
+ // [LLM] RESPONSE [ollama] (qwen3:4b) 1200ms 68 tokens
327
+
328
+ // Buffered for custom sinks (OpenTelemetry, DB, etc.)
329
+ const auditor = new BufferedAuditor({
330
+ maxBufferSize: 100,
331
+ onFlush: async (events) => {
332
+ await sendToOpenTelemetry(events);
333
+ },
334
+ });
335
+ ```
336
+
337
+ ### MCP Integration
338
+
339
+ ```typescript
340
+ import { AIModel, MCPToolBridge } from 'universal-llm-client';
341
+
342
+ const model = new AIModel({
343
+ model: 'qwen3:4b',
344
+ providers: [{ type: 'ollama' }],
345
+ });
346
+
347
+ const mcp = new MCPToolBridge({
348
+ servers: {
349
+ filesystem: {
350
+ command: 'npx',
351
+ args: ['-y', '@modelcontextprotocol/server-filesystem', './'],
352
+ },
353
+ weather: {
354
+ url: 'https://mcp.example.com/weather',
355
+ },
356
+ },
357
+ });
358
+
359
+ await mcp.connect();
360
+ await mcp.registerTools(model);
361
+
362
+ // MCP tools are now callable via chatWithTools
363
+ const response = await model.chatWithTools([
364
+ { role: 'user', content: 'List files in the current directory' },
365
+ ]);
366
+
367
+ await mcp.disconnect();
368
+ ```
369
+
370
+ ### Stream Decoders
371
+
372
+ ```typescript
373
+ import { AIModel, createDecoder } from 'universal-llm-client';
374
+
375
+ // Passthrough — raw text, no parsing
376
+ // Standard Chat — text + native reasoning + tool calls
377
+ // Interleaved Reasoning — parses <think> and <progress> tags from text streams
378
+
379
+ const decoder = createDecoder('interleaved-reasoning', (event) => {
380
+ switch (event.type) {
381
+ case 'text': console.log(event.content); break;
382
+ case 'thinking': console.log('[think]', event.content); break;
383
+ case 'progress': console.log('[progress]', event.content); break;
384
+ case 'tool_call': console.log('[tool]', event.calls); break;
385
+ }
386
+ });
387
+
388
+ decoder.push('<think>Let me analyze this</think>The answer is 42');
389
+ decoder.flush();
390
+
391
+ console.log(decoder.getCleanContent()); // "The answer is 42"
392
+ console.log(decoder.getReasoning()); // "Let me analyze this"
393
+ ```
394
+
395
+ ---
396
+
397
+ ## API Reference
398
+
399
+ ### `AIModel`
400
+
401
+ The universal client. One class, multiple backends.
402
+
403
+ ```typescript
404
+ new AIModel(config: AIModelConfig)
405
+ ```
406
+
407
+ **Config:**
408
+
409
+ | Property | Type | Default | Description |
410
+ |---|---|---|---|
411
+ | `model` | `string` | — | Model name (e.g., `'gemini-2.5-flash'`) |
412
+ | `providers` | `ProviderConfig[]` | — | Ordered list of provider backends |
413
+ | `retries` | `number` | `2` | Retries per provider before failover |
414
+ | `timeout` | `number` | `30000` | Request timeout in ms |
415
+ | `auditor` | `Auditor` | `NoopAuditor` | Observability sink |
416
+ | `thinking` | `boolean` | `false` | Enable model thinking/reasoning |
417
+ | `debug` | `boolean` | `false` | Debug logging |
418
+ | `defaultParameters` | `object` | — | Default parameters for all requests |
419
+
420
+ **Provider Config:**
421
+
422
+ | Property | Type | Description |
423
+ |---|---|---|
424
+ | `type` | `string` | `'ollama'`, `'openai'`, `'google'`, `'vertex'`, `'llamacpp'` |
425
+ | `url` | `string` | Provider URL (has sensible defaults) |
426
+ | `apiKey` | `string` | API key or Bearer token |
427
+ | `priority` | `number` | Lower = tried first (defaults to array index) |
428
+ | `model` | `string` | Override model name for this provider |
429
+ | `region` | `string` | Vertex AI region (e.g., `'us-central1'`) |
430
+ | `apiVersion` | `string` | API version (e.g., `'v1beta'`) |
431
+
432
+ **Methods:**
433
+
434
+ | Method | Returns | Description |
435
+ |---|---|---|
436
+ | `chat(messages, options?)` | `Promise<LLMChatResponse>` | Send chat request |
437
+ | `chatWithTools(messages, options?)` | `Promise<LLMChatResponse>` | Chat with autonomous tool execution |
438
+ | `chatStream(messages, options?)` | `AsyncGenerator<DecodedEvent>` | Stream chat response |
439
+ | `generateStructured(schema, messages, options?)` | `Promise<T>` | Generate typed JSON validated against Zod schema |
440
+ | `tryParseStructured(schema, messages, options?)` | `Promise<StructuredOutputResult<T>>` | Non-throwing variant returning result object |
441
+ | `generateStructuredStream(schema, messages, options?)` | `AsyncGenerator<T, T>` | Stream partial validated objects as the JSON is generated |
442
+ | `embed(text)` | `Promise<number[]>` | Generate single embedding |
443
+ | `embedArray(texts)` | `Promise<number[][]>` | Generate batch embeddings |
444
+ | `registerTool(name, desc, params, handler)` | `void` | Register a callable tool |
445
+ | `registerTools(tools)` | `void` | Register multiple tools |
446
+ | `getModels()` | `Promise<string[]>` | List available models |
447
+ | `getModelInfo()` | `Promise<ModelMetadata>` | Get model metadata |
448
+ | `getProviderStatus()` | `ProviderStatus[]` | Check provider health |
449
+ | `setModel(name)` | `void` | Switch model at runtime |
450
+ | `dispose()` | `Promise<void>` | Clean shutdown |
451
+
452
+ ### Structured Output
453
+
454
+ ```typescript
455
+ import { z } from 'zod';
456
+
457
+ // Define your schema
458
+ const UserSchema = z.object({
459
+ name: z.string(),
460
+ age: z.number(),
461
+ email: z.string().email(),
462
+ });
463
+
464
+ // Generate typed JSON
465
+ const user = await model.generateStructured(UserSchema, messages);
466
+ // TypeScript infers: { name: string; age: number; email: string }
467
+
468
+ // Non-throwing variant
469
+ const result = await model.tryParseStructured(UserSchema, messages);
470
+ if (result.ok) {
471
+ console.log(result.value.name); // Fully typed
472
+ } else {
473
+ console.log(result.error.message);
474
+ }
475
+
476
+ // Stream partial objects
477
+ for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
478
+ console.log(partial); // Partial validated objects
479
+ }
480
+ ```
481
+
482
+ **Separate module import (tree-shaking):**
483
+
484
+ ```typescript
485
+ import {
486
+ StructuredOutputError,
487
+ type StructuredOutputResult,
488
+ parseStructured,
489
+ tryParseStructured,
490
+ zodToJsonSchema,
491
+ } from 'universal-llm-client/structured-output';
492
+
493
+ // Use without importing the full client
494
+ const schema = z.object({ name: z.string() });
495
+ const jsonSchema = zodToJsonSchema(schema);
496
+ ```
497
+
498
+ ### `ToolBuilder` / `ToolExecutor`
499
+
500
+ ```typescript
501
+ import { ToolBuilder, ToolExecutor } from 'universal-llm-client';
502
+
503
+ // Fluent builder
504
+ const tool = new ToolBuilder('search')
505
+ .description('Search the web')
506
+ .addParameter('query', 'string', 'Search query', true)
507
+ .addParameter('limit', 'number', 'Max results', false)
508
+ .build();
509
+
510
+ // Execution wrappers
511
+ const safeHandler = ToolExecutor.compose(
512
+ myHandler,
513
+ h => ToolExecutor.withTimeout(h, 5000),
514
+ h => ToolExecutor.safe(h),
515
+ h => ToolExecutor.withValidation(h, ['query']),
516
+ );
517
+ ```
518
+
519
+ ### Auditor Interface
520
+
521
+ Implement custom observability by providing an `Auditor`:
522
+
523
+ ```typescript
524
+ interface Auditor {
525
+ record(event: AuditEvent): void;
526
+ flush?(): Promise<void>;
527
+ }
528
+ ```
529
+
530
+ **Built-in implementations:**
531
+ - `NoopAuditor` — Zero overhead (default)
532
+ - `ConsoleAuditor` — Structured console logging
533
+ - `BufferedAuditor` — Collects events for custom sinks
534
+
535
+ ---
536
+
537
+ ## Architecture
538
+
539
+ ```
540
+ universal-llm-client
541
+ ├── AIModel ← Public API (the only class you import)
542
+ ├── Router ← Internal failover engine
543
+ ├── BaseLLMClient ← Abstract client with tool execution
544
+ ├── Providers
545
+ │ ├── OllamaClient
546
+ │ ├── OpenAICompatibleClient (OpenAI, OpenRouter, Groq, LM Studio, vLLM, LlamaCpp)
547
+ │ └── GoogleClient (AI Studio + Vertex AI)
548
+ ├── StreamDecoder ← Pluggable reasoning strategies
549
+ ├── Auditor ← Observability interface
550
+ ├── MCPToolBridge ← MCP server integration
551
+ └── HTTP Utilities ← Universal fetch-based transport
552
+ ```
553
+
554
+ ### Design Principles
555
+
556
+ 1. **Single import** — `AIModel` is the only class needed for core usage; auditors, tool helpers, and the MCP bridge are optional add-ons
557
+ 2. **Provider agnostic** — Same code works with any backend
558
+ 3. **Transparent failover** — Health tracking and cooldowns happen behind the scenes
559
+ 4. **Zero dependencies** — Core library depends only on native `fetch`
560
+ 5. **Agent-ready** — Stateless, composable instances designed as foundation for agent frameworks
561
+ 6. **Observable** — Every request, response, tool call, retry, and failover is auditable
562
+
563
+ ---
564
+
565
+ ## Runtime Support
566
+
567
+ | Runtime | Version | Status |
568
+ |---|---|---|
569
+ | **Node.js** | 22+ | ✅ Full support |
570
+ | **Bun** | 1.0+ | ✅ Full support |
571
+ | **Deno** | 2.0+ | ✅ Full support |
572
+ | **Browsers** | Modern | ✅ Supported, except stdio MCP (HTTP transport only) |
573
+
574
+ ---
575
+
576
+ ## For Agent Framework Authors
577
+
578
+ `AIModel` is designed as the transport layer for agentic systems:
579
+
580
+ - **Stateless** — No conversation history stored. Your framework manages memory
581
+ - **Composable** — Create separate instances for chat, embeddings, vision
582
+ - **Tool tracing** — `chatWithTools()` returns full execution trace
583
+ - **Context budget** — `getModelInfo()` exposes `contextLength`
584
+ - **Auditor as system bus** — Inject custom sinks for cost tracking, behavioral scoring
585
+ - **StreamDecoder as UI bridge** — Select decoder strategy per-call
586
+
587
+ ---
588
+
589
+ ## License
590
+
591
+ MIT