universal-llm-client 4.2.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/CHANGELOG.md +139 -103
  2. package/LICENSE +21 -21
  3. package/README.md +591 -591
  4. package/dist/ai-model.js.map +1 -1
  5. package/dist/auditor.js.map +1 -1
  6. package/dist/client.js.map +1 -1
  7. package/dist/http.js.map +1 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/interfaces.d.ts +20 -0
  10. package/dist/interfaces.d.ts.map +1 -1
  11. package/dist/interfaces.js.map +1 -1
  12. package/dist/mcp.js.map +1 -1
  13. package/dist/providers/anthropic.js.map +1 -1
  14. package/dist/providers/google.d.ts.map +1 -1
  15. package/dist/providers/google.js +2 -0
  16. package/dist/providers/google.js.map +1 -1
  17. package/dist/providers/index.js.map +1 -1
  18. package/dist/providers/ollama.js.map +1 -1
  19. package/dist/providers/openai.js.map +1 -1
  20. package/dist/router.js.map +1 -1
  21. package/dist/stream-decoder.js.map +1 -1
  22. package/dist/structured-output.js.map +1 -1
  23. package/dist/tools.js.map +1 -1
  24. package/dist/zod-adapter.js.map +1 -1
  25. package/package.json +115 -116
  26. package/src/ai-model.ts +0 -350
  27. package/src/auditor.ts +0 -213
  28. package/src/client.ts +0 -402
  29. package/src/debug/debug-google-streaming.ts +0 -97
  30. package/src/debug/debug-tool-execution.ts +0 -86
  31. package/src/debug/test-lmstudio-tools.ts +0 -155
  32. package/src/demos/README.md +0 -47
  33. package/src/demos/basic/universal-llm-examples.ts +0 -161
  34. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  35. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  36. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  37. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  38. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  39. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  40. package/src/demos/model-alias-demo.ts +0 -0
  41. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  42. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  43. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  44. package/src/demos/tools/astrid-production-memory.ts +0 -558
  45. package/src/demos/tools/basic-translation-test.ts +0 -66
  46. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  47. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  48. package/src/demos/tools/clean-translation-test.ts +0 -119
  49. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  50. package/src/demos/tools/complete-rag-demo.ts +0 -369
  51. package/src/demos/tools/complete-tool-demo.ts +0 -132
  52. package/src/demos/tools/demo-tool-calling.ts +0 -124
  53. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  54. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  55. package/src/demos/tools/memory-integration-test.ts +0 -420
  56. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  57. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  58. package/src/demos/tools/production-tool-demo.ts +0 -245
  59. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  60. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  61. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  62. package/src/demos/tools/translation-integration-guide.ts +0 -346
  63. package/src/demos/tools/universal-memory-system.ts +0 -560
  64. package/src/http.ts +0 -247
  65. package/src/index.ts +0 -161
  66. package/src/interfaces.ts +0 -657
  67. package/src/mcp.ts +0 -345
  68. package/src/providers/anthropic.ts +0 -762
  69. package/src/providers/google.ts +0 -620
  70. package/src/providers/index.ts +0 -8
  71. package/src/providers/ollama.ts +0 -469
  72. package/src/providers/openai.ts +0 -392
  73. package/src/router.ts +0 -780
  74. package/src/stream-decoder.ts +0 -361
  75. package/src/structured-output.ts +0 -759
  76. package/src/test-scripts/test-advanced-tools.ts +0 -310
  77. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  78. package/src/test-scripts/test-google-streaming.ts +0 -63
  79. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  80. package/src/test-scripts/test-mcp-config.ts +0 -28
  81. package/src/test-scripts/test-mcp-connection.ts +0 -29
  82. package/src/test-scripts/test-system-message-positions.ts +0 -163
  83. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  84. package/src/test-scripts/test-tool-calling.ts +0 -231
  85. package/src/tests/ai-model.test.ts +0 -1614
  86. package/src/tests/auditor.test.ts +0 -224
  87. package/src/tests/http.test.ts +0 -200
  88. package/src/tests/interfaces.test.ts +0 -117
  89. package/src/tests/providers/google.test.ts +0 -660
  90. package/src/tests/providers/ollama.test.ts +0 -954
  91. package/src/tests/providers/openai.test.ts +0 -1122
  92. package/src/tests/router.test.ts +0 -254
  93. package/src/tests/stream-decoder.test.ts +0 -179
  94. package/src/tests/structured-output.test.ts +0 -1450
  95. package/src/tests/tools.test.ts +0 -175
  96. package/src/tools.ts +0 -246
  97. package/src/zod-adapter.ts +0 -72
package/README.md CHANGED
@@ -1,591 +1,591 @@
1
- # universal-llm-client
2
-
3
- A universal LLM client for JavaScript/TypeScript with **transparent provider failover**, streaming tool execution, pluggable reasoning strategies, and native observability.
4
-
5
- ```typescript
6
- import { AIModel } from 'universal-llm-client';
7
-
8
- const model = new AIModel({
9
- model: 'gemini-2.5-flash',
10
- providers: [
11
- { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
12
- { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY },
13
- { type: 'ollama' },
14
- ],
15
- });
16
-
17
- const response = await model.chat([
18
- { role: 'user', content: 'Hello!' },
19
- ]);
20
- ```
21
-
22
- > **One model, multiple backends.** If Google fails, it transparently fails over to OpenRouter, then to local Ollama. Your code never knows the difference.
23
-
24
- ---
25
-
26
- ## Features
27
-
28
- - 🔄 **Transparent Failover** — Priority-ordered provider chain with retries, health tracking, and cooldowns
29
- - 🛠️ **Tool Calling** — Register tools once, works across all providers. Autonomous multi-turn execution loop
30
- - 📋 **Structured Output** — Zod schema validation, JSON Schema support, streaming, and type-safe responses
31
- - 🌊 **Streaming** — First-class async generator streaming with pluggable decoder strategies
32
- - 🧠 **Reasoning** — Native `<think>` tag parsing, interleaved reasoning, and model thinking support
33
- - 🔍 **Observability** — Built-in auditor interface for logging, cost tracking, and behavioral analysis
34
- - 🌐 **Universal Runtime** — Node.js 22+, Bun, Deno, and modern browsers
35
- - 🤖 **MCP Native** — Bridge MCP servers to LLM tools with zero glue code
36
- - 📊 **Embeddings** — Single and batch embedding generation
37
-
38
- ## Supported Providers
39
-
40
- | Provider | Type | Notes |
41
- |---|---|---|
42
- | **Ollama** | `ollama` | Local or cloud models, NDJSON streaming, model pulling, vision/multimodal |
43
- | **OpenAI** | `openai` | GPT-4o, o3, etc. Also works with OpenRouter, Groq, LM Studio, vLLM |
44
- | **Google AI Studio** | `google` | Gemini models, system instructions, multimodal |
45
- | **Vertex AI** | `vertex` | Same as Google AI but with regional endpoints and Bearer tokens |
46
- | **LlamaCpp** | `llamacpp` | Local llama.cpp / llama-server instances |
47
-
48
- ---
49
-
50
- ## Installation
51
-
52
- ```bash
53
- bun add universal-llm-client
54
- # or
55
- npm install universal-llm-client
56
- ```
57
-
58
- **Optional**: For MCP integration:
59
- ```bash
60
- bun add @modelcontextprotocol/sdk
61
- ```
62
-
63
- ---
64
-
65
- ## Quick Start
66
-
67
- ### Basic Chat
68
-
69
- ```typescript
70
- import { AIModel } from 'universal-llm-client';
71
-
72
- const model = new AIModel({
73
- model: 'qwen3:4b',
74
- providers: [{ type: 'ollama' }],
75
- });
76
-
77
- const response = await model.chat([
78
- { role: 'system', content: 'You are a helpful assistant.' },
79
- { role: 'user', content: 'What is the capital of France?' },
80
- ]);
81
-
82
- console.log(response.message.content);
83
- // "The capital of France is Paris."
84
- ```
85
-
86
- ### Streaming
87
-
88
- ```typescript
89
- for await (const event of model.chatStream([
90
- { role: 'user', content: 'Write a haiku about code.' },
91
- ])) {
92
- if (event.type === 'text') {
93
- process.stdout.write(event.content);
94
- } else if (event.type === 'thinking') {
95
- // Model reasoning (when supported)
96
- console.log('[thinking]', event.content);
97
- }
98
- }
99
- ```
100
-
101
- ### Tool Calling
102
-
103
- ```typescript
104
- model.registerTool(
105
- 'get_weather',
106
- 'Get current weather for a location',
107
- {
108
- type: 'object',
109
- properties: {
110
- city: { type: 'string', description: 'City name' },
111
- },
112
- required: ['city'],
113
- },
114
- async (args) => {
115
- const { city } = args as { city: string };
116
- return { temperature: 22, condition: 'sunny', city };
117
- },
118
- );
119
-
120
- // Autonomous tool execution — the model calls tools and loops until done
121
- const response = await model.chatWithTools([
122
- { role: 'user', content: "What's the weather in Tokyo?" },
123
- ]);
124
-
125
- console.log(response.message.content);
126
- // "The weather in Tokyo is 22°C and sunny."
127
- console.log(response.toolTrace);
128
- // [{ name: 'get_weather', args: { city: 'Tokyo' }, result: {...}, duration: 5 }]
129
- ```
130
-
131
- ### Provider Failover
132
-
133
- ```typescript
134
- const model = new AIModel({
135
- model: 'gemini-2.5-flash',
136
- retries: 2, // retries per provider before failover
137
- timeout: 30000, // request timeout in ms
138
- providers: [
139
- { type: 'google', apiKey: process.env.GOOGLE_KEY, priority: 0 },
140
- { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY, priority: 1 },
141
- { type: 'ollama', url: 'http://localhost:11434', priority: 2 },
142
- ],
143
- });
144
-
145
- // If Google returns 500, retries twice, then seamlessly tries OpenRouter.
146
- // If OpenRouter also fails, falls back to local Ollama.
147
- // Your code sees a single response.
148
- const response = await model.chat([{ role: 'user', content: 'Hello' }]);
149
-
150
- // Check provider health at any time
151
- console.log(model.getProviderStatus());
152
- // [{ id: 'google-0', healthy: true }, { id: 'openai-1', healthy: true }, ...]
153
- ```
154
-
155
- ### Multimodal (Vision)
156
-
157
- ```typescript
158
- import { AIModel, multimodalMessage } from 'universal-llm-client';
159
-
160
- const model = new AIModel({
161
- model: 'gemini-2.5-flash',
162
- providers: [{ type: 'google', apiKey: process.env.GOOGLE_KEY }],
163
- });
164
-
165
- const response = await model.chat([
166
- multimodalMessage('What do you see in this image?', [
167
- 'https://example.com/photo.jpg',
168
- ]),
169
- ]);
170
- ```
171
-
172
- ### Embeddings
173
-
174
- ```typescript
175
- const embedModel = new AIModel({
176
- model: 'nomic-embed-text-v2-moe:latest',
177
- providers: [{ type: 'ollama' }],
178
- });
179
-
180
- const vector = await embedModel.embed('Hello world');
181
- // [0.006, 0.026, -0.009, ...]
182
-
183
- const vectors = await embedModel.embedArray(['Hello', 'World']);
184
- // [[0.006, ...], [0.012, ...]]
185
- ```
186
-
187
- ### Structured Output
188
-
189
- Get typed, validated JSON responses from any LLM using Zod schemas:
190
-
191
- ```typescript
192
- import { AIModel } from 'universal-llm-client';
193
- import { z } from 'zod';
194
-
195
- const model = new AIModel({
196
- model: 'gemini-2.5-flash',
197
- providers: [
198
- { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
199
- { type: 'ollama' },
200
- ],
201
- });
202
-
203
- // Define your schema
204
- const UserSchema = z.object({
205
- name: z.string(),
206
- age: z.number(),
207
- email: z.string().email(),
208
- interests: z.array(z.string()),
209
- });
210
-
211
- // Method 1: generateStructured (throws on validation failure)
212
- const user = await model.generateStructured(UserSchema, [
213
- { role: 'user', content: 'Generate a user profile for a software developer' },
214
- ]);
215
-
216
- console.log(user.name); // TypeScript knows this is string
217
- console.log(user.age); // TypeScript knows this is number
218
- console.log(user.email); // TypeScript knows this is string
219
- console.log(user.interests); // TypeScript knows this is string[]
220
- ```
221
-
222
- **Non-throwing variant:**
223
-
224
- ```typescript
225
- // Method 2: tryParseStructured (returns result object, never throws)
226
- const result = await model.tryParseStructured(UserSchema, messages);
227
-
228
- if (result.ok) {
229
- console.log('User:', result.value.name);
230
- } else {
231
- console.log('Error:', result.error.message);
232
- console.log('Raw LLM output:', result.rawOutput);
233
- }
234
- ```
235
-
236
- **Via chat options:**
237
-
238
- ```typescript
239
- // Method 3: chat with output parameter
240
- const response = await model.chat(messages, {
241
- output: { schema: UserSchema },
242
- });
243
-
244
- // response.structured is typed as { name: string, age: number, ... }
245
- if (response.structured) {
246
- console.log(response.structured.name);
247
- }
248
- ```
249
-
250
- **Streaming structured output:**
251
-
252
- ```typescript
253
- // Stream partial validated objects as JSON generates
254
- for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
255
- console.log('Partial:', partial);
256
- // Partial: { name: 'Alice' }
257
- // Partial: { name: 'Alice', age: 30 }
258
- // Partial: { name: 'Alice', age: 30, email: 'alice@example.com' }
259
- }
260
- ```
261
-
262
- **Raw JSON Schema (without Zod):**
263
-
264
- ```typescript
265
- const response = await model.chat(messages, {
266
- jsonSchema: {
267
- type: 'object',
268
- properties: {
269
- name: { type: 'string' },
270
- age: { type: 'number' },
271
- },
272
- required: ['name', 'age'],
273
- },
274
- name: 'Person', // Optional, used for LLM guidance
275
- });
276
- ```
277
-
278
- **Separate module import (tree-shaking):**
279
-
280
- ```typescript
281
- // Import only structured output types if you don't need the full client
282
- import {
283
- StructuredOutputError,
284
- type StructuredOutputResult,
285
- type StructuredOutputOptions,
286
- parseStructured,
287
- tryParseStructured,
288
- zodToJsonSchema,
289
- } from 'universal-llm-client/structured-output';
290
- ```
291
-
292
- **Vision with structured output:**
293
-
294
- ```typescript
295
- const ImageAnalysisSchema = z.object({
296
- objects: z.array(z.string()),
297
- scene: z.string(),
298
- mood: z.string(),
299
- });
300
-
301
- const response = await model.generateStructured(ImageAnalysisSchema, [
302
- multimodalMessage('Analyze this image', ['https://example.com/photo.jpg']),
303
- ]);
304
- ```
305
-
306
- **Provider compatibility:**
307
-
308
- | Provider | Method | Notes |
309
- |----------|--------|-------|
310
- | OpenAI | `response_format.json_schema` | Strict mode enabled |
311
- | Ollama | `format: { schema }` | Model must support grammar |
312
- | Google | `responseMimeType + responseSchema` | Some features stripped |
313
-
314
- ### Observability
315
-
316
- ```typescript
317
- import { AIModel, ConsoleAuditor, BufferedAuditor } from 'universal-llm-client';
318
-
319
- // Simple console logging
320
- const model = new AIModel({
321
- model: 'qwen3:4b',
322
- providers: [{ type: 'ollama' }],
323
- auditor: new ConsoleAuditor('[LLM]'),
324
- });
325
- // [LLM] REQUEST [ollama] (qwen3:4b) →
326
- // [LLM] RESPONSE [ollama] (qwen3:4b) 1200ms 68 tokens
327
-
328
- // Buffered for custom sinks (OpenTelemetry, DB, etc.)
329
- const auditor = new BufferedAuditor({
330
- maxBufferSize: 100,
331
- onFlush: async (events) => {
332
- await sendToOpenTelemetry(events);
333
- },
334
- });
335
- ```
336
-
337
- ### MCP Integration
338
-
339
- ```typescript
340
- import { AIModel, MCPToolBridge } from 'universal-llm-client';
341
-
342
- const model = new AIModel({
343
- model: 'qwen3:4b',
344
- providers: [{ type: 'ollama' }],
345
- });
346
-
347
- const mcp = new MCPToolBridge({
348
- servers: {
349
- filesystem: {
350
- command: 'npx',
351
- args: ['-y', '@modelcontextprotocol/server-filesystem', './'],
352
- },
353
- weather: {
354
- url: 'https://mcp.example.com/weather',
355
- },
356
- },
357
- });
358
-
359
- await mcp.connect();
360
- await mcp.registerTools(model);
361
-
362
- // MCP tools are now callable via chatWithTools
363
- const response = await model.chatWithTools([
364
- { role: 'user', content: 'List files in the current directory' },
365
- ]);
366
-
367
- await mcp.disconnect();
368
- ```
369
-
370
- ### Stream Decoders
371
-
372
- ```typescript
373
- import { AIModel, createDecoder } from 'universal-llm-client';
374
-
375
- // Passthrough — raw text, no parsing
376
- // Standard Chat — text + native reasoning + tool calls
377
- // Interleaved Reasoning — parses <think> and <progress> tags from text streams
378
-
379
- const decoder = createDecoder('interleaved-reasoning', (event) => {
380
- switch (event.type) {
381
- case 'text': console.log(event.content); break;
382
- case 'thinking': console.log('[think]', event.content); break;
383
- case 'progress': console.log('[progress]', event.content); break;
384
- case 'tool_call': console.log('[tool]', event.calls); break;
385
- }
386
- });
387
-
388
- decoder.push('<think>Let me analyze this</think>The answer is 42');
389
- decoder.flush();
390
-
391
- console.log(decoder.getCleanContent()); // "The answer is 42"
392
- console.log(decoder.getReasoning()); // "Let me analyze this"
393
- ```
394
-
395
- ---
396
-
397
- ## API Reference
398
-
399
- ### `AIModel`
400
-
401
- The universal client. One class, multiple backends.
402
-
403
- ```typescript
404
- new AIModel(config: AIModelConfig)
405
- ```
406
-
407
- **Config:**
408
-
409
- | Property | Type | Default | Description |
410
- |---|---|---|---|
411
- | `model` | `string` | — | Model name (e.g., `'gemini-2.5-flash'`) |
412
- | `providers` | `ProviderConfig[]` | — | Ordered list of provider backends |
413
- | `retries` | `number` | `2` | Retries per provider before failover |
414
- | `timeout` | `number` | `30000` | Request timeout in ms |
415
- | `auditor` | `Auditor` | `NoopAuditor` | Observability sink |
416
- | `thinking` | `boolean` | `false` | Enable model thinking/reasoning |
417
- | `debug` | `boolean` | `false` | Debug logging |
418
- | `defaultParameters` | `object` | — | Default parameters for all requests |
419
-
420
- **Provider Config:**
421
-
422
- | Property | Type | Description |
423
- |---|---|---|
424
- | `type` | `string` | `'ollama'`, `'openai'`, `'google'`, `'vertex'`, `'llamacpp'` |
425
- | `url` | `string` | Provider URL (has sensible defaults) |
426
- | `apiKey` | `string` | API key or Bearer token |
427
- | `priority` | `number` | Lower = tried first (defaults to array index) |
428
- | `model` | `string` | Override model name for this provider |
429
- | `region` | `string` | Vertex AI region (e.g., `'us-central1'`) |
430
- | `apiVersion` | `string` | API version (e.g., `'v1beta'`) |
431
-
432
- **Methods:**
433
-
434
- | Method | Returns | Description |
435
- |---|---|---|
436
- | `chat(messages, options?)` | `Promise<LLMChatResponse>` | Send chat request |
437
- | `chatWithTools(messages, options?)` | `Promise<LLMChatResponse>` | Chat with autonomous tool execution |
438
- | `chatStream(messages, options?)` | `AsyncGenerator<DecodedEvent>` | Stream chat response |
439
- | `generateStructured(schema, messages, options?)` | `Promise<T>` | Generate typed JSON validated against Zod schema |
440
- | `tryParseStructured(schema, messages, options?)` | `Promise<StructuredOutputResult<T>>` | Non-throwing variant returning result object |
441
- | `generateStructuredStream(schema, messages, options?)` | `AsyncGenerator<T, T>` | Stream partial validated objects as JSON generates |
442
- | `embed(text)` | `Promise<number[]>` | Generate single embedding |
443
- | `embedArray(texts)` | `Promise<number[][]>` | Generate batch embeddings |
444
- | `registerTool(name, desc, params, handler)` | `void` | Register a callable tool |
445
- | `registerTools(tools)` | `void` | Register multiple tools |
446
- | `getModels()` | `Promise<string[]>` | List available models |
447
- | `getModelInfo()` | `Promise<ModelMetadata>` | Get model metadata |
448
- | `getProviderStatus()` | `ProviderStatus[]` | Check provider health |
449
- | `setModel(name)` | `void` | Switch model at runtime |
450
- | `dispose()` | `Promise<void>` | Clean shutdown |
451
-
452
- ### Structured Output
453
-
454
- ```typescript
455
- import { z } from 'zod';
456
-
457
- // Define your schema
458
- const UserSchema = z.object({
459
- name: z.string(),
460
- age: z.number(),
461
- email: z.string().email(),
462
- });
463
-
464
- // Generate typed JSON
465
- const user = await model.generateStructured(UserSchema, messages);
466
- // TypeScript infers: { name: string; age: number; email: string }
467
-
468
- // Non-throwing variant
469
- const result = await model.tryParseStructured(UserSchema, messages);
470
- if (result.ok) {
471
- console.log(result.value.name); // Fully typed
472
- } else {
473
- console.log(result.error.message);
474
- }
475
-
476
- // Stream partial objects
477
- for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
478
- console.log(partial); // Partial validated objects
479
- }
480
- ```
481
-
482
- **Separate module import (tree-shaking):**
483
-
484
- ```typescript
485
- import {
486
- StructuredOutputError,
487
- type StructuredOutputResult,
488
- parseStructured,
489
- tryParseStructured,
490
- zodToJsonSchema,
491
- } from 'universal-llm-client/structured-output';
492
-
493
- // Use without importing the full client
494
- const schema = z.object({ name: z.string() });
495
- const jsonSchema = zodToJsonSchema(schema);
496
- ```
497
-
498
- ### `ToolBuilder` / `ToolExecutor`
499
-
500
- ```typescript
501
- import { ToolBuilder, ToolExecutor } from 'universal-llm-client';
502
-
503
- // Fluent builder
504
- const tool = new ToolBuilder('search')
505
- .description('Search the web')
506
- .addParameter('query', 'string', 'Search query', true)
507
- .addParameter('limit', 'number', 'Max results', false)
508
- .build();
509
-
510
- // Execution wrappers
511
- const safeHandler = ToolExecutor.compose(
512
- myHandler,
513
- h => ToolExecutor.withTimeout(h, 5000),
514
- h => ToolExecutor.safe(h),
515
- h => ToolExecutor.withValidation(h, ['query']),
516
- );
517
- ```
518
-
519
- ### Auditor Interface
520
-
521
- Implement custom observability by providing an `Auditor`:
522
-
523
- ```typescript
524
- interface Auditor {
525
- record(event: AuditEvent): void;
526
- flush?(): Promise<void>;
527
- }
528
- ```
529
-
530
- **Built-in implementations:**
531
- - `NoopAuditor` — Zero overhead (default)
532
- - `ConsoleAuditor` — Structured console logging
533
- - `BufferedAuditor` — Collects events for custom sinks
534
-
535
- ---
536
-
537
- ## Architecture
538
-
539
- ```
540
- universal-llm-client
541
- ├── AIModel ← Public API (the only class you import)
542
- ├── Router ← Internal failover engine
543
- ├── BaseLLMClient ← Abstract client with tool execution
544
- ├── Providers
545
- │ ├── OllamaClient
546
- │ ├── OpenAICompatibleClient (OpenAI, OpenRouter, Groq, LM Studio, vLLM, LlamaCpp)
547
- │ └── GoogleClient (AI Studio + Vertex AI)
548
- ├── StreamDecoder ← Pluggable reasoning strategies
549
- ├── Auditor ← Observability interface
550
- ├── MCPToolBridge ← MCP server integration
551
- └── HTTP Utilities ← Universal fetch-based transport
552
- ```
553
-
554
- ### Design Principles
555
-
556
- 1. **Single import** — `AIModel` is the only class users need
557
- 2. **Provider agnostic** — Same code works with any backend
558
- 3. **Transparent failover** — Health tracking and cooldowns happen behind the scenes
559
- 4. **Zero dependencies** — Core library depends only on native `fetch`
560
- 5. **Agent-ready** — Stateless, composable instances designed as foundation for agent frameworks
561
- 6. **Observable** — Every request, response, tool call, retry, and failover is auditable
562
-
563
- ---
564
-
565
- ## Runtime Support
566
-
567
- | Runtime | Version | Status |
568
- |---|---|---|
569
- | **Node.js** | 22+ | ✅ Full support |
570
- | **Bun** | 1.0+ | ✅ Full support |
571
- | **Deno** | 2.0+ | ✅ Full support |
572
- | **Browsers** | Modern | ✅ No stdio MCP, HTTP transport only |
573
-
574
- ---
575
-
576
- ## For Agent Framework Authors
577
-
578
- `AIModel` is designed as the transport layer for agentic systems:
579
-
580
- - **Stateless** — No conversation history stored. Your framework manages memory
581
- - **Composable** — Create separate instances for chat, embeddings, vision
582
- - **Tool tracing** — `chatWithTools()` returns full execution trace
583
- - **Context budget** — `getModelInfo()` exposes `contextLength`
584
- - **Auditor as system bus** — Inject custom sinks for cost tracking, behavioral scoring
585
- - **StreamDecoder as UI bridge** — Select decoder strategy per-call
586
-
587
- ---
588
-
589
- ## License
590
-
591
- MIT
1
+ # universal-llm-client
2
+
3
+ A universal LLM client for JavaScript/TypeScript with **transparent provider failover**, streaming tool execution, pluggable reasoning strategies, and native observability.
4
+
5
+ ```typescript
6
+ import { AIModel } from 'universal-llm-client';
7
+
8
+ const model = new AIModel({
9
+ model: 'gemini-2.5-flash',
10
+ providers: [
11
+ { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
12
+ { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY },
13
+ { type: 'ollama' },
14
+ ],
15
+ });
16
+
17
+ const response = await model.chat([
18
+ { role: 'user', content: 'Hello!' },
19
+ ]);
20
+ ```
21
+
22
+ > **One model, multiple backends.** If Google fails, it transparently fails over to OpenRouter, then to local Ollama. Your code never knows the difference.
23
+
24
+ ---
25
+
26
+ ## Features
27
+
28
+ - 🔄 **Transparent Failover** — Priority-ordered provider chain with retries, health tracking, and cooldowns
29
+ - 🛠️ **Tool Calling** — Register tools once, works across all providers. Autonomous multi-turn execution loop
30
+ - 📋 **Structured Output** — Zod schema validation, JSON Schema support, streaming, and type-safe responses
31
+ - 🌊 **Streaming** — First-class async generator streaming with pluggable decoder strategies
32
+ - 🧠 **Reasoning** — Native `<think>` tag parsing, interleaved reasoning, and model thinking support
33
+ - 🔍 **Observability** — Built-in auditor interface for logging, cost tracking, and behavioral analysis
34
+ - 🌐 **Universal Runtime** — Node.js 22+, Bun, Deno, and modern browsers
35
+ - 🤖 **MCP Native** — Bridge MCP servers to LLM tools with zero glue code
36
+ - 📊 **Embeddings** — Single and batch embedding generation
37
+
38
+ ## Supported Providers
39
+
40
+ | Provider | Type | Notes |
41
+ |---|---|---|
42
+ | **Ollama** | `ollama` | Local or cloud models, NDJSON streaming, model pulling, vision/multimodal |
43
+ | **OpenAI** | `openai` | GPT-4o, o3, etc. Also works with OpenRouter, Groq, LM Studio, vLLM |
44
+ | **Google AI Studio** | `google` | Gemini models, system instructions, multimodal |
45
+ | **Vertex AI** | `vertex` | Same as Google AI but with regional endpoints and Bearer tokens |
46
+ | **LlamaCpp** | `llamacpp` | Local llama.cpp / llama-server instances |
47
+
48
+ ---
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ bun add universal-llm-client
54
+ # or
55
+ npm install universal-llm-client
56
+ ```
57
+
58
+ **Optional**: For MCP integration:
59
+ ```bash
60
+ bun add @modelcontextprotocol/sdk
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Quick Start
66
+
67
+ ### Basic Chat
68
+
69
+ ```typescript
70
+ import { AIModel } from 'universal-llm-client';
71
+
72
+ const model = new AIModel({
73
+ model: 'qwen3:4b',
74
+ providers: [{ type: 'ollama' }],
75
+ });
76
+
77
+ const response = await model.chat([
78
+ { role: 'system', content: 'You are a helpful assistant.' },
79
+ { role: 'user', content: 'What is the capital of France?' },
80
+ ]);
81
+
82
+ console.log(response.message.content);
83
+ // "The capital of France is Paris."
84
+ ```
85
+
86
+ ### Streaming
87
+
88
+ ```typescript
89
+ for await (const event of model.chatStream([
90
+ { role: 'user', content: 'Write a haiku about code.' },
91
+ ])) {
92
+ if (event.type === 'text') {
93
+ process.stdout.write(event.content);
94
+ } else if (event.type === 'thinking') {
95
+ // Model reasoning (when supported)
96
+ console.log('[thinking]', event.content);
97
+ }
98
+ }
99
+ ```
100
+
101
+ ### Tool Calling
102
+
103
+ ```typescript
104
+ model.registerTool(
105
+ 'get_weather',
106
+ 'Get current weather for a location',
107
+ {
108
+ type: 'object',
109
+ properties: {
110
+ city: { type: 'string', description: 'City name' },
111
+ },
112
+ required: ['city'],
113
+ },
114
+ async (args) => {
115
+ const { city } = args as { city: string };
116
+ return { temperature: 22, condition: 'sunny', city };
117
+ },
118
+ );
119
+
120
+ // Autonomous tool execution — the model calls tools and loops until done
121
+ const response = await model.chatWithTools([
122
+ { role: 'user', content: "What's the weather in Tokyo?" },
123
+ ]);
124
+
125
+ console.log(response.message.content);
126
+ // "The weather in Tokyo is 22°C and sunny."
127
+ console.log(response.toolTrace);
128
+ // [{ name: 'get_weather', args: { city: 'Tokyo' }, result: {...}, duration: 5 }]
129
+ ```
130
+
131
+ ### Provider Failover
132
+
133
+ ```typescript
134
+ const model = new AIModel({
135
+ model: 'gemini-2.5-flash',
136
+ retries: 2, // retries per provider before failover
137
+ timeout: 30000, // request timeout in ms
138
+ providers: [
139
+ { type: 'google', apiKey: process.env.GOOGLE_KEY, priority: 0 },
140
+ { type: 'openai', url: 'https://openrouter.ai/api', apiKey: process.env.OPENROUTER_KEY, priority: 1 },
141
+ { type: 'ollama', url: 'http://localhost:11434', priority: 2 },
142
+ ],
143
+ });
144
+
145
+ // If Google returns 500, retries twice, then seamlessly tries OpenRouter.
146
+ // If OpenRouter also fails, falls back to local Ollama.
147
+ // Your code sees a single response.
148
+ const response = await model.chat([{ role: 'user', content: 'Hello' }]);
149
+
150
+ // Check provider health at any time
151
+ console.log(model.getProviderStatus());
152
+ // [{ id: 'google-0', healthy: true }, { id: 'openai-1', healthy: true }, ...]
153
+ ```
154
+
155
+ ### Multimodal (Vision)
156
+
157
+ ```typescript
158
+ import { AIModel, multimodalMessage } from 'universal-llm-client';
159
+
160
+ const model = new AIModel({
161
+ model: 'gemini-2.5-flash',
162
+ providers: [{ type: 'google', apiKey: process.env.GOOGLE_KEY }],
163
+ });
164
+
165
+ const response = await model.chat([
166
+ multimodalMessage('What do you see in this image?', [
167
+ 'https://example.com/photo.jpg',
168
+ ]),
169
+ ]);
170
+ ```
171
+
172
+ ### Embeddings
173
+
174
+ ```typescript
175
+ const embedModel = new AIModel({
176
+ model: 'nomic-embed-text-v2-moe:latest',
177
+ providers: [{ type: 'ollama' }],
178
+ });
179
+
180
+ const vector = await embedModel.embed('Hello world');
181
+ // [0.006, 0.026, -0.009, ...]
182
+
183
+ const vectors = await embedModel.embedArray(['Hello', 'World']);
184
+ // [[0.006, ...], [0.012, ...]]
185
+ ```
186
+
187
+ ### Structured Output
188
+
189
+ Get typed, validated JSON responses from any LLM using Zod schemas:
190
+
191
+ ```typescript
192
+ import { AIModel } from 'universal-llm-client';
193
+ import { z } from 'zod';
194
+
195
+ const model = new AIModel({
196
+ model: 'gemini-2.5-flash',
197
+ providers: [
198
+ { type: 'google', apiKey: process.env.GOOGLE_API_KEY },
199
+ { type: 'ollama' },
200
+ ],
201
+ });
202
+
203
+ // Define your schema
204
+ const UserSchema = z.object({
205
+ name: z.string(),
206
+ age: z.number(),
207
+ email: z.string().email(),
208
+ interests: z.array(z.string()),
209
+ });
210
+
211
+ // Method 1: generateStructured (throws on validation failure)
212
+ const user = await model.generateStructured(UserSchema, [
213
+ { role: 'user', content: 'Generate a user profile for a software developer' },
214
+ ]);
215
+
216
+ console.log(user.name); // TypeScript knows this is string
217
+ console.log(user.age); // TypeScript knows this is number
218
+ console.log(user.email); // TypeScript knows this is string
219
+ console.log(user.interests); // TypeScript knows this is string[]
220
+ ```
221
+
222
+ **Non-throwing variant:**
223
+
224
+ ```typescript
225
+ // Method 2: tryParseStructured (returns result object, never throws)
226
+ const result = await model.tryParseStructured(UserSchema, messages);
227
+
228
+ if (result.ok) {
229
+ console.log('User:', result.value.name);
230
+ } else {
231
+ console.log('Error:', result.error.message);
232
+ console.log('Raw LLM output:', result.rawOutput);
233
+ }
234
+ ```
235
+
236
+ **Via chat options:**
237
+
238
+ ```typescript
239
+ // Method 3: chat with output parameter
240
+ const response = await model.chat(messages, {
241
+ output: { schema: UserSchema },
242
+ });
243
+
244
+ // response.structured is typed as { name: string, age: number, ... }
245
+ if (response.structured) {
246
+ console.log(response.structured.name);
247
+ }
248
+ ```
249
+
250
+ **Streaming structured output:**
251
+
252
+ ```typescript
253
+ // Stream validated partial objects as the JSON is generated
254
+ for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
255
+ console.log('Partial:', partial);
256
+ // Partial: { name: 'Alice' }
257
+ // Partial: { name: 'Alice', age: 30 }
258
+ // Partial: { name: 'Alice', age: 30, email: 'alice@example.com' }
259
+ }
260
+ ```
261
+
262
+ **Raw JSON Schema (without Zod):**
263
+
264
+ ```typescript
265
+ const response = await model.chat(messages, {
266
+ jsonSchema: {
267
+ type: 'object',
268
+ properties: {
269
+ name: { type: 'string' },
270
+ age: { type: 'number' },
271
+ },
272
+ required: ['name', 'age'],
273
+ },
274
+ name: 'Person', // Optional, used for LLM guidance
275
+ });
276
+ ```
277
+
278
+ **Separate module import (tree-shaking):**
279
+
280
+ ```typescript
281
+ // Import only the structured-output helpers and types if you don't need the full client
282
+ import {
283
+ StructuredOutputError,
284
+ type StructuredOutputResult,
285
+ type StructuredOutputOptions,
286
+ parseStructured,
287
+ tryParseStructured,
288
+ zodToJsonSchema,
289
+ } from 'universal-llm-client/structured-output';
290
+ ```
291
+
292
+ **Vision with structured output:**
293
+
294
+ ```typescript
295
+ const ImageAnalysisSchema = z.object({
296
+ objects: z.array(z.string()),
297
+ scene: z.string(),
298
+ mood: z.string(),
299
+ });
300
+
301
+ const response = await model.generateStructured(ImageAnalysisSchema, [
302
+ multimodalMessage('Analyze this image', ['https://example.com/photo.jpg']),
303
+ ]);
304
+ ```
305
+
306
+ **Provider compatibility:**
307
+
308
+ | Provider | Method | Notes |
309
+ |----------|--------|-------|
310
+ | OpenAI | `response_format.json_schema` | Strict mode enabled |
311
+ | Ollama | `format: { schema }` | Model must support grammar |
312
+ | Google | `responseMimeType + responseSchema` | Some features stripped |
313
+
314
+ ### Observability
315
+
316
+ ```typescript
317
+ import { AIModel, ConsoleAuditor, BufferedAuditor } from 'universal-llm-client';
318
+
319
+ // Simple console logging
320
+ const model = new AIModel({
321
+ model: 'qwen3:4b',
322
+ providers: [{ type: 'ollama' }],
323
+ auditor: new ConsoleAuditor('[LLM]'),
324
+ });
325
+ // [LLM] REQUEST [ollama] (qwen3:4b) →
326
+ // [LLM] RESPONSE [ollama] (qwen3:4b) 1200ms 68 tokens
327
+
328
+ // Buffered for custom sinks (OpenTelemetry, DB, etc.)
329
+ const auditor = new BufferedAuditor({
330
+ maxBufferSize: 100,
331
+ onFlush: async (events) => {
332
+ await sendToOpenTelemetry(events);
333
+ },
334
+ });
335
+ ```
336
+
337
+ ### MCP Integration
338
+
339
+ ```typescript
340
+ import { AIModel, MCPToolBridge } from 'universal-llm-client';
341
+
342
+ const model = new AIModel({
343
+ model: 'qwen3:4b',
344
+ providers: [{ type: 'ollama' }],
345
+ });
346
+
347
+ const mcp = new MCPToolBridge({
348
+ servers: {
349
+ filesystem: {
350
+ command: 'npx',
351
+ args: ['-y', '@modelcontextprotocol/server-filesystem', './'],
352
+ },
353
+ weather: {
354
+ url: 'https://mcp.example.com/weather',
355
+ },
356
+ },
357
+ });
358
+
359
+ await mcp.connect();
360
+ await mcp.registerTools(model);
361
+
362
+ // MCP tools are now callable via chatWithTools
363
+ const response = await model.chatWithTools([
364
+ { role: 'user', content: 'List files in the current directory' },
365
+ ]);
366
+
367
+ await mcp.disconnect();
368
+ ```
369
+
370
+ ### Stream Decoders
371
+
372
+ ```typescript
373
+ import { AIModel, createDecoder } from 'universal-llm-client';
374
+
375
+ // Passthrough — raw text, no parsing
376
+ // Standard Chat — text + native reasoning + tool calls
377
+ // Interleaved Reasoning — parses <think> and <progress> tags from text streams
378
+
379
+ const decoder = createDecoder('interleaved-reasoning', (event) => {
380
+ switch (event.type) {
381
+ case 'text': console.log(event.content); break;
382
+ case 'thinking': console.log('[think]', event.content); break;
383
+ case 'progress': console.log('[progress]', event.content); break;
384
+ case 'tool_call': console.log('[tool]', event.calls); break;
385
+ }
386
+ });
387
+
388
+ decoder.push('<think>Let me analyze this</think>The answer is 42');
389
+ decoder.flush();
390
+
391
+ console.log(decoder.getCleanContent()); // "The answer is 42"
392
+ console.log(decoder.getReasoning()); // "Let me analyze this"
393
+ ```
394
+
395
+ ---
396
+
397
+ ## API Reference
398
+
399
+ ### `AIModel`
400
+
401
+ The universal client. One class, multiple backends.
402
+
403
+ ```typescript
404
+ new AIModel(config: AIModelConfig)
405
+ ```
406
+
407
+ **Config:**
408
+
409
+ | Property | Type | Default | Description |
410
+ |---|---|---|---|
411
+ | `model` | `string` | — | Model name (e.g., `'gemini-2.5-flash'`) |
412
+ | `providers` | `ProviderConfig[]` | — | Ordered list of provider backends |
413
+ | `retries` | `number` | `2` | Retries per provider before failover |
414
+ | `timeout` | `number` | `30000` | Request timeout in ms |
415
+ | `auditor` | `Auditor` | `NoopAuditor` | Observability sink |
416
+ | `thinking` | `boolean` | `false` | Enable model thinking/reasoning |
417
+ | `debug` | `boolean` | `false` | Debug logging |
418
+ | `defaultParameters` | `object` | — | Default parameters for all requests |
419
+
420
+ **Provider Config:**
421
+
422
+ | Property | Type | Description |
423
+ |---|---|---|
424
+ | `type` | `string` | `'ollama'`, `'openai'`, `'google'`, `'vertex'`, `'llamacpp'` |
425
+ | `url` | `string` | Provider URL (has sensible defaults) |
426
+ | `apiKey` | `string` | API key or Bearer token |
427
+ | `priority` | `number` | Lower = tried first (defaults to array index) |
428
+ | `model` | `string` | Override model name for this provider |
429
+ | `region` | `string` | Vertex AI region (e.g., `'us-central1'`) |
430
+ | `apiVersion` | `string` | API version (e.g., `'v1beta'`) |
431
+
432
+ **Methods:**
433
+
434
+ | Method | Returns | Description |
435
+ |---|---|---|
436
+ | `chat(messages, options?)` | `Promise<LLMChatResponse>` | Send chat request |
437
+ | `chatWithTools(messages, options?)` | `Promise<LLMChatResponse>` | Chat with autonomous tool execution |
438
+ | `chatStream(messages, options?)` | `AsyncGenerator<DecodedEvent>` | Stream chat response |
439
+ | `generateStructured(schema, messages, options?)` | `Promise<T>` | Generate typed JSON validated against Zod schema |
440
+ | `tryParseStructured(schema, messages, options?)` | `Promise<StructuredOutputResult<T>>` | Non-throwing variant returning result object |
441
+ | `generateStructuredStream(schema, messages, options?)` | `AsyncGenerator<T, T>` | Stream validated partial objects as the JSON is generated |
442
+ | `embed(text)` | `Promise<number[]>` | Generate single embedding |
443
+ | `embedArray(texts)` | `Promise<number[][]>` | Generate batch embeddings |
444
+ | `registerTool(name, desc, params, handler)` | `void` | Register a callable tool |
445
+ | `registerTools(tools)` | `void` | Register multiple tools |
446
+ | `getModels()` | `Promise<string[]>` | List available models |
447
+ | `getModelInfo()` | `Promise<ModelMetadata>` | Get model metadata |
448
+ | `getProviderStatus()` | `ProviderStatus[]` | Check provider health |
449
+ | `setModel(name)` | `void` | Switch model at runtime |
450
+ | `dispose()` | `Promise<void>` | Clean shutdown |
451
+
452
+ ### Structured Output
453
+
454
+ ```typescript
455
+ import { z } from 'zod';
456
+
457
+ // Define your schema
458
+ const UserSchema = z.object({
459
+ name: z.string(),
460
+ age: z.number(),
461
+ email: z.string().email(),
462
+ });
463
+
464
+ // Generate typed JSON
465
+ const user = await model.generateStructured(UserSchema, messages);
466
+ // TypeScript infers: { name: string; age: number; email: string }
467
+
468
+ // Non-throwing variant
469
+ const result = await model.tryParseStructured(UserSchema, messages);
470
+ if (result.ok) {
471
+ console.log(result.value.name); // Fully typed
472
+ } else {
473
+ console.log(result.error.message);
474
+ }
475
+
476
+ // Stream partial objects
477
+ for await (const partial of model.generateStructuredStream(UserSchema, messages)) {
478
+ console.log(partial); // Partial validated objects
479
+ }
480
+ ```
481
+
482
+ **Separate module import (tree-shaking):**
483
+
484
+ ```typescript
485
+ import {
486
+ StructuredOutputError,
487
+ type StructuredOutputResult,
488
+ parseStructured,
489
+ tryParseStructured,
490
+ zodToJsonSchema,
491
+ } from 'universal-llm-client/structured-output';
492
+
493
+ // Use without importing the full client
494
+ const schema = z.object({ name: z.string() });
495
+ const jsonSchema = zodToJsonSchema(schema);
496
+ ```
497
+
498
+ ### `ToolBuilder` / `ToolExecutor`
499
+
500
+ ```typescript
501
+ import { ToolBuilder, ToolExecutor } from 'universal-llm-client';
502
+
503
+ // Fluent builder
504
+ const tool = new ToolBuilder('search')
505
+ .description('Search the web')
506
+ .addParameter('query', 'string', 'Search query', true)
507
+ .addParameter('limit', 'number', 'Max results', false)
508
+ .build();
509
+
510
+ // Execution wrappers
511
+ const safeHandler = ToolExecutor.compose(
512
+ myHandler,
513
+ h => ToolExecutor.withTimeout(h, 5000),
514
+ h => ToolExecutor.safe(h),
515
+ h => ToolExecutor.withValidation(h, ['query']),
516
+ );
517
+ ```
518
+
519
+ ### Auditor Interface
520
+
521
+ Implement custom observability by providing an `Auditor`:
522
+
523
+ ```typescript
524
+ interface Auditor {
525
+ record(event: AuditEvent): void;
526
+ flush?(): Promise<void>;
527
+ }
528
+ ```
529
+
530
+ **Built-in implementations:**
531
+ - `NoopAuditor` — Zero overhead (default)
532
+ - `ConsoleAuditor` — Structured console logging
533
+ - `BufferedAuditor` — Collects events for custom sinks
534
+
535
+ ---
536
+
537
+ ## Architecture
538
+
539
+ ```
540
+ universal-llm-client
541
+ ├── AIModel ← Public API (the primary class you import)
542
+ ├── Router ← Internal failover engine
543
+ ├── BaseLLMClient ← Abstract client with tool execution
544
+ ├── Providers
545
+ │ ├── OllamaClient
546
+ │ ├── OpenAICompatibleClient (OpenAI, OpenRouter, Groq, LM Studio, vLLM, LlamaCpp)
547
+ │ └── GoogleClient (AI Studio + Vertex AI)
548
+ ├── StreamDecoder ← Pluggable reasoning strategies
549
+ ├── Auditor ← Observability interface
550
+ ├── MCPToolBridge ← MCP server integration
551
+ └── HTTP Utilities ← Universal fetch-based transport
552
+ ```
553
+
554
+ ### Design Principles
555
+
556
+ 1. **Single import** — `AIModel` is the only class users need
557
+ 2. **Provider agnostic** — Same code works with any backend
558
+ 3. **Transparent failover** — Health tracking and cooldowns happen behind the scenes
559
+ 4. **Zero dependencies** — Core library depends only on native `fetch`
560
+ 5. **Agent-ready** — Stateless, composable instances designed as foundation for agent frameworks
561
+ 6. **Observable** — Every request, response, tool call, retry, and failover is auditable
562
+
563
+ ---
564
+
565
+ ## Runtime Support
566
+
567
+ | Runtime | Version | Status |
568
+ |---|---|---|
569
+ | **Node.js** | 22+ | ✅ Full support |
570
+ | **Bun** | 1.0+ | ✅ Full support |
571
+ | **Deno** | 2.0+ | ✅ Full support |
572
+ | **Browsers** | Modern | ✅ Supported (MCP over HTTP only; no stdio transport) |
573
+
574
+ ---
575
+
576
+ ## For Agent Framework Authors
577
+
578
+ `AIModel` is designed as the transport layer for agentic systems:
579
+
580
+ - **Stateless** — No conversation history stored. Your framework manages memory
581
+ - **Composable** — Create separate instances for chat, embeddings, vision
582
+ - **Tool tracing** — `chatWithTools()` returns full execution trace
583
+ - **Context budget** — `getModelInfo()` exposes `contextLength`
584
+ - **Auditor as system bus** — Inject custom sinks for cost tracking, behavioral scoring
585
+ - **StreamDecoder as UI bridge** — Select decoder strategy per-call
586
+
587
+ ---
588
+
589
+ ## License
590
+
591
+ MIT