universal-llm-client 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. package/CHANGELOG.md +27 -24
  2. package/README.md +60 -11
  3. package/dist/ai-model.d.ts +12 -1
  4. package/dist/ai-model.d.ts.map +1 -1
  5. package/dist/ai-model.js +36 -1
  6. package/dist/ai-model.js.map +1 -1
  7. package/dist/auditor.js.map +1 -1
  8. package/dist/client.js.map +1 -1
  9. package/dist/gemma-channel.d.ts +14 -0
  10. package/dist/gemma-channel.d.ts.map +1 -0
  11. package/dist/gemma-channel.js +38 -0
  12. package/dist/gemma-channel.js.map +1 -0
  13. package/dist/gemma-diffusion.d.ts +49 -0
  14. package/dist/gemma-diffusion.d.ts.map +1 -0
  15. package/dist/gemma-diffusion.js +147 -0
  16. package/dist/gemma-diffusion.js.map +1 -0
  17. package/dist/http.d.ts +4 -0
  18. package/dist/http.d.ts.map +1 -1
  19. package/dist/http.js +14 -1
  20. package/dist/http.js.map +1 -1
  21. package/dist/index.d.ts +2 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +4 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/interfaces.d.ts +163 -7
  26. package/dist/interfaces.d.ts.map +1 -1
  27. package/dist/interfaces.js.map +1 -1
  28. package/dist/mcp.js.map +1 -1
  29. package/dist/providers/anthropic.d.ts.map +1 -1
  30. package/dist/providers/anthropic.js +28 -3
  31. package/dist/providers/anthropic.js.map +1 -1
  32. package/dist/providers/google.d.ts +22 -1
  33. package/dist/providers/google.d.ts.map +1 -1
  34. package/dist/providers/google.js +223 -13
  35. package/dist/providers/google.js.map +1 -1
  36. package/dist/providers/index.js.map +1 -1
  37. package/dist/providers/ollama.d.ts +2 -0
  38. package/dist/providers/ollama.d.ts.map +1 -1
  39. package/dist/providers/ollama.js +59 -30
  40. package/dist/providers/ollama.js.map +1 -1
  41. package/dist/providers/openai.d.ts +14 -0
  42. package/dist/providers/openai.d.ts.map +1 -1
  43. package/dist/providers/openai.js +200 -22
  44. package/dist/providers/openai.js.map +1 -1
  45. package/dist/router.d.ts +2 -0
  46. package/dist/router.d.ts.map +1 -1
  47. package/dist/router.js +4 -0
  48. package/dist/router.js.map +1 -1
  49. package/dist/stream-decoder.d.ts +12 -0
  50. package/dist/stream-decoder.d.ts.map +1 -1
  51. package/dist/stream-decoder.js +182 -5
  52. package/dist/stream-decoder.js.map +1 -1
  53. package/dist/structured-output.js.map +1 -1
  54. package/dist/thinking.d.ts +36 -0
  55. package/dist/thinking.d.ts.map +1 -0
  56. package/dist/thinking.js +52 -0
  57. package/dist/thinking.js.map +1 -0
  58. package/dist/tools.js.map +1 -1
  59. package/dist/zod-adapter.js.map +1 -1
  60. package/package.json +4 -1
  61. package/src/ai-model.ts +400 -0
  62. package/src/auditor.ts +213 -0
  63. package/src/client.ts +402 -0
  64. package/src/debug/debug-google-streaming.ts +97 -0
  65. package/src/debug/debug-tool-execution.ts +86 -0
  66. package/src/debug/test-lmstudio-tools.ts +155 -0
  67. package/src/demos/README.md +47 -0
  68. package/src/demos/basic/universal-llm-examples.ts +161 -0
  69. package/src/demos/diffusion-gemma/.env +29 -0
  70. package/src/demos/diffusion-gemma/.env.example +27 -0
  71. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  72. package/src/demos/diffusion-gemma/README.md +59 -0
  73. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  74. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  75. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  76. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  77. package/src/demos/diffusion-gemma/server.ts +1205 -0
  78. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  79. package/src/demos/mcp/astrid-memory-demo.ts +295 -0
  80. package/src/demos/mcp/astrid-persona-memory.ts +357 -0
  81. package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
  82. package/src/demos/mcp/simple-astrid-memory.ts +148 -0
  83. package/src/demos/mcp/simple-mcp-demo.ts +68 -0
  84. package/src/demos/mcp/working-mcp-demo.ts +62 -0
  85. package/src/demos/model-alias-demo.ts +0 -0
  86. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
  87. package/src/demos/tools/astrid-memory-demo.ts +270 -0
  88. package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
  89. package/src/demos/tools/astrid-production-memory.ts +558 -0
  90. package/src/demos/tools/basic-translation-test.ts +66 -0
  91. package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
  92. package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
  93. package/src/demos/tools/clean-translation-test.ts +119 -0
  94. package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
  95. package/src/demos/tools/complete-rag-demo.ts +369 -0
  96. package/src/demos/tools/complete-tool-demo.ts +132 -0
  97. package/src/demos/tools/demo-tool-calling.ts +124 -0
  98. package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
  99. package/src/demos/tools/hybrid-thinking-test.ts +154 -0
  100. package/src/demos/tools/memory-integration-test.ts +420 -0
  101. package/src/demos/tools/multilingual-memory-system.ts +802 -0
  102. package/src/demos/tools/ondemand-translation-demo.ts +655 -0
  103. package/src/demos/tools/production-tool-demo.ts +245 -0
  104. package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
  105. package/src/demos/tools/rigorous-language-analysis.ts +218 -0
  106. package/src/demos/tools/test-universal-memory-system.ts +126 -0
  107. package/src/demos/tools/translation-integration-guide.ts +346 -0
  108. package/src/demos/tools/universal-memory-system.ts +560 -0
  109. package/src/gemma-channel.ts +47 -0
  110. package/src/gemma-diffusion.ts +167 -0
  111. package/src/http.ts +261 -0
  112. package/src/index.ts +180 -0
  113. package/src/interfaces.ts +843 -0
  114. package/src/mcp.ts +345 -0
  115. package/src/providers/anthropic.ts +796 -0
  116. package/src/providers/google.ts +840 -0
  117. package/src/providers/index.ts +8 -0
  118. package/src/providers/ollama.ts +503 -0
  119. package/src/providers/openai.ts +587 -0
  120. package/src/router.ts +785 -0
  121. package/src/stream-decoder.ts +535 -0
  122. package/src/structured-output.ts +759 -0
  123. package/src/test-scripts/test-advanced-tools.ts +310 -0
  124. package/src/test-scripts/test-google-deep-research.ts +33 -0
  125. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
  126. package/src/test-scripts/test-google-streaming.ts +63 -0
  127. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
  128. package/src/test-scripts/test-google-thinking.ts +46 -0
  129. package/src/test-scripts/test-mcp-config.ts +28 -0
  130. package/src/test-scripts/test-mcp-connection.ts +29 -0
  131. package/src/test-scripts/test-system-message-positions.ts +163 -0
  132. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
  133. package/src/test-scripts/test-tool-calling.ts +231 -0
  134. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  135. package/src/tests/ai-model.test.ts +1614 -0
  136. package/src/tests/auditor.test.ts +224 -0
  137. package/src/tests/gemma-diffusion.test.ts +115 -0
  138. package/src/tests/http.test.ts +200 -0
  139. package/src/tests/interfaces.test.ts +117 -0
  140. package/src/tests/providers/anthropic.test.ts +118 -0
  141. package/src/tests/providers/google.test.ts +841 -0
  142. package/src/tests/providers/ollama.test.ts +1034 -0
  143. package/src/tests/providers/openai.test.ts +1511 -0
  144. package/src/tests/router.test.ts +254 -0
  145. package/src/tests/stream-decoder.test.ts +263 -0
  146. package/src/tests/structured-output.test.ts +1450 -0
  147. package/src/tests/thinking.test.ts +65 -0
  148. package/src/tests/tools.test.ts +175 -0
  149. package/src/thinking.ts +73 -0
  150. package/src/tools.ts +246 -0
  151. package/src/zod-adapter.ts +72 -0
package/src/router.ts ADDED
@@ -0,0 +1,785 @@
1
+ /**
2
+ * Universal LLM Client v3 — Router (Internal Failover Engine)
3
+ *
4
+ * Manages the ordered provider chain with:
5
+ * - Priority ordering
6
+ * - Per-provider retries
7
+ * - Health tracking with failure thresholds
8
+ * - Cooldown periods for unhealthy providers
9
+ * - Audit integration for every retry/failover event
10
+ *
11
+ * Not exposed publicly — AIModel delegates to it.
12
+ */
13
+
14
import { BaseLLMClient } from './client.js';
import type { Auditor } from './auditor.js';
import { NoopAuditor } from './auditor.js';
import type {
    LLMChatMessage,
    LLMChatResponse,
    ChatOptions,
    LLMFunction,
    ModelMetadata,
    OutputOptions,
    ToolHandler,
} from './interfaces.js';
import type { DecodedEvent } from './stream-decoder.js';
import {
    parseStructured,
    StructuredOutputError,
    StreamingJsonParser,
    getJsonSchemaFromConfig,
    type SchemaConfig,
    type StructuredOutputResult,
} from './structured-output.js';
33
+
34
+ // ============================================================================
35
+ // Types
36
+ // ============================================================================
37
+
38
/** One slot in the router's provider chain. */
export interface ProviderEntry {
    /** Identifier used as the key for health tracking and audit events. */
    id: string;
    /** Client that requests are delegated to. */
    client: BaseLLMClient;
    /** Sort key for the chain: entries with a lower value are tried earlier. */
    priority: number;
    /** When set, reported as the model name instead of `client.model`. */
    modelOverride?: string;
}
48
+
49
/** Mutable health bookkeeping the router keeps for each provider id. */
interface ProviderHealth {
    /** False once the provider has been put into cooldown. */
    healthy: boolean;
    /** Failures recorded since the last success or cooldown reset. */
    consecutiveFailures: number;
    /** Epoch milliseconds of the most recent recorded failure. */
    lastFailure?: number;
    /** Epoch milliseconds after which an unhealthy provider may be retried. */
    cooldownUntil?: number;
}
55
+
56
/** Tuning knobs accepted by the Router constructor; every field is optional. */
export interface RouterConfig {
    /** Extra attempts made on a provider before failing over (default: 2). */
    retriesPerProvider?: number;
    /** Consecutive failures that mark a provider unhealthy (default: 3). */
    maxFailures?: number;
    /** Milliseconds an unhealthy provider is skipped for (default: 30000). */
    cooldownMs?: number;
    /** Sink for observability events. */
    auditor?: Auditor;
}
66
+
67
/** Snapshot of one provider as returned by `Router.getStatus()`. */
export interface ProviderStatus {
    /** Provider entry id. */
    id: string;
    /** Whether the provider is currently eligible to receive requests. */
    healthy: boolean;
    /** Whether the provider is registered with the router. */
    active: boolean;
    /** Failures recorded since the last success or cooldown reset. */
    consecutiveFailures: number;
    /** Epoch milliseconds at which the current cooldown ends, if any. */
    cooldownUntil?: number;
    /** Model name reported for this provider. */
    model: string;
}
75
+
76
+ // ============================================================================
77
+ // Router
78
+ // ============================================================================
79
+
80
/**
 * Internal failover engine.
 *
 * Keeps the provider chain sorted by priority, retries each provider a
 * configurable number of times, tracks consecutive failures with a cooldown,
 * and reports retry / error / failover / structured-output events to the
 * configured auditor.
 */
export class Router {
    /** Registered providers, kept sorted by ascending priority. */
    private providers: ProviderEntry[] = [];
    /** Health bookkeeping keyed by provider id. */
    private health: Map<string, ProviderHealth> = new Map();
    private auditor: Auditor;
    private config: Required<Omit<RouterConfig, 'auditor'>>;

    /**
     * @param config Optional tuning; missing fields fall back to
     *   2 retries per provider, 3 max failures and a 30000 ms cooldown.
     */
    constructor(config: RouterConfig = {}) {
        this.auditor = config.auditor ?? new NoopAuditor();
        this.config = {
            retriesPerProvider: config.retriesPerProvider ?? 2,
            maxFailures: config.maxFailures ?? 3,
            cooldownMs: config.cooldownMs ?? 30000,
        };
    }

    // ========================================================================
    // Provider Management
    // ========================================================================

    /** Register a provider (initially healthy) and re-sort the chain by priority. */
    addProvider(entry: ProviderEntry): void {
        this.providers.push(entry);
        this.health.set(entry.id, {
            healthy: true,
            consecutiveFailures: 0,
        });
        // Re-sort by priority
        this.providers.sort((a, b) => a.priority - b.priority);
    }

    /** Remove every entry with the given id together with its health record. */
    removeProvider(id: string): void {
        this.providers = this.providers.filter(p => p.id !== id);
        this.health.delete(id);
    }

    /** Replace the auditor that receives subsequent events. */
    setAuditor(auditor: Auditor): void {
        this.auditor = auditor;
    }

    /** All registered provider clients, in priority order. */
    getClients(): BaseLLMClient[] {
        return this.providers.map(p => p.client);
    }

    /**
     * Snapshot of every registered provider in priority order.
     * `healthy` is computed through the availability check, so a provider
     * whose cooldown has expired is reported (and reset) as healthy.
     */
    getStatus(): ProviderStatus[] {
        return this.providers.map(p => {
            // Evaluate availability first: it may reset an expired cooldown.
            const healthy = this.isAvailable(p.id);
            const record = this.health.get(p.id);
            return {
                id: p.id,
                healthy,
                active: true,
                consecutiveFailures: record?.consecutiveFailures ?? 0,
                cooldownUntil: record?.cooldownUntil,
                model: p.modelOverride ?? p.client.model,
            };
        });
    }

    // ========================================================================
    // Execution with Failover
    // ========================================================================

    /**
     * Execute a function against providers with automatic failover.
     * Tries each available provider in priority order; each provider gets
     * `retriesPerProvider + 1` attempts before one failure is recorded for it.
     *
     * @param fn Operation to run against a provider client.
     * @param context Label attached to audit events.
     * @returns The first successful result.
     * @throws Error when no provider is available, or the last provider error
     *   when every attempt failed.
     */
    async execute<T>(
        fn: (client: BaseLLMClient) => Promise<T>,
        context: string = 'execute',
    ): Promise<T> {
        const available = this.getAvailableProviders();

        if (available.length === 0) {
            throw new Error('No available LLM providers. All providers are unhealthy or in cooldown.');
        }

        let lastError: Error | undefined;

        for (const provider of available) {
            const model = provider.modelOverride ?? provider.client.model;

            for (let attempt = 0; attempt <= this.config.retriesPerProvider; attempt++) {
                try {
                    if (attempt > 0) {
                        this.auditor.record({
                            timestamp: Date.now(),
                            type: 'retry',
                            provider: provider.id,
                            model,
                            metadata: { attempt, context },
                        });
                    }

                    const result = await fn(provider.client);
                    this.recordSuccess(provider.id);
                    return result;
                } catch (error) {
                    lastError = error instanceof Error ? error : new Error(String(error));
                    this.auditor.record({
                        timestamp: Date.now(),
                        type: 'error',
                        provider: provider.id,
                        model,
                        error: lastError.message,
                        metadata: { attempt, context },
                    });
                }
            }

            // All retries exhausted for this provider
            this.recordFailure(provider.id);

            // Announce the failover only when there is somewhere to fail over to
            const nextProvider = this.getNextAvailableAfter(provider.id);
            if (nextProvider) {
                this.auditor.record({
                    timestamp: Date.now(),
                    type: 'failover',
                    provider: provider.id,
                    metadata: {
                        from: provider.id,
                        nextProvider: nextProvider.id,
                        context,
                        reason: lastError?.message,
                    },
                });
            }
        }

        throw lastError ?? new Error('All providers failed');
    }

    /**
     * Execute a streaming function with failover.
     * On failure, restarts with the next provider from the beginning, so a
     * consumer may see events from a failed provider followed by a full
     * replay from the next one.
     *
     * The provider stream is delegated to with `yield*`, so when the consumer
     * stops early (`return()` / `break`) the provider's generator is closed
     * as well instead of being left suspended.
     *
     * @param fn Factory producing the provider's event stream.
     * @param context Label attached to audit events.
     * @returns Whatever the successful provider stream returned.
     * @throws Error when no provider is available, or the last provider error.
     */
    async *executeStream(
        fn: (client: BaseLLMClient) => AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>,
        context: string = 'stream',
    ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
        const available = this.getAvailableProviders();

        if (available.length === 0) {
            throw new Error('No available LLM providers for streaming.');
        }

        let lastError: Error | undefined;

        for (const provider of available) {
            try {
                // Forward every event and capture the stream's return value.
                const returnValue = yield* fn(provider.client);

                this.recordSuccess(provider.id);
                return returnValue;
            } catch (error) {
                lastError = error instanceof Error ? error : new Error(String(error));
                this.recordFailure(provider.id);

                this.auditor.record({
                    timestamp: Date.now(),
                    type: 'failover',
                    provider: provider.id,
                    error: lastError.message,
                    metadata: { context },
                });

                // Continue to next provider
            }
        }

        throw lastError ?? new Error('All providers failed for streaming');
    }

    // ========================================================================
    // Convenience Methods
    // ========================================================================

    /**
     * @deprecated No longer needed — structured output and tools can now be used together.
     */
    private validateOutputAndTools(_options?: ChatOptions): void {
        // Structured output and tools are now allowed together.
    }

    /**
     * Extract schema from output options.
     * Returns a SchemaConfig or a bare jsonSchema object.
     *
     * @throws Error when neither `schema` nor `jsonSchema` is set.
     */
    private getSchemaFromOutput<T>(output: OutputOptions<T>): { config: SchemaConfig<T>; name?: string; description?: string } | { jsonSchema: Record<string, unknown>; name?: string; description?: string } {
        if (output.schema) {
            return {
                config: output.schema,
                name: output.name,
                description: output.description,
            };
        }
        if (output.jsonSchema) {
            return {
                jsonSchema: output.jsonSchema as Record<string, unknown>,
                name: output.name,
                description: output.description,
            };
        }
        throw new Error('output must have either schema or jsonSchema');
    }

    /** Concatenate the text of a message: the string itself, or all `text` parts joined. */
    private static extractText(content: LLMChatResponse['message']['content']): string {
        if (typeof content === 'string') {
            return content;
        }
        return content
            .filter((part): part is { type: 'text'; text: string } => part.type === 'text')
            .map(part => part.text)
            .join('');
    }

    /** Runtime check used to recognise structured-output failures: an Error carrying `rawOutput`. */
    private static carriesRawOutput(error: unknown): error is StructuredOutputError & { rawOutput: string } {
        return error instanceof Error && 'rawOutput' in error;
    }

    /**
     * Send a chat request with failover. When `options.output` is set the
     * request goes through the structured-output flow instead.
     */
    async chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse> {
        // Deprecated no-op kept for compatibility: output and tools may be combined.
        this.validateOutputAndTools(options);

        // If output parameter is provided, use structured output flow (VAL-API-004)
        if (options?.output) {
            // Type assertion: we know output is defined at this point
            return this.chatWithStructuredOutput(messages, options as ChatOptions & { output: OutputOptions });
        }

        return this.execute(
            client => client.chat(messages, options),
            'chat',
        );
    }

    /**
     * Chat with structured output using the output parameter.
     * Validates response against the schema and returns structured property.
     * A response that contains tool calls is returned untouched.
     *
     * @throws StructuredOutputError when the reply is not valid JSON (no
     *   validator configured); otherwise rethrows whatever validation threw.
     */
    private async chatWithStructuredOutput<T>(
        messages: LLMChatMessage[],
        options: ChatOptions & { output: OutputOptions<T> },
    ): Promise<LLMChatResponse<T>> {
        const { output } = options;
        const schemaInfo = this.getSchemaFromOutput(output);
        const schemaName = schemaInfo.name ?? 'response';

        // Emit structured_request event
        this.auditor.record({
            timestamp: Date.now(),
            type: 'structured_request',
            provider: 'router',
            schemaName,
        });

        // Build ChatOptions with schema for the provider
        // Keep tools if provided — structured output and tools can work together
        const { output: _, ...restOptions } = options;
        const structuredOptions: ChatOptions = {
            ...restOptions,
            // Use jsonSchema for the provider
            jsonSchema: 'config' in schemaInfo
                ? getJsonSchemaFromConfig(schemaInfo.config)
                : schemaInfo.jsonSchema,
            schemaName: schemaInfo.name,
            schemaDescription: schemaInfo.description,
        };

        const start = Date.now();

        // Get response from provider
        const response = await this.execute(
            client => client.chat(messages, structuredOptions),
            'chatWithStructuredOutput',
        );

        // If the response contains tool calls, skip validation and return as-is
        if (response.message.tool_calls && response.message.tool_calls.length > 0) {
            return response as LLMChatResponse<T>;
        }

        const content = Router.extractText(response.message.content);

        // Get the SchemaConfig for validation
        const schemaConfig: SchemaConfig<T> | null = 'config' in schemaInfo ? schemaInfo.config : null;

        if (!schemaConfig || !schemaConfig.validate) {
            // No validator — return parsed JSON without validation
            try {
                const structured = JSON.parse(content) as T;
                // Emit structured_response event on success.
                // No `model` field: the response's message role is not a model name.
                this.auditor.record({
                    timestamp: Date.now(),
                    type: 'structured_response',
                    provider: response.provider ?? 'router',
                    duration: Date.now() - start,
                    schemaName,
                    usage: response.usage,
                });
                return {
                    ...response,
                    structured,
                };
            } catch (error) {
                // JSON parse failed
                const rawOutput = content;
                this.auditor.record({
                    timestamp: Date.now(),
                    type: 'structured_validation_error',
                    provider: response.provider ?? 'router',
                    schemaName,
                    error: error instanceof Error ? error.message : 'JSON parse failed',
                    rawOutput,
                });
                throw new StructuredOutputError(
                    `Failed to parse JSON: ${rawOutput}`,
                    { rawOutput: rawOutput, cause: error instanceof Error ? error : undefined },
                );
            }
        }

        // Parse and validate against SchemaConfig
        try {
            const validated = parseStructured(schemaConfig, content);
            // Emit structured_response event on success
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_response',
                provider: response.provider ?? 'router',
                duration: Date.now() - start,
                schemaName,
                usage: response.usage,
            });
            return {
                ...response,
                structured: validated,
            };
        } catch (error) {
            // Emit structured_validation_error event
            const rawOutput = content;
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_validation_error',
                provider: response.provider ?? 'router',
                schemaName,
                error: error instanceof Error ? error.message : 'Validation failed',
                rawOutput,
            });
            throw error;
        }
    }

    /** Run the client's tool-calling loop with failover. */
    async chatWithTools(
        messages: LLMChatMessage[],
        options?: ChatOptions & { maxIterations?: number },
    ): Promise<LLMChatResponse> {
        return this.execute(
            client => client.chatWithTools(messages, options),
            'chatWithTools',
        );
    }

    /**
     * Stream a chat response with failover.
     *
     * @throws Error when `options.output` is set; use
     *   generateStructuredStream() for streaming structured output.
     */
    async *chatStream(
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
        // Structured output via output parameter is not supported on streaming
        // Use generateStructuredStream() instead
        if (options?.output) {
            throw new Error(
                'The "output" parameter is not supported with chatStream(). '
                + 'Use generateStructuredStream() for streaming structured output.',
            );
        }

        return yield* this.executeStream(
            client => client.chatStream(messages, options),
            'chatStream',
        );
    }

    /** Embed a single text with failover. */
    async embed(text: string): Promise<number[]> {
        return this.execute(
            client => client.embed(text),
            'embed',
        );
    }

    /** Embed several texts with failover. */
    async embedArray(texts: string[]): Promise<number[][]> {
        return this.execute(
            client => client.embedArray(texts),
            'embedArray',
        );
    }

    /**
     * Aggregate model names from all registered providers, de-duplicated and
     * in provider priority order. Providers are queried concurrently; any
     * provider whose lookup fails is skipped.
     */
    async getModels(): Promise<string[]> {
        const lookups = await Promise.allSettled(
            // The async wrapper turns synchronous throws and non-iterable
            // results into rejections, so one bad provider cannot fail the call.
            this.providers.map(async provider => [...(await provider.client.getModels())]),
        );

        const allModels: string[] = [];
        for (const lookup of lookups) {
            if (lookup.status === 'fulfilled') {
                allModels.push(...lookup.value);
            }
        }
        return [...new Set(allModels)];
    }

    /** Fetch model metadata from the first provider that answers. */
    async getModelInfo(): Promise<ModelMetadata> {
        return this.execute(
            client => client.getModelInfo(),
            'getModelInfo',
        );
    }

    // ========================================================================
    // Structured Output Methods
    // ========================================================================

    /**
     * Generate structured output from the LLM with automatic failover.
     * Validates the response against the provided SchemaConfig.
     * Throws StructuredOutputError on validation failure.
     *
     * @template T The output type
     * @param config Schema configuration (JSON Schema + optional validator)
     * @param messages Chat messages to send
     * @param options Additional options (temperature, maxTokens, etc.)
     * @returns Validated structured output
     * @throws StructuredOutputError if validation fails
     */
    async generateStructured<T>(
        config: SchemaConfig<T>,
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): Promise<T> {
        // Get JSON Schema from config
        const jsonSchema = getJsonSchemaFromConfig(config);
        const schemaName = options?.schemaName ?? config.name ?? 'response';

        // Emit structured_request event
        this.auditor.record({
            timestamp: Date.now(),
            type: 'structured_request',
            provider: 'router',
            schemaName,
        });

        // Build ChatOptions with schema
        const structuredOptions: ChatOptions = {
            ...options,
            jsonSchema,
        };

        const start = Date.now();

        // Execute with failover
        const response = await this.execute(
            client => client.chat(messages, structuredOptions),
            'generateStructured',
        );

        // Parse and validate the response
        const content = Router.extractText(response.message.content);

        try {
            const result = parseStructured(config, content);
            // Emit structured_response event on success
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_response',
                provider: response.provider ?? 'router',
                duration: Date.now() - start,
                schemaName,
                usage: response.usage,
            });
            return result;
        } catch (error) {
            // Emit structured_validation_error event
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_validation_error',
                provider: response.provider ?? 'router',
                schemaName,
                error: error instanceof Error ? error.message : 'Validation failed',
                rawOutput: content,
            });
            throw error;
        }
    }

    /**
     * Try to generate structured output, returning a result object instead of throwing.
     * Only errors that carry a `rawOutput` property are converted into a
     * failure result; anything else is rethrown.
     *
     * @template T The output type
     * @param config Schema configuration (JSON Schema + optional validator)
     * @param messages Chat messages to send
     * @param options Additional options (temperature, maxTokens, etc.)
     * @returns StructuredOutputResult<T>
     */
    async tryParseStructured<T>(
        config: SchemaConfig<T>,
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): Promise<StructuredOutputResult<T>> {
        try {
            const value = await this.generateStructured(config, messages, options);
            return { ok: true, value };
        } catch (error) {
            if (Router.carriesRawOutput(error)) {
                return {
                    ok: false,
                    error,
                    rawOutput: error.rawOutput,
                };
            }

            // Unexpected error - re-throw
            throw error;
        }
    }

    /**
     * Stream structured output with partial validated objects.
     *
     * @template T The output type
     * @param config Schema configuration (JSON Schema + optional validator)
     * @param messages Chat messages to send
     * @param options Additional options (temperature, maxTokens, etc.)
     * @yields Partial validated objects as the JSON stream progresses
     * @returns Complete validated object on stream completion
     * @throws StructuredOutputError if final validation fails
     */
    async *generateStructuredStream<T>(
        config: SchemaConfig<T>,
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): AsyncGenerator<T, T, unknown> {
        // Get JSON Schema from config
        const jsonSchema = getJsonSchemaFromConfig(config);
        const schemaName = options?.schemaName ?? config.name ?? 'response';

        // Emit structured_request event
        this.auditor.record({
            timestamp: Date.now(),
            type: 'structured_request',
            provider: 'router',
            schemaName,
        });

        // Build ChatOptions with schema
        const structuredOptions: ChatOptions = {
            ...options,
            jsonSchema,
        };

        const start = Date.now();

        // Stream with failover
        const stream = this.executeStream(
            client => client.chatStream(messages, structuredOptions),
            'generateStructuredStream',
        );

        // Accumulate text and yield partial validated objects
        const parser = new StreamingJsonParser<T>(config);
        let fullContent = '';
        // Serialized form of the last yielded partial, cached so each chunk
        // costs one JSON.stringify instead of two.
        let lastSerialized: string | undefined;

        try {
            for await (const event of stream) {
                // Only process text events
                if (event.type !== 'text') continue;

                fullContent += event.content;

                // Try to parse partial JSON
                const result = parser.feed(event.content);
                if (result.partial === undefined) continue;

                // Only yield if different from last (avoid duplicate yields)
                const serialized = JSON.stringify(result.partial);
                if (lastSerialized === undefined || serialized !== lastSerialized) {
                    lastSerialized = serialized;
                    yield result.partial;
                }
            }

            // Parse and validate the complete content
            const complete = parseStructured(config, fullContent);

            // Emit structured_response event on success
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_response',
                provider: 'router',
                schemaName,
                duration: Date.now() - start,
            });

            // Return the complete validated object
            return complete;
        } catch (error) {
            // Emit structured_validation_error event
            this.auditor.record({
                timestamp: Date.now(),
                type: 'structured_validation_error',
                provider: 'router',
                schemaName,
                error: error instanceof Error ? error.message : 'Validation failed',
                rawOutput: fullContent,
            });
            throw error;
        }
    }

    // ========================================================================
    // Tool Registration (broadcast to all providers)
    // ========================================================================

    /** Register one tool on every provider that is registered at call time. */
    registerTool(
        name: string,
        description: string,
        parameters: LLMFunction['parameters'],
        handler: ToolHandler,
    ): void {
        for (const provider of this.providers) {
            provider.client.registerTool(name, description, parameters, handler);
        }
    }

    /** Register several tools on every provider that is registered at call time. */
    registerTools(
        tools: Array<{
            name: string;
            description: string;
            parameters: LLMFunction['parameters'];
            handler: ToolHandler;
        }>,
    ): void {
        for (const provider of this.providers) {
            provider.client.registerTools(tools);
        }
    }

    // ========================================================================
    // Health Management
    // ========================================================================

    /**
     * Whether a provider may receive requests right now.
     * Side effect: an unhealthy provider whose cooldown has expired is reset
     * to healthy with a zero failure count so it can be re-tested.
     */
    private isAvailable(id: string): boolean {
        const h = this.health.get(id);
        if (!h) return false;
        if (h.healthy) return true;
        // Check if cooldown has expired
        if (h.cooldownUntil && Date.now() >= h.cooldownUntil) {
            // Reset for re-testing
            h.healthy = true;
            h.consecutiveFailures = 0;
            h.cooldownUntil = undefined;
            return true;
        }
        return false;
    }

    /** Providers currently eligible for requests, in priority order. */
    private getAvailableProviders(): ProviderEntry[] {
        return this.providers.filter(p => this.isAvailable(p.id));
    }

    /** First available provider positioned after `currentId` in the chain, if any. */
    private getNextAvailableAfter(currentId: string): ProviderEntry | undefined {
        const idx = this.providers.findIndex(p => p.id === currentId);
        for (let i = idx + 1; i < this.providers.length; i++) {
            if (this.isAvailable(this.providers[i]!.id)) {
                return this.providers[i];
            }
        }
        return undefined;
    }

    /** Mark a provider healthy and clear its failure count and cooldown. */
    private recordSuccess(id: string): void {
        const h = this.health.get(id);
        if (h) {
            h.healthy = true;
            h.consecutiveFailures = 0;
            h.cooldownUntil = undefined;
        }
    }

    /**
     * Count one failure for a provider; once `maxFailures` is reached the
     * provider is marked unhealthy and put into cooldown for `cooldownMs`.
     */
    private recordFailure(id: string): void {
        const h = this.health.get(id);
        if (!h) return;

        h.consecutiveFailures++;
        h.lastFailure = Date.now();

        if (h.consecutiveFailures >= this.config.maxFailures) {
            h.healthy = false;
            h.cooldownUntil = Date.now() + this.config.cooldownMs;
        }
    }
}