universal-llm-client 4.0.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127):
  1. package/dist/ai-model.d.ts +20 -22
  2. package/dist/ai-model.d.ts.map +1 -1
  3. package/dist/ai-model.js +26 -23
  4. package/dist/ai-model.js.map +1 -1
  5. package/dist/client.d.ts +5 -5
  6. package/dist/client.d.ts.map +1 -1
  7. package/dist/client.js +17 -9
  8. package/dist/client.js.map +1 -1
  9. package/dist/http.d.ts +2 -0
  10. package/dist/http.d.ts.map +1 -1
  11. package/dist/http.js +1 -0
  12. package/dist/http.js.map +1 -1
  13. package/dist/index.d.ts +3 -3
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +4 -4
  16. package/dist/index.js.map +1 -1
  17. package/dist/interfaces.d.ts +49 -11
  18. package/dist/interfaces.d.ts.map +1 -1
  19. package/dist/interfaces.js +14 -0
  20. package/dist/interfaces.js.map +1 -1
  21. package/dist/providers/anthropic.d.ts +56 -0
  22. package/dist/providers/anthropic.d.ts.map +1 -0
  23. package/dist/providers/anthropic.js +524 -0
  24. package/dist/providers/anthropic.js.map +1 -0
  25. package/dist/providers/google.d.ts +5 -0
  26. package/dist/providers/google.d.ts.map +1 -1
  27. package/dist/providers/google.js +64 -8
  28. package/dist/providers/google.js.map +1 -1
  29. package/dist/providers/index.d.ts +1 -0
  30. package/dist/providers/index.d.ts.map +1 -1
  31. package/dist/providers/index.js +1 -0
  32. package/dist/providers/index.js.map +1 -1
  33. package/dist/providers/ollama.d.ts.map +1 -1
  34. package/dist/providers/ollama.js +38 -11
  35. package/dist/providers/ollama.js.map +1 -1
  36. package/dist/providers/openai.d.ts.map +1 -1
  37. package/dist/providers/openai.js +9 -7
  38. package/dist/providers/openai.js.map +1 -1
  39. package/dist/router.d.ts +13 -33
  40. package/dist/router.d.ts.map +1 -1
  41. package/dist/router.js +33 -57
  42. package/dist/router.js.map +1 -1
  43. package/dist/stream-decoder.d.ts +29 -2
  44. package/dist/stream-decoder.d.ts.map +1 -1
  45. package/dist/stream-decoder.js +39 -11
  46. package/dist/stream-decoder.js.map +1 -1
  47. package/dist/structured-output.d.ts +107 -181
  48. package/dist/structured-output.d.ts.map +1 -1
  49. package/dist/structured-output.js +137 -192
  50. package/dist/structured-output.js.map +1 -1
  51. package/dist/zod-adapter.d.ts +44 -0
  52. package/dist/zod-adapter.d.ts.map +1 -0
  53. package/dist/zod-adapter.js +61 -0
  54. package/dist/zod-adapter.js.map +1 -0
  55. package/package.json +9 -1
  56. package/src/ai-model.ts +350 -0
  57. package/src/auditor.ts +213 -0
  58. package/src/client.ts +402 -0
  59. package/src/debug/debug-google-streaming.ts +97 -0
  60. package/src/debug/debug-tool-execution.ts +86 -0
  61. package/src/debug/test-lmstudio-tools.ts +155 -0
  62. package/src/demos/README.md +47 -0
  63. package/src/demos/basic/universal-llm-examples.ts +161 -0
  64. package/src/demos/mcp/astrid-memory-demo.ts +295 -0
  65. package/src/demos/mcp/astrid-persona-memory.ts +357 -0
  66. package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
  67. package/src/demos/mcp/simple-astrid-memory.ts +148 -0
  68. package/src/demos/mcp/simple-mcp-demo.ts +68 -0
  69. package/src/demos/mcp/working-mcp-demo.ts +62 -0
  70. package/src/demos/model-alias-demo.ts +0 -0
  71. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
  72. package/src/demos/tools/astrid-memory-demo.ts +270 -0
  73. package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
  74. package/src/demos/tools/astrid-production-memory.ts +558 -0
  75. package/src/demos/tools/basic-translation-test.ts +66 -0
  76. package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
  77. package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
  78. package/src/demos/tools/clean-translation-test.ts +119 -0
  79. package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
  80. package/src/demos/tools/complete-rag-demo.ts +369 -0
  81. package/src/demos/tools/complete-tool-demo.ts +132 -0
  82. package/src/demos/tools/demo-tool-calling.ts +124 -0
  83. package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
  84. package/src/demos/tools/hybrid-thinking-test.ts +154 -0
  85. package/src/demos/tools/memory-integration-test.ts +420 -0
  86. package/src/demos/tools/multilingual-memory-system.ts +802 -0
  87. package/src/demos/tools/ondemand-translation-demo.ts +655 -0
  88. package/src/demos/tools/production-tool-demo.ts +245 -0
  89. package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
  90. package/src/demos/tools/rigorous-language-analysis.ts +218 -0
  91. package/src/demos/tools/test-universal-memory-system.ts +126 -0
  92. package/src/demos/tools/translation-integration-guide.ts +346 -0
  93. package/src/demos/tools/universal-memory-system.ts +560 -0
  94. package/src/http.ts +247 -0
  95. package/src/index.ts +161 -0
  96. package/src/interfaces.ts +657 -0
  97. package/src/mcp.ts +345 -0
  98. package/src/providers/anthropic.ts +762 -0
  99. package/src/providers/google.ts +620 -0
  100. package/src/providers/index.ts +8 -0
  101. package/src/providers/ollama.ts +469 -0
  102. package/src/providers/openai.ts +392 -0
  103. package/src/router.ts +780 -0
  104. package/src/stream-decoder.ts +361 -0
  105. package/src/structured-output.ts +759 -0
  106. package/src/test-scripts/test-advanced-tools.ts +310 -0
  107. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
  108. package/src/test-scripts/test-google-streaming.ts +63 -0
  109. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
  110. package/src/test-scripts/test-mcp-config.ts +28 -0
  111. package/src/test-scripts/test-mcp-connection.ts +29 -0
  112. package/src/test-scripts/test-system-message-positions.ts +163 -0
  113. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
  114. package/src/test-scripts/test-tool-calling.ts +231 -0
  115. package/src/tests/ai-model.test.ts +1614 -0
  116. package/src/tests/auditor.test.ts +224 -0
  117. package/src/tests/http.test.ts +200 -0
  118. package/src/tests/interfaces.test.ts +117 -0
  119. package/src/tests/providers/google.test.ts +660 -0
  120. package/src/tests/providers/ollama.test.ts +954 -0
  121. package/src/tests/providers/openai.test.ts +1122 -0
  122. package/src/tests/router.test.ts +254 -0
  123. package/src/tests/stream-decoder.test.ts +179 -0
  124. package/src/tests/structured-output.test.ts +1450 -0
  125. package/src/tests/tools.test.ts +175 -0
  126. package/src/tools.ts +246 -0
  127. package/src/zod-adapter.ts +72 -0
package/src/router.ts ADDED
@@ -0,0 +1,780 @@
1
+ /**
2
+ * Universal LLM Client v3 — Router (Internal Failover Engine)
3
+ *
4
+ * Manages the ordered provider chain with:
5
+ * - Priority ordering
6
+ * - Per-provider retries
7
+ * - Health tracking with failure thresholds
8
+ * - Cooldown periods for unhealthy providers
9
+ * - Audit integration for every retry/failover event
10
+ *
11
+ * Not exposed publicly — AIModel delegates to it.
12
+ */
13
+
14
+ import { BaseLLMClient } from './client.js';
15
+ import type { Auditor } from './auditor.js';
16
+ import { NoopAuditor } from './auditor.js';
17
+ import type {
18
+ LLMChatMessage,
19
+ LLMChatResponse,
20
+ ChatOptions,
21
+ ModelMetadata,
22
+ OutputOptions,
23
+ } from './interfaces.js';
24
+ import type { DecodedEvent } from './stream-decoder.js';
25
+ import {
26
+ parseStructured,
27
+ StructuredOutputError,
28
+ StreamingJsonParser,
29
+ getJsonSchemaFromConfig,
30
+ type SchemaConfig,
31
+ type StructuredOutputResult,
32
+ } from './structured-output.js';
33
+
34
+ // ============================================================================
35
+ // Types
36
+ // ============================================================================
37
+
38
/** One entry in the router's ordered failover chain. */
export interface ProviderEntry {
  /** Unique identifier for this provider entry */
  id: string;
  /** The underlying LLM client */
  client: BaseLLMClient;
  /** Priority (lower = tried first, defaults to insertion order) */
  priority: number;
  /** Override model name for this provider */
  modelOverride?: string;
}
48
+
49
/** Mutable per-provider health state kept by the Router, keyed by provider id. */
interface ProviderHealth {
  /** False once consecutiveFailures reaches the configured maxFailures. */
  healthy: boolean;
  /** Failures since the last success; reset to 0 on any success. */
  consecutiveFailures: number;
  /** Epoch-ms timestamp of the most recent failure, if any. */
  lastFailure?: number;
  /** Epoch-ms time until which an unhealthy provider is skipped. */
  cooldownUntil?: number;
}
55
+
56
/** Tuning knobs for the Router's retry/failover/health behavior. */
export interface RouterConfig {
  /** Max retries per provider before failover (default: 2) */
  retriesPerProvider?: number;
  /** Max consecutive failures before marking unhealthy (default: 3) */
  maxFailures?: number;
  /** Cooldown period in ms for unhealthy providers (default: 30000) */
  cooldownMs?: number;
  /** Auditor for observability */
  auditor?: Auditor;
}
66
+
67
/** Read-only snapshot of one provider's health, returned by Router.getStatus(). */
export interface ProviderStatus {
  /** Provider entry id. */
  id: string;
  /** True when the provider is currently available (healthy or cooldown expired). */
  healthy: boolean;
  /** Always true in the current implementation (entry is registered). */
  active: boolean;
  /** Failures since the last success. */
  consecutiveFailures: number;
  /** Epoch-ms end of cooldown, when the provider is unhealthy. */
  cooldownUntil?: number;
  /** Effective model name (modelOverride if set, else the client's model). */
  model: string;
}
75
+
76
+ // ============================================================================
77
+ // Router
78
+ // ============================================================================
79
+
80
+ export class Router {
81
+ private providers: ProviderEntry[] = [];
82
+ private health: Map<string, ProviderHealth> = new Map();
83
+ private auditor: Auditor;
84
+ private config: Required<Omit<RouterConfig, 'auditor'>>;
85
+
86
+ constructor(config: RouterConfig = {}) {
87
+ this.auditor = config.auditor ?? new NoopAuditor();
88
+ this.config = {
89
+ retriesPerProvider: config.retriesPerProvider ?? 2,
90
+ maxFailures: config.maxFailures ?? 3,
91
+ cooldownMs: config.cooldownMs ?? 30000,
92
+ };
93
+ }
94
+
95
+ // ========================================================================
96
+ // Provider Management
97
+ // ========================================================================
98
+
99
+ addProvider(entry: ProviderEntry): void {
100
+ this.providers.push(entry);
101
+ this.health.set(entry.id, {
102
+ healthy: true,
103
+ consecutiveFailures: 0,
104
+ });
105
+ // Re-sort by priority
106
+ this.providers.sort((a, b) => a.priority - b.priority);
107
+ }
108
+
109
+ removeProvider(id: string): void {
110
+ this.providers = this.providers.filter(p => p.id !== id);
111
+ this.health.delete(id);
112
+ }
113
+
114
+ setAuditor(auditor: Auditor): void {
115
+ this.auditor = auditor;
116
+ }
117
+
118
+ getStatus(): ProviderStatus[] {
119
+ return this.providers.map(p => ({
120
+ id: p.id,
121
+ healthy: this.isAvailable(p.id),
122
+ active: true,
123
+ consecutiveFailures: this.health.get(p.id)?.consecutiveFailures ?? 0,
124
+ cooldownUntil: this.health.get(p.id)?.cooldownUntil,
125
+ model: p.modelOverride ?? p.client.model,
126
+ }));
127
+ }
128
+
129
+ // ========================================================================
130
+ // Execution with Failover
131
+ // ========================================================================
132
+
133
+ /**
134
+ * Execute a function against providers with automatic failover.
135
+ * Tries each available provider in priority order.
136
+ */
137
+ async execute<T>(
138
+ fn: (client: BaseLLMClient) => Promise<T>,
139
+ context: string = 'execute',
140
+ ): Promise<T> {
141
+ const available = this.getAvailableProviders();
142
+
143
+ if (available.length === 0) {
144
+ throw new Error('No available LLM providers. All providers are unhealthy or in cooldown.');
145
+ }
146
+
147
+ let lastError: Error | undefined;
148
+
149
+ for (const provider of available) {
150
+ for (let attempt = 0; attempt <= this.config.retriesPerProvider; attempt++) {
151
+ try {
152
+ if (attempt > 0) {
153
+ this.auditor.record({
154
+ timestamp: Date.now(),
155
+ type: 'retry',
156
+ provider: provider.id,
157
+ model: provider.modelOverride ?? provider.client.model,
158
+ metadata: { attempt, context },
159
+ });
160
+ }
161
+
162
+ const result = await fn(provider.client);
163
+ this.recordSuccess(provider.id);
164
+ return result;
165
+ } catch (error) {
166
+ lastError = error instanceof Error ? error : new Error(String(error));
167
+ this.auditor.record({
168
+ timestamp: Date.now(),
169
+ type: 'error',
170
+ provider: provider.id,
171
+ model: provider.modelOverride ?? provider.client.model,
172
+ error: lastError.message,
173
+ metadata: { attempt, context },
174
+ });
175
+ }
176
+ }
177
+
178
+ // All retries exhausted for this provider
179
+ this.recordFailure(provider.id);
180
+
181
+ // Try next provider (failover)
182
+ const nextProvider = this.getNextAvailableAfter(provider.id);
183
+ if (nextProvider) {
184
+ this.auditor.record({
185
+ timestamp: Date.now(),
186
+ type: 'failover',
187
+ provider: provider.id,
188
+ metadata: {
189
+ from: provider.id,
190
+ nextProvider: nextProvider.id,
191
+ context,
192
+ reason: lastError?.message,
193
+ },
194
+ });
195
+ }
196
+ }
197
+
198
+ throw lastError ?? new Error('All providers failed');
199
+ }
200
+
201
+ /**
202
+ * Execute a streaming function with failover.
203
+ * On failure, retries with the next provider from the beginning.
204
+ */
205
+ async *executeStream(
206
+ fn: (client: BaseLLMClient) => AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>,
207
+ context: string = 'stream',
208
+ ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
209
+ const available = this.getAvailableProviders();
210
+
211
+ if (available.length === 0) {
212
+ throw new Error('No available LLM providers for streaming.');
213
+ }
214
+
215
+ let lastError: Error | undefined;
216
+
217
+ for (const provider of available) {
218
+ try {
219
+ const stream = fn(provider.client);
220
+ let returnValue: LLMChatResponse | void;
221
+
222
+ // We need to yield all values and capture the return
223
+ while (true) {
224
+ const result = await stream.next();
225
+ if (result.done) {
226
+ returnValue = result.value;
227
+ break;
228
+ }
229
+ yield result.value;
230
+ }
231
+
232
+ this.recordSuccess(provider.id);
233
+ return returnValue;
234
+ } catch (error) {
235
+ lastError = error instanceof Error ? error : new Error(String(error));
236
+ this.recordFailure(provider.id);
237
+
238
+ this.auditor.record({
239
+ timestamp: Date.now(),
240
+ type: 'failover',
241
+ provider: provider.id,
242
+ error: lastError.message,
243
+ metadata: { context },
244
+ });
245
+
246
+ // Continue to next provider
247
+ }
248
+ }
249
+
250
+ throw lastError ?? new Error('All providers failed for streaming');
251
+ }
252
+
253
+ // ========================================================================
254
+ // Convenience Methods
255
+ // ========================================================================
256
+
257
+ /**
258
+ * @deprecated No longer needed — structured output and tools can now be used together.
259
+ */
260
+ private validateOutputAndTools(_options?: ChatOptions): void {
261
+ // Structured output and tools are now allowed together.
262
+ }
263
+
264
+ /**
265
+ * Extract schema from output options.
266
+ * Returns a SchemaConfig or a bare jsonSchema object.
267
+ */
268
+ private getSchemaFromOutput<T>(output: OutputOptions<T>): { config: SchemaConfig<T>; name?: string; description?: string } | { jsonSchema: Record<string, unknown>; name?: string; description?: string } {
269
+ if (output.schema) {
270
+ return {
271
+ config: output.schema,
272
+ name: output.name,
273
+ description: output.description,
274
+ };
275
+ }
276
+ if (output.jsonSchema) {
277
+ return {
278
+ jsonSchema: output.jsonSchema as Record<string, unknown>,
279
+ name: output.name,
280
+ description: output.description,
281
+ };
282
+ }
283
+ throw new Error('output must have either schema or jsonSchema');
284
+ }
285
+
286
+ async chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse> {
287
+ // Validate that output and tools are not used together (VAL-API-005)
288
+ this.validateOutputAndTools(options);
289
+
290
+ // If output parameter is provided, use structured output flow (VAL-API-004)
291
+ if (options?.output) {
292
+ // Type assertion: we know output is defined at this point
293
+ return this.chatWithStructuredOutput(messages, options as ChatOptions & { output: OutputOptions });
294
+ }
295
+
296
+ return this.execute(
297
+ client => client.chat(messages, options),
298
+ 'chat',
299
+ );
300
+ }
301
+
302
+ /**
303
+ * Chat with structured output using the output parameter.
304
+ * Validates response against the schema and returns structured property.
305
+ */
306
+ private async chatWithStructuredOutput<T>(
307
+ messages: LLMChatMessage[],
308
+ options: ChatOptions & { output: OutputOptions<T> },
309
+ ): Promise<LLMChatResponse<T>> {
310
+ const { output } = options;
311
+ const schemaInfo = this.getSchemaFromOutput(output);
312
+ const schemaName = schemaInfo.name ?? 'response';
313
+
314
+ // Emit structured_request event
315
+ this.auditor.record({
316
+ timestamp: Date.now(),
317
+ type: 'structured_request',
318
+ provider: 'router',
319
+ schemaName,
320
+ });
321
+
322
+ // Build ChatOptions with schema for the provider
323
+ // Keep tools if provided — structured output and tools can work together
324
+ const { output: _, ...restOptions } = options;
325
+ const structuredOptions: ChatOptions = {
326
+ ...restOptions,
327
+ // Use jsonSchema for the provider
328
+ jsonSchema: 'config' in schemaInfo
329
+ ? getJsonSchemaFromConfig(schemaInfo.config)
330
+ : schemaInfo.jsonSchema,
331
+ schemaName: schemaInfo.name,
332
+ schemaDescription: schemaInfo.description,
333
+ };
334
+
335
+ const start = Date.now();
336
+
337
+ // Get response from provider
338
+ const response = await this.execute(
339
+ client => client.chat(messages, structuredOptions),
340
+ 'chatWithStructuredOutput',
341
+ );
342
+
343
+ // If the response contains tool calls, skip validation and return as-is
344
+ if (response.message.tool_calls && response.message.tool_calls.length > 0) {
345
+ return response as LLMChatResponse<T>;
346
+ }
347
+
348
+ // Extract text content from response
349
+ const content = typeof response.message.content === 'string'
350
+ ? response.message.content
351
+ : response.message.content
352
+ .filter((part): part is { type: 'text'; text: string } => part.type === 'text')
353
+ .map(part => part.text)
354
+ .join('');
355
+
356
+ // Get the SchemaConfig for validation
357
+ const schemaConfig: SchemaConfig<T> | null = 'config' in schemaInfo ? schemaInfo.config : null;
358
+
359
+ if (!schemaConfig || !schemaConfig.validate) {
360
+ // No validator — return parsed JSON without validation
361
+ try {
362
+ const structured = JSON.parse(content) as T;
363
+ // Emit structured_response event on success
364
+ this.auditor.record({
365
+ timestamp: Date.now(),
366
+ type: 'structured_response',
367
+ provider: response.provider ?? 'router',
368
+ model: response.message.role,
369
+ duration: Date.now() - start,
370
+ schemaName,
371
+ usage: response.usage,
372
+ });
373
+ return {
374
+ ...response,
375
+ structured,
376
+ };
377
+ } catch (error) {
378
+ // JSON parse failed
379
+ const rawOutput = content;
380
+ this.auditor.record({
381
+ timestamp: Date.now(),
382
+ type: 'structured_validation_error',
383
+ provider: response.provider ?? 'router',
384
+ schemaName,
385
+ error: error instanceof Error ? error.message : 'JSON parse failed',
386
+ rawOutput,
387
+ });
388
+ throw new StructuredOutputError(
389
+ `Failed to parse JSON: ${rawOutput}`,
390
+ { rawOutput: rawOutput, cause: error instanceof Error ? error : undefined },
391
+ );
392
+ }
393
+ }
394
+
395
+ // Parse and validate against SchemaConfig
396
+ try {
397
+ const validated = parseStructured(schemaConfig, content);
398
+ // Emit structured_response event on success
399
+ this.auditor.record({
400
+ timestamp: Date.now(),
401
+ type: 'structured_response',
402
+ provider: response.provider ?? 'router',
403
+ duration: Date.now() - start,
404
+ schemaName,
405
+ usage: response.usage,
406
+ });
407
+ return {
408
+ ...response,
409
+ structured: validated,
410
+ };
411
+ } catch (error) {
412
+ // Emit structured_validation_error event
413
+ const rawOutput = content;
414
+ this.auditor.record({
415
+ timestamp: Date.now(),
416
+ type: 'structured_validation_error',
417
+ provider: response.provider ?? 'router',
418
+ schemaName,
419
+ error: error instanceof Error ? error.message : 'Validation failed',
420
+ rawOutput,
421
+ });
422
+ throw error;
423
+ }
424
+ }
425
+
426
+ async chatWithTools(
427
+ messages: LLMChatMessage[],
428
+ options?: ChatOptions & { maxIterations?: number },
429
+ ): Promise<LLMChatResponse> {
430
+ return this.execute(
431
+ client => client.chatWithTools(messages, options),
432
+ 'chatWithTools',
433
+ );
434
+ }
435
+
436
+ async *chatStream(
437
+ messages: LLMChatMessage[],
438
+ options?: ChatOptions,
439
+ ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
440
+ // Structured output via output parameter is not supported on streaming
441
+ // Use generateStructuredStream() instead
442
+ if (options?.output) {
443
+ throw new Error(
444
+ 'The "output" parameter is not supported with chatStream(). '
445
+ + 'Use generateStructuredStream() for streaming structured output.',
446
+ );
447
+ }
448
+
449
+ return yield* this.executeStream(
450
+ client => client.chatStream(messages, options),
451
+ 'chatStream',
452
+ );
453
+ }
454
+
455
+ async embed(text: string): Promise<number[]> {
456
+ return this.execute(
457
+ client => client.embed(text),
458
+ 'embed',
459
+ );
460
+ }
461
+
462
+ async embedArray(texts: string[]): Promise<number[][]> {
463
+ return this.execute(
464
+ client => client.embedArray(texts),
465
+ 'embedArray',
466
+ );
467
+ }
468
+
469
+ async getModels(): Promise<string[]> {
470
+ // Aggregate models from all providers
471
+ const allModels: string[] = [];
472
+ for (const provider of this.providers) {
473
+ try {
474
+ const models = await provider.client.getModels();
475
+ allModels.push(...models);
476
+ } catch {
477
+ // Skip unavailable providers
478
+ }
479
+ }
480
+ return [...new Set(allModels)];
481
+ }
482
+
483
+ async getModelInfo(): Promise<ModelMetadata> {
484
+ return this.execute(
485
+ client => client.getModelInfo(),
486
+ 'getModelInfo',
487
+ );
488
+ }
489
+
490
+ // ========================================================================
491
+ // Structured Output Methods
492
+ // ========================================================================
493
+
494
+ /**
495
+ * Generate structured output from the LLM with automatic failover.
496
+ * Validates the response against the provided SchemaConfig.
497
+ * Throws StructuredOutputError on validation failure.
498
+ *
499
+ * @template T The output type
500
+ * @param config Schema configuration (JSON Schema + optional validator)
501
+ * @param messages Chat messages to send
502
+ * @param options Additional options (temperature, maxTokens, etc.)
503
+ * @returns Validated structured output
504
+ * @throws StructuredOutputError if validation fails
505
+ */
506
+ async generateStructured<T>(
507
+ config: SchemaConfig<T>,
508
+ messages: LLMChatMessage[],
509
+ options?: ChatOptions,
510
+ ): Promise<T> {
511
+ // Get JSON Schema from config
512
+ const jsonSchema = getJsonSchemaFromConfig(config);
513
+ const schemaName = options?.schemaName ?? config.name ?? 'response';
514
+
515
+ // Emit structured_request event
516
+ this.auditor.record({
517
+ timestamp: Date.now(),
518
+ type: 'structured_request',
519
+ provider: 'router',
520
+ schemaName,
521
+ });
522
+
523
+ // Build ChatOptions with schema
524
+ const structuredOptions: ChatOptions = {
525
+ ...options,
526
+ jsonSchema,
527
+ };
528
+
529
+ const start = Date.now();
530
+
531
+ // Execute with failover
532
+ const response = await this.execute(
533
+ client => client.chat(messages, structuredOptions),
534
+ 'generateStructured',
535
+ );
536
+
537
+ // Parse and validate the response
538
+ const content = typeof response.message.content === 'string'
539
+ ? response.message.content
540
+ : response.message.content
541
+ .filter((part): part is { type: 'text'; text: string } => part.type === 'text')
542
+ .map(part => part.text)
543
+ .join('');
544
+
545
+ try {
546
+ const result = parseStructured(config, content);
547
+ // Emit structured_response event on success
548
+ this.auditor.record({
549
+ timestamp: Date.now(),
550
+ type: 'structured_response',
551
+ provider: response.provider ?? 'router',
552
+ duration: Date.now() - start,
553
+ schemaName,
554
+ usage: response.usage,
555
+ });
556
+ return result;
557
+ } catch (error) {
558
+ // Emit structured_validation_error event
559
+ this.auditor.record({
560
+ timestamp: Date.now(),
561
+ type: 'structured_validation_error',
562
+ provider: response.provider ?? 'router',
563
+ schemaName,
564
+ error: error instanceof Error ? error.message : 'Validation failed',
565
+ rawOutput: content,
566
+ });
567
+ throw error;
568
+ }
569
+ }
570
+
571
+ /**
572
+ * Try to generate structured output, returning a result object instead of throwing.
573
+ *
574
+ * @template T The output type
575
+ * @param config Schema configuration (JSON Schema + optional validator)
576
+ * @param messages Chat messages to send
577
+ * @param options Additional options (temperature, maxTokens, etc.)
578
+ * @returns StructuredOutputResult<T>
579
+ */
580
+ async tryParseStructured<T>(
581
+ config: SchemaConfig<T>,
582
+ messages: LLMChatMessage[],
583
+ options?: ChatOptions,
584
+ ): Promise<StructuredOutputResult<T>> {
585
+ try {
586
+ const value = await this.generateStructured(config, messages, options);
587
+ return { ok: true, value };
588
+ } catch (error) {
589
+ // If error is already a StructuredOutputError, use it directly
590
+ if (error instanceof Error && 'rawOutput' in error) {
591
+ return {
592
+ ok: false,
593
+ error: error as unknown as import('./structured-output.js').StructuredOutputError,
594
+ rawOutput: (error as unknown as { rawOutput: string }).rawOutput,
595
+ };
596
+ }
597
+
598
+ // Unexpected error - re-throw
599
+ throw error;
600
+ }
601
+ }
602
+
603
+ /**
604
+ * Stream structured output with partial validated objects.
605
+ *
606
+ * @template T The output type
607
+ * @param config Schema configuration (JSON Schema + optional validator)
608
+ * @param messages Chat messages to send
609
+ * @param options Additional options (temperature, maxTokens, etc.)
610
+ * @yields Partial validated objects as the JSON stream progresses
611
+ * @returns Complete validated object on stream completion
612
+ * @throws StructuredOutputError if final validation fails
613
+ */
614
+ async *generateStructuredStream<T>(
615
+ config: SchemaConfig<T>,
616
+ messages: LLMChatMessage[],
617
+ options?: ChatOptions,
618
+ ): AsyncGenerator<T, T, unknown> {
619
+ // Get JSON Schema from config
620
+ const jsonSchema = getJsonSchemaFromConfig(config);
621
+ const schemaName = options?.schemaName ?? config.name ?? 'response';
622
+
623
+ // Emit structured_request event
624
+ this.auditor.record({
625
+ timestamp: Date.now(),
626
+ type: 'structured_request',
627
+ provider: 'router',
628
+ schemaName,
629
+ });
630
+
631
+ // Build ChatOptions with schema
632
+ const structuredOptions: ChatOptions = {
633
+ ...options,
634
+ jsonSchema,
635
+ };
636
+
637
+ const start = Date.now();
638
+
639
+ // Stream with failover
640
+ const stream = this.executeStream(
641
+ client => client.chatStream(messages, structuredOptions),
642
+ 'generateStructuredStream',
643
+ );
644
+
645
+ // Accumulate text and yield partial validated objects
646
+ const parser = new StreamingJsonParser<T>(config);
647
+ let fullContent = '';
648
+ let lastYielded: T | undefined;
649
+
650
+ try {
651
+ for await (const event of stream) {
652
+ // Only process text events
653
+ if (event.type !== 'text') continue;
654
+
655
+ fullContent += event.content;
656
+
657
+ // Try to parse partial JSON
658
+ const result = parser.feed(event.content);
659
+
660
+ // Yield if we got a valid partial and it's different from last
661
+ if (result.partial !== undefined) {
662
+ // Only yield if different from last (avoid duplicate yields)
663
+ if (lastYielded === undefined || JSON.stringify(result.partial) !== JSON.stringify(lastYielded)) {
664
+ lastYielded = result.partial;
665
+ yield result.partial;
666
+ }
667
+ }
668
+ }
669
+
670
+ // Parse and validate the complete content
671
+ const complete = parseStructured(config, fullContent);
672
+
673
+ // Emit structured_response event on success
674
+ this.auditor.record({
675
+ timestamp: Date.now(),
676
+ type: 'structured_response',
677
+ provider: 'router',
678
+ schemaName,
679
+ duration: Date.now() - start,
680
+ });
681
+
682
+ // Return the complete validated object
683
+ return complete;
684
+ } catch (error) {
685
+ // Emit structured_validation_error event
686
+ this.auditor.record({
687
+ timestamp: Date.now(),
688
+ type: 'structured_validation_error',
689
+ provider: 'router',
690
+ schemaName,
691
+ error: error instanceof Error ? error.message : 'Validation failed',
692
+ rawOutput: fullContent,
693
+ });
694
+ throw error;
695
+ }
696
+ }
697
+
698
+ // ========================================================================
699
+ // Tool Registration (broadcast to all providers)
700
+ // ========================================================================
701
+
702
+ registerTool(
703
+ name: string,
704
+ description: string,
705
+ parameters: import('./interfaces.js').LLMFunction['parameters'],
706
+ handler: import('./interfaces.js').ToolHandler,
707
+ ): void {
708
+ for (const provider of this.providers) {
709
+ provider.client.registerTool(name, description, parameters, handler);
710
+ }
711
+ }
712
+
713
+ registerTools(
714
+ tools: Array<{
715
+ name: string;
716
+ description: string;
717
+ parameters: import('./interfaces.js').LLMFunction['parameters'];
718
+ handler: import('./interfaces.js').ToolHandler;
719
+ }>,
720
+ ): void {
721
+ for (const provider of this.providers) {
722
+ provider.client.registerTools(tools);
723
+ }
724
+ }
725
+
726
+ // ========================================================================
727
+ // Health Management
728
+ // ========================================================================
729
+
730
+ private isAvailable(id: string): boolean {
731
+ const h = this.health.get(id);
732
+ if (!h) return false;
733
+ if (h.healthy) return true;
734
+ // Check if cooldown has expired
735
+ if (h.cooldownUntil && Date.now() >= h.cooldownUntil) {
736
+ // Reset for re-testing
737
+ h.healthy = true;
738
+ h.consecutiveFailures = 0;
739
+ h.cooldownUntil = undefined;
740
+ return true;
741
+ }
742
+ return false;
743
+ }
744
+
745
+ private getAvailableProviders(): ProviderEntry[] {
746
+ return this.providers.filter(p => this.isAvailable(p.id));
747
+ }
748
+
749
+ private getNextAvailableAfter(currentId: string): ProviderEntry | undefined {
750
+ const idx = this.providers.findIndex(p => p.id === currentId);
751
+ for (let i = idx + 1; i < this.providers.length; i++) {
752
+ if (this.isAvailable(this.providers[i]!.id)) {
753
+ return this.providers[i];
754
+ }
755
+ }
756
+ return undefined;
757
+ }
758
+
759
+ private recordSuccess(id: string): void {
760
+ const h = this.health.get(id);
761
+ if (h) {
762
+ h.healthy = true;
763
+ h.consecutiveFailures = 0;
764
+ h.cooldownUntil = undefined;
765
+ }
766
+ }
767
+
768
+ private recordFailure(id: string): void {
769
+ const h = this.health.get(id);
770
+ if (!h) return;
771
+
772
+ h.consecutiveFailures++;
773
+ h.lastFailure = Date.now();
774
+
775
+ if (h.consecutiveFailures >= this.config.maxFailures) {
776
+ h.healthy = false;
777
+ h.cooldownUntil = Date.now() + this.config.cooldownMs;
778
+ }
779
+ }
780
+ }