universal-llm-client 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/CHANGELOG.md +142 -103
  2. package/LICENSE +21 -21
  3. package/README.md +640 -591
  4. package/dist/ai-model.d.ts +12 -1
  5. package/dist/ai-model.d.ts.map +1 -1
  6. package/dist/ai-model.js +36 -1
  7. package/dist/ai-model.js.map +1 -1
  8. package/dist/gemma-channel.d.ts +14 -0
  9. package/dist/gemma-channel.d.ts.map +1 -0
  10. package/dist/gemma-channel.js +38 -0
  11. package/dist/gemma-channel.js.map +1 -0
  12. package/dist/gemma-diffusion.d.ts +49 -0
  13. package/dist/gemma-diffusion.d.ts.map +1 -0
  14. package/dist/gemma-diffusion.js +147 -0
  15. package/dist/gemma-diffusion.js.map +1 -0
  16. package/dist/http.d.ts +4 -0
  17. package/dist/http.d.ts.map +1 -1
  18. package/dist/http.js +14 -1
  19. package/dist/http.js.map +1 -1
  20. package/dist/index.d.ts +2 -1
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +4 -0
  23. package/dist/index.js.map +1 -1
  24. package/dist/interfaces.d.ts +183 -7
  25. package/dist/interfaces.d.ts.map +1 -1
  26. package/dist/interfaces.js.map +1 -1
  27. package/dist/providers/anthropic.d.ts.map +1 -1
  28. package/dist/providers/anthropic.js +28 -3
  29. package/dist/providers/anthropic.js.map +1 -1
  30. package/dist/providers/google.d.ts +22 -1
  31. package/dist/providers/google.d.ts.map +1 -1
  32. package/dist/providers/google.js +225 -13
  33. package/dist/providers/google.js.map +1 -1
  34. package/dist/providers/ollama.d.ts +2 -0
  35. package/dist/providers/ollama.d.ts.map +1 -1
  36. package/dist/providers/ollama.js +59 -30
  37. package/dist/providers/ollama.js.map +1 -1
  38. package/dist/providers/openai.d.ts +14 -0
  39. package/dist/providers/openai.d.ts.map +1 -1
  40. package/dist/providers/openai.js +200 -22
  41. package/dist/providers/openai.js.map +1 -1
  42. package/dist/router.d.ts +2 -0
  43. package/dist/router.d.ts.map +1 -1
  44. package/dist/router.js +4 -0
  45. package/dist/router.js.map +1 -1
  46. package/dist/stream-decoder.d.ts +12 -0
  47. package/dist/stream-decoder.d.ts.map +1 -1
  48. package/dist/stream-decoder.js +182 -5
  49. package/dist/stream-decoder.js.map +1 -1
  50. package/dist/thinking.d.ts +36 -0
  51. package/dist/thinking.d.ts.map +1 -0
  52. package/dist/thinking.js +52 -0
  53. package/dist/thinking.js.map +1 -0
  54. package/package.json +118 -116
  55. package/src/ai-model.ts +400 -350
  56. package/src/auditor.ts +213 -213
  57. package/src/client.ts +402 -402
  58. package/src/debug/debug-google-streaming.ts +1 -1
  59. package/src/demos/basic/universal-llm-examples.ts +3 -3
  60. package/src/demos/diffusion-gemma/.env +29 -0
  61. package/src/demos/diffusion-gemma/.env.example +27 -0
  62. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  63. package/src/demos/diffusion-gemma/README.md +59 -0
  64. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  65. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  66. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  67. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  68. package/src/demos/diffusion-gemma/server.ts +1205 -0
  69. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  70. package/src/gemma-channel.ts +47 -0
  71. package/src/gemma-diffusion.ts +167 -0
  72. package/src/http.ts +261 -247
  73. package/src/index.ts +180 -161
  74. package/src/interfaces.ts +843 -657
  75. package/src/mcp.ts +345 -345
  76. package/src/providers/anthropic.ts +796 -762
  77. package/src/providers/google.ts +840 -620
  78. package/src/providers/index.ts +8 -8
  79. package/src/providers/ollama.ts +503 -469
  80. package/src/providers/openai.ts +587 -392
  81. package/src/router.ts +785 -780
  82. package/src/stream-decoder.ts +535 -361
  83. package/src/structured-output.ts +759 -759
  84. package/src/test-scripts/test-google-deep-research.ts +33 -0
  85. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
  86. package/src/test-scripts/test-google-streaming.ts +1 -1
  87. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
  88. package/src/test-scripts/test-google-thinking.ts +46 -0
  89. package/src/test-scripts/test-system-message-positions.ts +163 -163
  90. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
  91. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  92. package/src/tests/ai-model.test.ts +1614 -1614
  93. package/src/tests/auditor.test.ts +224 -224
  94. package/src/tests/gemma-diffusion.test.ts +115 -0
  95. package/src/tests/http.test.ts +200 -200
  96. package/src/tests/interfaces.test.ts +117 -117
  97. package/src/tests/providers/anthropic.test.ts +118 -0
  98. package/src/tests/providers/google.test.ts +841 -660
  99. package/src/tests/providers/ollama.test.ts +1034 -954
  100. package/src/tests/providers/openai.test.ts +1511 -1122
  101. package/src/tests/router.test.ts +254 -254
  102. package/src/tests/stream-decoder.test.ts +263 -179
  103. package/src/tests/structured-output.test.ts +1450 -1450
  104. package/src/tests/thinking.test.ts +65 -0
  105. package/src/tests/tools.test.ts +175 -175
  106. package/src/thinking.ts +73 -0
  107. package/src/tools.ts +246 -246
  108. package/src/zod-adapter.ts +72 -72
package/src/ai-model.ts CHANGED
@@ -1,350 +1,400 @@
1
- /**
2
- * Universal LLM Client v3 — AIModel (The Universal Client)
3
- *
4
- * The only public-facing class. Developers configure one model with
5
- * multiple provider backends for transparent failover.
6
- *
7
- * Provider classes are internal — the user never imports them.
8
- */
9
-
10
- import {
11
- AIModelApiType,
12
- type AIModelConfig,
13
- type ProviderConfig,
14
- type LLMClientOptions,
15
- type LLMChatMessage,
16
- type LLMChatResponse,
17
- type ChatOptions,
18
- type ModelMetadata,
19
- type LLMFunction,
20
- type ToolHandler,
21
- } from './interfaces.js';
22
- import type { DecodedEvent } from './stream-decoder.js';
23
- import { Router, type RouterConfig, type ProviderStatus } from './router.js';
24
- import type { Auditor } from './auditor.js';
25
- import { NoopAuditor } from './auditor.js';
26
- import { OllamaClient } from './providers/ollama.js';
27
- import { OpenAICompatibleClient } from './providers/openai.js';
28
- import { GoogleClient } from './providers/google.js';
29
- import { AnthropicClient } from './providers/anthropic.js';
30
- import { BaseLLMClient } from './client.js';
31
- import {
32
- type StructuredOutputResult,
33
- type SchemaConfig,
34
- } from './structured-output.js';
35
-
36
- // ============================================================================
37
- // Default Provider URLs
38
- // ============================================================================
39
-
40
- const DEFAULT_URLS: Record<string, string> = {
41
- ollama: 'http://localhost:11434',
42
- openai: 'https://api.openai.com',
43
- llamacpp: 'http://localhost:8080',
44
- anthropic: 'https://api.anthropic.com',
45
- // google and vertex build their own URLs internally
46
- };
47
-
48
- // ============================================================================
49
- // AIModel — The Universal Client
50
- // ============================================================================
51
-
52
- export class AIModel {
53
- private router: Router;
54
- private auditor: Auditor;
55
- private config: AIModelConfig;
56
-
57
- constructor(config: AIModelConfig) {
58
- this.config = config;
59
- this.auditor = config.auditor ?? new NoopAuditor();
60
-
61
- const routerConfig: RouterConfig = {
62
- retriesPerProvider: config.retries ?? 2,
63
- auditor: this.auditor,
64
- };
65
- this.router = new Router(routerConfig);
66
-
67
- // Initialize providers in order
68
- for (let i = 0; i < config.providers.length; i++) {
69
- const providerConfig = config.providers[i]!;
70
- const client = this.createClient(providerConfig);
71
- const id = `${this.normalizeType(providerConfig.type)}-${i}`;
72
-
73
- this.router.addProvider({
74
- id,
75
- client,
76
- priority: providerConfig.priority ?? i,
77
- modelOverride: providerConfig.model,
78
- });
79
- }
80
- }
81
-
82
- // ========================================================================
83
- // Chat
84
- // ========================================================================
85
-
86
- /** Send a chat request with automatic failover across providers */
87
- async chat(
88
- messages: LLMChatMessage[],
89
- options?: ChatOptions,
90
- ): Promise<LLMChatResponse> {
91
- return this.router.chat(messages, options);
92
- }
93
-
94
- /** Chat with automatic tool execution (multi-turn loop) */
95
- async chatWithTools(
96
- messages: LLMChatMessage[],
97
- options?: ChatOptions & { maxIterations?: number },
98
- ): Promise<LLMChatResponse> {
99
- return this.router.chatWithTools(messages, options);
100
- }
101
-
102
- /** Stream chat response with pluggable decoder strategy */
103
- async *chatStream(
104
- messages: LLMChatMessage[],
105
- options?: ChatOptions,
106
- ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
107
- return yield* this.router.chatStream(messages, options);
108
- }
109
-
110
- // ========================================================================
111
- // Structured Output
112
- // ========================================================================
113
-
114
- /**
115
- * Generate structured output from the LLM with automatic failover.
116
- * Validates the response against the provided Zod schema.
117
- * Throws StructuredOutputError on validation failure.
118
- *
119
- * @template T The output type
120
- * @param config Schema configuration (JSON Schema + optional validator)
121
- * @param messages Chat messages to send
122
- * @param options Additional options (temperature, maxTokens, etc.)
123
- * @returns Promise resolving to validated structured output
124
- * @throws StructuredOutputError if JSON parsing fails or validation fails
125
- *
126
- * @example
127
- * ```typescript
128
- * import { fromZod } from 'universal-llm-client/zod';
129
- * const UserConfig = fromZod(z.object({
130
- * name: z.string(),
131
- * age: z.number(),
132
- * }));
133
- *
134
- * const user = await model.generateStructured(UserConfig, [
135
- * { role: 'user', content: 'Generate a user profile' },
136
- * ]);
137
- * // user.name: string, user.age: number
138
- * ```
139
- */
140
- async generateStructured<T>(
141
- config: SchemaConfig<T>,
142
- messages: LLMChatMessage[],
143
- options?: ChatOptions,
144
- ): Promise<T> {
145
- return this.router.generateStructured(config, messages, options);
146
- }
147
-
148
- /**
149
- * Try to generate structured output, returning a result object instead of throwing.
150
- * Same as generateStructured but returns { ok: true, value } on success
151
- * and { ok: false, error, rawOutput } on failure.
152
- *
153
- * @template T The output type
154
- * @param config Schema configuration (JSON Schema + optional validator)
155
- * @param messages Chat messages to send
156
- * @param options Additional options (temperature, maxTokens, etc.)
157
- * @returns StructuredOutputResult<T> - either success with value or failure with error
158
- *
159
- * @example
160
- * ```typescript
161
- * const result = await model.tryParseStructured(config, messages);
162
- *
163
- * if (result.ok) {
164
- * console.log('User:', result.value.name);
165
- * } else {
166
- * console.log('Error:', result.error.message);
167
- * console.log('Raw output:', result.rawOutput);
168
- * }
169
- * ```
170
- */
171
- async tryParseStructured<T>(
172
- config: SchemaConfig<T>,
173
- messages: LLMChatMessage[],
174
- options?: ChatOptions,
175
- ): Promise<StructuredOutputResult<T>> {
176
- return this.router.tryParseStructured(config, messages, options);
177
- }
178
-
179
- /**
180
- * Stream structured output with partial validated objects.
181
- *
182
- * Yields partial validated objects as JSON generates, then returns the
183
- * complete validated object on stream completion.
184
- *
185
- * For invalid partial JSON, no yield occurs (partial validation is best-effort).
186
- * On stream completion, if the final JSON fails validation, throws StructuredOutputError.
187
- *
188
- * @template T The output type
189
- * @param config Schema configuration (JSON Schema + optional validator)
190
- * @param messages Chat messages to send
191
- * @param options Additional options (temperature, maxTokens, etc.)
192
- * @yields Partial validated objects as the JSON stream progresses
193
- * @returns Complete validated object on stream completion
194
- * @throws StructuredOutputError if final validation fails
195
- *
196
- * @example
197
- * ```typescript
198
- * import { fromZod } from 'universal-llm-client/zod';
199
- * const UserConfig = fromZod(z.object({
200
- * name: z.string(),
201
- * age: z.number(),
202
- * }));
203
- *
204
- * const stream = model.generateStructuredStream(UserConfig, [
205
- * { role: 'user', content: 'Generate a user' },
206
- * ]);
207
- *
208
- * for await (const partial of stream) {
209
- * console.log('Partial user:', partial);
210
- * }
211
- * ```
212
- */
213
- async *generateStructuredStream<T>(
214
- config: SchemaConfig<T>,
215
- messages: LLMChatMessage[],
216
- options?: ChatOptions,
217
- ): AsyncGenerator<T, T, unknown> {
218
- return yield* this.router.generateStructuredStream(config, messages, options);
219
- }
220
-
221
- // ========================================================================
222
- // Embeddings
223
- // ========================================================================
224
-
225
- /** Generate embedding for a single text */
226
- async embed(text: string): Promise<number[]> {
227
- return this.router.embed(text);
228
- }
229
-
230
- /** Generate embeddings for multiple texts */
231
- async embedArray(texts: string[]): Promise<number[][]> {
232
- return this.router.embedArray(texts);
233
- }
234
-
235
- // ========================================================================
236
- // Tool Registration
237
- // ========================================================================
238
-
239
- /** Register a tool callable by the LLM (broadcast to all providers) */
240
- registerTool(
241
- name: string,
242
- description: string,
243
- parameters: LLMFunction['parameters'],
244
- handler: ToolHandler,
245
- ): void {
246
- this.router.registerTool(name, description, parameters, handler);
247
- }
248
-
249
- /** Register multiple tools at once */
250
- registerTools(
251
- tools: Array<{
252
- name: string;
253
- description: string;
254
- parameters: LLMFunction['parameters'];
255
- handler: ToolHandler;
256
- }>,
257
- ): void {
258
- this.router.registerTools(tools);
259
- }
260
-
261
- // ========================================================================
262
- // Model Management
263
- // ========================================================================
264
-
265
- /** Get available models from all configured providers */
266
- async getModels(): Promise<string[]> {
267
- return this.router.getModels();
268
- }
269
-
270
- /** Get metadata about the current model (context length, capabilities) */
271
- async getModelInfo(): Promise<ModelMetadata> {
272
- return this.router.getModelInfo();
273
- }
274
-
275
- /** Switch model at runtime (updates all providers) */
276
- setModel(name: string): void {
277
- this.config.model = name;
278
- // The model name change will be picked up by the providers
279
- // through the router on next request
280
- }
281
-
282
- /** Get the current model name */
283
- get model(): string {
284
- return this.config.model;
285
- }
286
-
287
- // ========================================================================
288
- // Provider Status
289
- // ========================================================================
290
-
291
- /** Get health/status of all configured providers */
292
- getProviderStatus(): ProviderStatus[] {
293
- return this.router.getStatus();
294
- }
295
-
296
- // ========================================================================
297
- // Lifecycle
298
- // ========================================================================
299
-
300
- /** Clean shutdown — flush auditor, disconnect MCP, etc. */
301
- async dispose(): Promise<void> {
302
- await this.auditor.flush?.();
303
- }
304
-
305
- // ========================================================================
306
- // Internal: Provider Factory
307
- // ========================================================================
308
-
309
- private createClient(providerConfig: ProviderConfig): BaseLLMClient {
310
- const type = this.normalizeType(providerConfig.type);
311
- const modelName = providerConfig.model ?? this.config.model;
312
-
313
- const clientOptions: LLMClientOptions = {
314
- model: modelName,
315
- url: providerConfig.url ?? DEFAULT_URLS[type] ?? '',
316
- apiType: type as AIModelApiType,
317
- apiKey: providerConfig.apiKey,
318
- timeout: this.config.timeout ?? 30000,
319
- retries: this.config.retries ?? 2,
320
- debug: this.config.debug ?? false,
321
- defaultParameters: this.config.defaultParameters,
322
- thinking: this.config.thinking ?? false,
323
- region: providerConfig.region,
324
- apiVersion: providerConfig.apiVersion,
325
- };
326
-
327
- switch (type) {
328
- case 'ollama':
329
- return new OllamaClient(clientOptions, this.auditor);
330
-
331
- case 'openai':
332
- case 'llamacpp':
333
- return new OpenAICompatibleClient(clientOptions, this.auditor);
334
-
335
- case 'google':
336
- case 'vertex':
337
- return new GoogleClient(clientOptions, this.auditor);
338
-
339
- case 'anthropic':
340
- return new AnthropicClient(clientOptions, this.auditor);
341
-
342
- default:
343
- throw new Error(`Unknown provider type: ${type}`);
344
- }
345
- }
346
-
347
- private normalizeType(type: string): string {
348
- return type.toLowerCase();
349
- }
350
- }
1
+ /**
2
+ * Universal LLM Client v3 — AIModel (The Universal Client)
3
+ *
4
+ * The only public-facing class. Developers configure one model with
5
+ * multiple provider backends for transparent failover.
6
+ *
7
+ * Provider classes are internal — the user never imports them.
8
+ */
9
+
10
+ import {
11
+ AIModelApiType,
12
+ type AIModelConfig,
13
+ type ProviderConfig,
14
+ type LLMClientOptions,
15
+ type LLMChatMessage,
16
+ type LLMChatResponse,
17
+ type ChatOptions,
18
+ type ModelMetadata,
19
+ type LLMFunction,
20
+ type ToolHandler,
21
+ type DeepResearchOptions,
22
+ type DeepResearchResult,
23
+ type DeepResearchEvent,
24
+ } from './interfaces.js';
25
+ import type { DecodedEvent } from './stream-decoder.js';
26
+ import { Router, type RouterConfig, type ProviderStatus } from './router.js';
27
+ import type { Auditor } from './auditor.js';
28
+ import { NoopAuditor } from './auditor.js';
29
+ import { OllamaClient } from './providers/ollama.js';
30
+ import { OpenAICompatibleClient } from './providers/openai.js';
31
+ import { GoogleClient } from './providers/google.js';
32
+ import { AnthropicClient } from './providers/anthropic.js';
33
+ import { BaseLLMClient } from './client.js';
34
+ import {
35
+ type StructuredOutputResult,
36
+ type SchemaConfig,
37
+ } from './structured-output.js';
38
+
39
+ // ============================================================================
40
+ // Default Provider URLs
41
+ // ============================================================================
42
+
43
+ const DEFAULT_URLS: Record<string, string> = {
44
+ ollama: 'http://localhost:11434',
45
+ openai: 'https://api.openai.com',
46
+ llamacpp: 'http://localhost:8080',
47
+ anthropic: 'https://api.anthropic.com',
48
+ // google and vertex build their own URLs internally
49
+ };
50
+
51
+ // ============================================================================
52
+ // AIModel — The Universal Client
53
+ // ============================================================================
54
+
55
+ export class AIModel {
56
+ private router: Router;
57
+ private auditor: Auditor;
58
+ private config: AIModelConfig;
59
+
60
+ constructor(config: AIModelConfig) {
61
+ this.config = config;
62
+ this.auditor = config.auditor ?? new NoopAuditor();
63
+
64
+ const routerConfig: RouterConfig = {
65
+ retriesPerProvider: config.retries ?? 2,
66
+ auditor: this.auditor,
67
+ };
68
+ this.router = new Router(routerConfig);
69
+
70
+ // Initialize providers in order
71
+ for (let i = 0; i < config.providers.length; i++) {
72
+ const providerConfig = config.providers[i]!;
73
+ const client = this.createClient(providerConfig);
74
+ const id = `${this.normalizeType(providerConfig.type)}-${i}`;
75
+
76
+ this.router.addProvider({
77
+ id,
78
+ client,
79
+ priority: providerConfig.priority ?? i,
80
+ modelOverride: providerConfig.model,
81
+ });
82
+ }
83
+ }
84
+
85
+ // ========================================================================
86
+ // Chat
87
+ // ========================================================================
88
+
89
+ /** Send a chat request with automatic failover across providers */
90
+ async chat(
91
+ messages: LLMChatMessage[],
92
+ options?: ChatOptions,
93
+ ): Promise<LLMChatResponse> {
94
+ return this.router.chat(messages, options);
95
+ }
96
+
97
+ /** Chat with automatic tool execution (multi-turn loop) */
98
+ async chatWithTools(
99
+ messages: LLMChatMessage[],
100
+ options?: ChatOptions & { maxIterations?: number },
101
+ ): Promise<LLMChatResponse> {
102
+ return this.router.chatWithTools(messages, options);
103
+ }
104
+
105
+ /** Stream chat response with pluggable decoder strategy */
106
+ async *chatStream(
107
+ messages: LLMChatMessage[],
108
+ options?: ChatOptions,
109
+ ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
110
+ return yield* this.router.chatStream(messages, options);
111
+ }
112
+
113
+ // ========================================================================
114
+ // Structured Output
115
+ // ========================================================================
116
+
117
+ /**
118
+ * Generate structured output from the LLM with automatic failover.
119
+ * Validates the response against the provided Zod schema.
120
+ * Throws StructuredOutputError on validation failure.
121
+ *
122
+ * @template T The output type
123
+ * @param config Schema configuration (JSON Schema + optional validator)
124
+ * @param messages Chat messages to send
125
+ * @param options Additional options (temperature, maxTokens, etc.)
126
+ * @returns Promise resolving to validated structured output
127
+ * @throws StructuredOutputError if JSON parsing fails or validation fails
128
+ *
129
+ * @example
130
+ * ```typescript
131
+ * import { fromZod } from 'universal-llm-client/zod';
132
+ * const UserConfig = fromZod(z.object({
133
+ * name: z.string(),
134
+ * age: z.number(),
135
+ * }));
136
+ *
137
+ * const user = await model.generateStructured(UserConfig, [
138
+ * { role: 'user', content: 'Generate a user profile' },
139
+ * ]);
140
+ * // user.name: string, user.age: number
141
+ * ```
142
+ */
143
+ async generateStructured<T>(
144
+ config: SchemaConfig<T>,
145
+ messages: LLMChatMessage[],
146
+ options?: ChatOptions,
147
+ ): Promise<T> {
148
+ return this.router.generateStructured(config, messages, options);
149
+ }
150
+
151
+ /**
152
+ * Try to generate structured output, returning a result object instead of throwing.
153
+ * Same as generateStructured but returns { ok: true, value } on success
154
+ * and { ok: false, error, rawOutput } on failure.
155
+ *
156
+ * @template T The output type
157
+ * @param config Schema configuration (JSON Schema + optional validator)
158
+ * @param messages Chat messages to send
159
+ * @param options Additional options (temperature, maxTokens, etc.)
160
+ * @returns StructuredOutputResult<T> - either success with value or failure with error
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const result = await model.tryParseStructured(config, messages);
165
+ *
166
+ * if (result.ok) {
167
+ * console.log('User:', result.value.name);
168
+ * } else {
169
+ * console.log('Error:', result.error.message);
170
+ * console.log('Raw output:', result.rawOutput);
171
+ * }
172
+ * ```
173
+ */
174
+ async tryParseStructured<T>(
175
+ config: SchemaConfig<T>,
176
+ messages: LLMChatMessage[],
177
+ options?: ChatOptions,
178
+ ): Promise<StructuredOutputResult<T>> {
179
+ return this.router.tryParseStructured(config, messages, options);
180
+ }
181
+
182
+ /**
183
+ * Stream structured output with partial validated objects.
184
+ *
185
+ * Yields partial validated objects as JSON generates, then returns the
186
+ * complete validated object on stream completion.
187
+ *
188
+ * For invalid partial JSON, no yield occurs (partial validation is best-effort).
189
+ * On stream completion, if the final JSON fails validation, throws StructuredOutputError.
190
+ *
191
+ * @template T The output type
192
+ * @param config Schema configuration (JSON Schema + optional validator)
193
+ * @param messages Chat messages to send
194
+ * @param options Additional options (temperature, maxTokens, etc.)
195
+ * @yields Partial validated objects as the JSON stream progresses
196
+ * @returns Complete validated object on stream completion
197
+ * @throws StructuredOutputError if final validation fails
198
+ *
199
+ * @example
200
+ * ```typescript
201
+ * import { fromZod } from 'universal-llm-client/zod';
202
+ * const UserConfig = fromZod(z.object({
203
+ * name: z.string(),
204
+ * age: z.number(),
205
+ * }));
206
+ *
207
+ * const stream = model.generateStructuredStream(UserConfig, [
208
+ * { role: 'user', content: 'Generate a user' },
209
+ * ]);
210
+ *
211
+ * for await (const partial of stream) {
212
+ * console.log('Partial user:', partial);
213
+ * }
214
+ * ```
215
+ */
216
+ async *generateStructuredStream<T>(
217
+ config: SchemaConfig<T>,
218
+ messages: LLMChatMessage[],
219
+ options?: ChatOptions,
220
+ ): AsyncGenerator<T, T, unknown> {
221
+ return yield* this.router.generateStructuredStream(config, messages, options);
222
+ }
223
+
224
+ // ========================================================================
225
+ // Embeddings
226
+ // ========================================================================
227
+
228
+ /** Generate embedding for a single text */
229
+ async embed(text: string): Promise<number[]> {
230
+ return this.router.embed(text);
231
+ }
232
+
233
+ /** Generate embeddings for multiple texts */
234
+ async embedArray(texts: string[]): Promise<number[][]> {
235
+ return this.router.embedArray(texts);
236
+ }
237
+
238
+ // ========================================================================
239
+ // Deep Research (Gemini-only)
240
+ // ========================================================================
241
+
242
+ private getGoogleClient(method: string): GoogleClient {
243
+ const googleClients = this.router.getClients().filter(
244
+ (c): c is GoogleClient => c instanceof GoogleClient,
245
+ );
246
+ // Prefer an AI Studio client — Vertex AI doesn't support Deep Research.
247
+ const aiStudio = googleClients.find(c => c.supportsDeepResearch());
248
+ if (aiStudio) return aiStudio;
249
+ if (googleClients.length > 0) {
250
+ throw new Error(
251
+ `${method} requires an AI Studio Google provider (type: "google"); Vertex AI is not supported for Deep Research.`,
252
+ );
253
+ }
254
+ throw new Error(
255
+ `${method} requires a Google provider (type: "google"). None is configured.`,
256
+ );
257
+ }
258
+
259
+ /**
260
+ * Run an agentic Deep Research interaction (Gemini only): creates it and
261
+ * polls until completion. Throws if no Google provider is configured.
262
+ */
263
+ async deepResearch(input: string, options?: DeepResearchOptions): Promise<DeepResearchResult> {
264
+ return this.getGoogleClient('deepResearch').deepResearch(input, options);
265
+ }
266
+
267
+ /**
268
+ * Stream a Deep Research interaction's intermediate thought/text/step events
269
+ * (Gemini only), returning the final result. Throws if no Google provider.
270
+ */
271
+ async *deepResearchStream(
272
+ input: string,
273
+ options?: DeepResearchOptions,
274
+ ): AsyncGenerator<DeepResearchEvent, DeepResearchResult, unknown> {
275
+ return yield* this.getGoogleClient('deepResearchStream').deepResearchStream(input, options);
276
+ }
277
+
278
+ // ========================================================================
279
+ // Tool Registration
280
+ // ========================================================================
281
+
282
+ /** Register a tool callable by the LLM (broadcast to all providers) */
283
+ registerTool(
284
+ name: string,
285
+ description: string,
286
+ parameters: LLMFunction['parameters'],
287
+ handler: ToolHandler,
288
+ ): void {
289
+ this.router.registerTool(name, description, parameters, handler);
290
+ }
291
+
292
+ /** Register multiple tools at once */
293
+ registerTools(
294
+ tools: Array<{
295
+ name: string;
296
+ description: string;
297
+ parameters: LLMFunction['parameters'];
298
+ handler: ToolHandler;
299
+ }>,
300
+ ): void {
301
+ this.router.registerTools(tools);
302
+ }
303
+
304
+ // ========================================================================
305
+ // Model Management
306
+ // ========================================================================
307
+
308
+ /** Get available models from all configured providers */
309
+ async getModels(): Promise<string[]> {
310
+ return this.router.getModels();
311
+ }
312
+
313
+ /** Get metadata about the current model (context length, capabilities) */
314
+ async getModelInfo(): Promise<ModelMetadata> {
315
+ return this.router.getModelInfo();
316
+ }
317
+
318
+ /** Switch model at runtime (updates all providers) */
319
+ setModel(name: string): void {
320
+ this.config.model = name;
321
+ // The model name change will be picked up by the providers
322
+ // through the router on next request
323
+ }
324
+
325
+ /** Get the current model name */
326
+ get model(): string {
327
+ return this.config.model;
328
+ }
329
+
330
+ // ========================================================================
331
+ // Provider Status
332
+ // ========================================================================
333
+
334
+ /** Get health/status of all configured providers */
335
+ getProviderStatus(): ProviderStatus[] {
336
+ return this.router.getStatus();
337
+ }
338
+
339
+ // ========================================================================
340
+ // Lifecycle
341
+ // ========================================================================
342
+
343
+ /** Clean shutdown — flush auditor, disconnect MCP, etc. */
344
+ async dispose(): Promise<void> {
345
+ await this.auditor.flush?.();
346
+ }
347
+
348
+ // ========================================================================
349
+ // Internal: Provider Factory
350
+ // ========================================================================
351
+
352
+ private createClient(providerConfig: ProviderConfig): BaseLLMClient {
353
+ const type = this.normalizeType(providerConfig.type);
354
+ const modelName = providerConfig.model ?? this.config.model;
355
+
356
+ const clientOptions: LLMClientOptions = {
357
+ model: modelName,
358
+ url: providerConfig.url ?? DEFAULT_URLS[type] ?? '',
359
+ apiType: type as AIModelApiType,
360
+ apiKey: providerConfig.apiKey,
361
+ timeout: this.config.timeout ?? 30000,
362
+ retries: this.config.retries ?? 2,
363
+ debug: this.config.debug ?? false,
364
+ defaultParameters: this.config.defaultParameters,
365
+ // Preserve `undefined` (not set) vs explicit false so providers can
366
+ // decide whether to send a thinking toggle at all.
367
+ thinking: this.config.thinking,
368
+ region: providerConfig.region,
369
+ apiVersion: providerConfig.apiVersion,
370
+ extraHeaders: providerConfig.headers,
371
+ queryParams: providerConfig.queryParams,
372
+ authHeader: providerConfig.authHeader,
373
+ authPrefix: providerConfig.authPrefix,
374
+ apiBasePath: providerConfig.apiBasePath,
375
+ };
376
+
377
+ switch (type) {
378
+ case 'ollama':
379
+ return new OllamaClient(clientOptions, this.auditor);
380
+
381
+ case 'openai':
382
+ case 'llamacpp':
383
+ return new OpenAICompatibleClient(clientOptions, this.auditor);
384
+
385
+ case 'google':
386
+ case 'vertex':
387
+ return new GoogleClient(clientOptions, this.auditor);
388
+
389
+ case 'anthropic':
390
+ return new AnthropicClient(clientOptions, this.auditor);
391
+
392
+ default:
393
+ throw new Error(`Unknown provider type: ${type}`);
394
+ }
395
+ }
396
+
397
+ private normalizeType(type: string): string {
398
+ return type.toLowerCase();
399
+ }
400
+ }