universal-llm-client 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/CHANGELOG.md +27 -24
  2. package/README.md +60 -11
  3. package/dist/ai-model.d.ts +12 -1
  4. package/dist/ai-model.d.ts.map +1 -1
  5. package/dist/ai-model.js +36 -1
  6. package/dist/ai-model.js.map +1 -1
  7. package/dist/auditor.js.map +1 -1
  8. package/dist/client.js.map +1 -1
  9. package/dist/gemma-channel.d.ts +14 -0
  10. package/dist/gemma-channel.d.ts.map +1 -0
  11. package/dist/gemma-channel.js +38 -0
  12. package/dist/gemma-channel.js.map +1 -0
  13. package/dist/gemma-diffusion.d.ts +49 -0
  14. package/dist/gemma-diffusion.d.ts.map +1 -0
  15. package/dist/gemma-diffusion.js +147 -0
  16. package/dist/gemma-diffusion.js.map +1 -0
  17. package/dist/http.d.ts +4 -0
  18. package/dist/http.d.ts.map +1 -1
  19. package/dist/http.js +14 -1
  20. package/dist/http.js.map +1 -1
  21. package/dist/index.d.ts +2 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +4 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/interfaces.d.ts +163 -7
  26. package/dist/interfaces.d.ts.map +1 -1
  27. package/dist/interfaces.js.map +1 -1
  28. package/dist/mcp.js.map +1 -1
  29. package/dist/providers/anthropic.d.ts.map +1 -1
  30. package/dist/providers/anthropic.js +28 -3
  31. package/dist/providers/anthropic.js.map +1 -1
  32. package/dist/providers/google.d.ts +22 -1
  33. package/dist/providers/google.d.ts.map +1 -1
  34. package/dist/providers/google.js +223 -13
  35. package/dist/providers/google.js.map +1 -1
  36. package/dist/providers/index.js.map +1 -1
  37. package/dist/providers/ollama.d.ts +2 -0
  38. package/dist/providers/ollama.d.ts.map +1 -1
  39. package/dist/providers/ollama.js +59 -30
  40. package/dist/providers/ollama.js.map +1 -1
  41. package/dist/providers/openai.d.ts +14 -0
  42. package/dist/providers/openai.d.ts.map +1 -1
  43. package/dist/providers/openai.js +200 -22
  44. package/dist/providers/openai.js.map +1 -1
  45. package/dist/router.d.ts +2 -0
  46. package/dist/router.d.ts.map +1 -1
  47. package/dist/router.js +4 -0
  48. package/dist/router.js.map +1 -1
  49. package/dist/stream-decoder.d.ts +12 -0
  50. package/dist/stream-decoder.d.ts.map +1 -1
  51. package/dist/stream-decoder.js +182 -5
  52. package/dist/stream-decoder.js.map +1 -1
  53. package/dist/structured-output.js.map +1 -1
  54. package/dist/thinking.d.ts +36 -0
  55. package/dist/thinking.d.ts.map +1 -0
  56. package/dist/thinking.js +52 -0
  57. package/dist/thinking.js.map +1 -0
  58. package/dist/tools.js.map +1 -1
  59. package/dist/zod-adapter.js.map +1 -1
  60. package/package.json +4 -1
  61. package/src/ai-model.ts +400 -0
  62. package/src/auditor.ts +213 -0
  63. package/src/client.ts +402 -0
  64. package/src/debug/debug-google-streaming.ts +97 -0
  65. package/src/debug/debug-tool-execution.ts +86 -0
  66. package/src/debug/test-lmstudio-tools.ts +155 -0
  67. package/src/demos/README.md +47 -0
  68. package/src/demos/basic/universal-llm-examples.ts +161 -0
  69. package/src/demos/diffusion-gemma/.env +29 -0
  70. package/src/demos/diffusion-gemma/.env.example +27 -0
  71. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  72. package/src/demos/diffusion-gemma/README.md +59 -0
  73. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  74. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  75. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  76. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  77. package/src/demos/diffusion-gemma/server.ts +1205 -0
  78. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  79. package/src/demos/mcp/astrid-memory-demo.ts +295 -0
  80. package/src/demos/mcp/astrid-persona-memory.ts +357 -0
  81. package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
  82. package/src/demos/mcp/simple-astrid-memory.ts +148 -0
  83. package/src/demos/mcp/simple-mcp-demo.ts +68 -0
  84. package/src/demos/mcp/working-mcp-demo.ts +62 -0
  85. package/src/demos/model-alias-demo.ts +0 -0
  86. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
  87. package/src/demos/tools/astrid-memory-demo.ts +270 -0
  88. package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
  89. package/src/demos/tools/astrid-production-memory.ts +558 -0
  90. package/src/demos/tools/basic-translation-test.ts +66 -0
  91. package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
  92. package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
  93. package/src/demos/tools/clean-translation-test.ts +119 -0
  94. package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
  95. package/src/demos/tools/complete-rag-demo.ts +369 -0
  96. package/src/demos/tools/complete-tool-demo.ts +132 -0
  97. package/src/demos/tools/demo-tool-calling.ts +124 -0
  98. package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
  99. package/src/demos/tools/hybrid-thinking-test.ts +154 -0
  100. package/src/demos/tools/memory-integration-test.ts +420 -0
  101. package/src/demos/tools/multilingual-memory-system.ts +802 -0
  102. package/src/demos/tools/ondemand-translation-demo.ts +655 -0
  103. package/src/demos/tools/production-tool-demo.ts +245 -0
  104. package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
  105. package/src/demos/tools/rigorous-language-analysis.ts +218 -0
  106. package/src/demos/tools/test-universal-memory-system.ts +126 -0
  107. package/src/demos/tools/translation-integration-guide.ts +346 -0
  108. package/src/demos/tools/universal-memory-system.ts +560 -0
  109. package/src/gemma-channel.ts +47 -0
  110. package/src/gemma-diffusion.ts +167 -0
  111. package/src/http.ts +261 -0
  112. package/src/index.ts +180 -0
  113. package/src/interfaces.ts +843 -0
  114. package/src/mcp.ts +345 -0
  115. package/src/providers/anthropic.ts +796 -0
  116. package/src/providers/google.ts +840 -0
  117. package/src/providers/index.ts +8 -0
  118. package/src/providers/ollama.ts +503 -0
  119. package/src/providers/openai.ts +587 -0
  120. package/src/router.ts +785 -0
  121. package/src/stream-decoder.ts +535 -0
  122. package/src/structured-output.ts +759 -0
  123. package/src/test-scripts/test-advanced-tools.ts +310 -0
  124. package/src/test-scripts/test-google-deep-research.ts +33 -0
  125. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
  126. package/src/test-scripts/test-google-streaming.ts +63 -0
  127. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
  128. package/src/test-scripts/test-google-thinking.ts +46 -0
  129. package/src/test-scripts/test-mcp-config.ts +28 -0
  130. package/src/test-scripts/test-mcp-connection.ts +29 -0
  131. package/src/test-scripts/test-system-message-positions.ts +163 -0
  132. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
  133. package/src/test-scripts/test-tool-calling.ts +231 -0
  134. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  135. package/src/tests/ai-model.test.ts +1614 -0
  136. package/src/tests/auditor.test.ts +224 -0
  137. package/src/tests/gemma-diffusion.test.ts +115 -0
  138. package/src/tests/http.test.ts +200 -0
  139. package/src/tests/interfaces.test.ts +117 -0
  140. package/src/tests/providers/anthropic.test.ts +118 -0
  141. package/src/tests/providers/google.test.ts +841 -0
  142. package/src/tests/providers/ollama.test.ts +1034 -0
  143. package/src/tests/providers/openai.test.ts +1511 -0
  144. package/src/tests/router.test.ts +254 -0
  145. package/src/tests/stream-decoder.test.ts +263 -0
  146. package/src/tests/structured-output.test.ts +1450 -0
  147. package/src/tests/thinking.test.ts +65 -0
  148. package/src/tests/tools.test.ts +175 -0
  149. package/src/thinking.ts +73 -0
  150. package/src/tools.ts +246 -0
  151. package/src/zod-adapter.ts +72 -0
@@ -0,0 +1,843 @@
1
+ /**
2
+ * Universal LLM Client v3 — Core Interfaces
3
+ *
4
+ * All types, enums, and helper functions used throughout the library.
5
+ * Zero dependencies — pure TypeScript types.
6
+ */
7
+
8
+ // ============================================================================
9
+ // Enums
10
+ // ============================================================================
11
+
12
/** Kind of model a client is configured for: chat or embedding. */
export enum AIModelType {
    /** Chat model. */
    Chat = 'chat',
    /** Embedding model. */
    Embedding = 'embedding',
}
16
+
17
/**
 * Provider API families a client can target. The string values are the
 * same literals accepted by `ProviderConfig.type`.
 */
export enum AIModelApiType {
    Ollama = 'ollama',
    OpenAI = 'openai',
    Google = 'google',
    Vertex = 'vertex',
    LlamaCpp = 'llamacpp',
    Anthropic = 'anthropic',
}
25
+
26
+ // ============================================================================
27
+ // Model Metadata
28
+ // ============================================================================
29
+
30
/**
 * Descriptive facts about a model as reported by a provider.
 * Only `contextLength` is required.
 */
export interface ModelMetadata {
    /** Name of the model, as the provider reports it. */
    model?: string;
    /** Size of the context window, measured in tokens. */
    contextLength: number;
    /** Architecture family, e.g. "llama" or "mistral3". */
    architecture?: string;
    /** Number of parameters. */
    parameterCount?: number;
    /** Provider-reported capability tags, e.g. "tools", "vision", "thinking". */
    capabilities?: string[];
}
42
+
43
+ // ============================================================================
44
+ // Provider Configuration (user-facing)
45
+ // ============================================================================
46
+
47
/**
 * User-facing description of one provider endpoint. Only `type` is
 * required; the remaining fields adjust the endpoint, authentication and
 * URL construction.
 */
export interface ProviderConfig {
    /** Provider type — the enum member or its equivalent string literal. */
    type:
        | AIModelApiType
        | 'ollama'
        | 'openai'
        | 'google'
        | 'vertex'
        | 'llamacpp'
        | 'anthropic';
    /** Endpoint URL for the provider; each type has a sensible default. */
    url?: string;
    /** API key or Bearer token. */
    apiKey?: string;
    /** Model name to use for this provider only, overriding the shared one. */
    model?: string;
    /** Explicit priority; lower means higher priority. Defaults to array order. */
    priority?: number;
    /** Region for Vertex AI, e.g. "us-central1". */
    region?: string;
    /** Google API version. Defaults to "v1beta". */
    apiVersion?: 'v1' | 'v1beta';

    /**
     * Additional request headers. Honoured by the providers that go through
     * `buildHeaders` — **OpenAI-compatible and Ollama**. Google/Vertex and
     * Anthropic construct their own auth headers and ignore this field.
     * Handy for Azure (api-key), custom gateways, or non-standard auth.
     * These are merged after the default auth header, so later entries win.
     */
    headers?: Record<string, string>;

    /**
     * Additional query parameters appended to request URLs —
     * **OpenAI-compatible provider only**. Handy for Azure OpenAI
     * (e.g. { 'api-version': '2024-10-21' }).
     */
    queryParams?: Record<string, string>;

    /**
     * Name of the header that carries the API key (default: "Authorization")
     * — **OpenAI-compatible and Ollama only** (via `buildHeaders`).
     * A common alternative for Azure and some gateways is "api-key".
     */
    authHeader?: string;

    /**
     * Text placed in front of the apiKey value inside the auth header
     * (OpenAI-compatible and Ollama only). Defaults to "Bearer " when
     * authHeader is Authorization (or unset), and to "" otherwise. Pass ""
     * explicitly for "api-key: <yourkey>" style auth.
     */
    authPrefix?: string;

    /**
     * OpenAI-compatible providers only: URL path segment appended after the
     * base URL. Default: "/v1".
     * Use "" (or "/") to turn the automatic append off, which is required
     * when the base URL is already a full Azure deployment URL such as
     * ".../deployments/my-deploy".
     */
    apiBasePath?: string;
}
99
+
100
+ // ============================================================================
101
+ // Thinking / Reasoning control
102
+ // ============================================================================
103
+
104
/**
 * Provider-independent reasoning-effort level. Each provider maps it onto
 * its own native control: Gemini 3.x `thinkingConfig.thinkingLevel`,
 * OpenAI `reasoning_effort`, Gemini 2.5 `thinkingBudget`, Anthropic
 * `budget_tokens`, and a plain on/off switch for vLLM/Ollama.
 */
export type ThinkingLevel =
    | 'minimal'
    | 'low'
    | 'medium'
    | 'high';
110
+
111
+ // ============================================================================
112
+ // AIModel Configuration (user-facing)
113
+ // ============================================================================
114
+
115
/**
 * User-facing configuration for a model: the shared model name, the
 * ordered provider list, and request defaults.
 */
export interface AIModelConfig {
    /** Model name, shared by every provider that does not override it. */
    model: string;
    /** Providers in priority order — the first entry has the highest priority. */
    providers: ProviderConfig[];
    /** Parameters applied to every request by default (temperature, top_p, etc.). */
    defaultParameters?: Record<string, unknown>;
    /** Thinking/reasoning switch: `true`/`false`, or a level ('minimal' | 'low' | 'medium' | 'high'). */
    thinking?: boolean | ThinkingLevel;
    /** Request timeout in milliseconds (default: 30000). */
    timeout?: number;
    /** How many retries each provider gets before failover (default: 2). */
    retries?: number;
    /** Hooks for observability. */
    auditor?: import('./auditor.js').Auditor;
    /** Turns on debug logging. */
    debug?: boolean;
}
133
+
134
+ // ============================================================================
135
+ // Internal Client Options
136
+ // ============================================================================
137
+
138
/**
 * Internal options for a single client instance. Several fields are
 * carried over from the matching `ProviderConfig` entry, as noted on each.
 */
export interface LLMClientOptions {
    /** Name of the model. */
    model: string;
    /** API base URL. */
    url: string;
    /** API type, used to select protocol variations. */
    apiType: AIModelApiType;
    /** Whether this is a chat or an embedding model. */
    modelType?: AIModelType;
    /** Parameters applied to requests by default. */
    defaultParameters?: Record<string, unknown>;
    /** Thinking/reasoning switch: `true`/`false`, or a level ('minimal' | 'low' | 'medium' | 'high'). */
    thinking?: boolean | ThinkingLevel;
    /** Request timeout in milliseconds. */
    timeout?: number;
    /** Retry count for failed requests. */
    retries?: number;
    /** API key for endpoints that require authentication. */
    apiKey?: string;
    /** Turns on debug logging. */
    debug?: boolean;
    /** Region for Vertex AI. */
    region?: string;
    /** Google API version. */
    apiVersion?: 'v1' | 'v1beta';

    /**
     * Forces the DiffusionGemma native channel protocol on or off for
     * OpenAI-compatible backends (skip_special_tokens:false plus client-side
     * reasoning/tool-call parsing). When omitted it is auto-detected from
     * the model name. See gemma-diffusion.ts.
     */
    gemmaNativeProtocol?: boolean;

    /**
     * Additional headers merged into every request from this provider
     * instance. Filled from ProviderConfig.headers for advanced auth and
     * gateway scenarios (Azure api-key style, custom x- headers, etc.).
     */
    extraHeaders?: Record<string, string>;

    /** Additional query parameters appended to request URLs (from ProviderConfig.queryParams). */
    queryParams?: Record<string, string>;

    /** Override for the auth header name (from ProviderConfig.authHeader). */
    authHeader?: string;

    /** Prefix for the auth value (from ProviderConfig.authPrefix). */
    authPrefix?: string;

    /**
     * OpenAI-compatible clients: sub-path to append (from
     * ProviderConfig.apiBasePath). The default is "/v1", and `undefined`
     * keeps that default. Use "" or "/" to turn the append off when the
     * base URL already contains the full path.
     */
    apiBasePath?: string;
}
193
+
194
+ // ============================================================================
195
+ // Multimodal Content Types
196
+ // ============================================================================
197
+
198
/** Text part of a multimodal message, tagged with `type: 'text'`. */
export interface LLMTextContent {
    type: 'text';
    /** The text itself. */
    text: string;
}
202
+
203
/** Image part of a multimodal message, tagged with `type: 'image_url'`. */
export interface LLMImageContent {
    type: 'image_url';
    image_url: {
        /** Location of the image. */
        url: string;
        /** Optional detail setting: 'auto', 'low' or 'high'. */
        detail?: 'auto' | 'low' | 'high';
    };
}
210
+
211
/** Audio part of a multimodal message, tagged with `type: 'audio'`. */
export interface LLMAudioContent {
    type: 'audio';
    audio: {
        /** The audio bytes as raw base64. */
        data: string;
        /** MIME type of the audio, e.g. 'audio/ogg', 'audio/wav', 'audio/mp3'. */
        mimeType: string;
    };
}
220
+
221
/** One part of a multimodal message: text, image or audio. */
export type LLMContentPart =
    | LLMTextContent
    | LLMImageContent
    | LLMAudioContent;

/** Message content: a plain string, or a list of content parts. */
export type LLMMessageContent = string | LLMContentPart[];
223
+
224
+ // ============================================================================
225
+ // Chat Message Types
226
+ // ============================================================================
227
+
228
/** A single message in a chat conversation. */
export interface LLMChatMessage {
    /** Who the message is attributed to. */
    role: 'system' | 'user' | 'assistant' | 'tool';
    /** Message body: a plain string or a list of content parts. */
    content: LLMMessageContent;
    /** NOTE(review): presumably links a 'tool' message to the call it answers — confirm against the providers. */
    tool_call_id?: string;
    /** Tool calls attached to this message. */
    tool_calls?: LLMToolCall[];
}
234
+
235
+ // ============================================================================
236
+ // Tool Types
237
+ // ============================================================================
238
+
239
/** A function-style tool call. */
export interface LLMToolCall {
    id: string;
    type: 'function';
    function: {
        /** Name of the function being called. */
        name: string;
        /** Call arguments as a string (presumably JSON-encoded — confirm against the providers). */
        arguments: string;
    };
    /**
     * Thought signature from Gemini 3.x, i.e. encrypted reasoning context.
     * When conversation history is sent back during multi-turn function
     * calling it has to be echoed exactly as received. Gemini 3 requires
     * it, Gemini 2.5 treats it as optional, and other providers ignore it.
     */
    thoughtSignature?: string;
}
254
+
255
/** Declaration of a callable function: name, description and an object-typed parameter schema. */
export interface LLMFunction {
    name: string;
    description: string;
    /** Parameter schema; the top level is always `type: 'object'`. */
    parameters: {
        type: 'object';
        /** Property definitions, keyed by name. */
        properties?: Record<string, unknown>;
        /** Names of the required properties. */
        required?: string[];
    };
}
264
+
265
/** Tool definition that wraps an `LLMFunction` under `type: 'function'`. */
export interface LLMToolDefinition {
    type: 'function';
    /** The wrapped function declaration. */
    function: LLMFunction;
}
269
+
270
/** Record of one executed tool call. */
export interface ToolExecutionResult {
    /** Identifier of the tool call this result belongs to. */
    tool_call_id: string;
    /** Value produced by the tool. */
    output: unknown;
    /** Error text, if any. */
    error?: string;
    /** NOTE(review): units are not stated here — presumably milliseconds; confirm. */
    duration?: number;
}
276
+
277
/** Callback that implements a tool; it may return its result directly or as a promise. */
export type ToolHandler = (
    args: unknown,
) => Promise<unknown> | unknown;
278
+
279
/** Pairs a function declaration with the handler that implements it. */
export interface ToolRegistryEntry {
    /** Declaration of the tool. */
    definition: LLMFunction;
    /** Callback that implements the tool. */
    handler: ToolHandler;
}
283
+
284
/** Map from tool name to its registry entry. */
export interface ToolRegistry {
    [name: string]: ToolRegistryEntry;
}
287
+
288
+ // ============================================================================
289
+ // Chat Options (per-call overrides)
290
+ // ============================================================================
291
+
292
/**
 * Response format used to request structured output.
 *
 * json_schema mode: { type: 'json_schema', json_schema: { name, schema, strict } }
 * json_object mode (legacy): { type: 'json_object' }
 */
export interface ResponseFormat {
    /** Which response format to use. */
    type: 'json_object' | 'json_schema';
    /** JSON Schema definition; required when `type` is 'json_schema'. */
    json_schema?: {
        /** Schema name, used as guidance for the LLM. */
        name: string;
        /** Optional schema description, used as guidance for the LLM. */
        description?: string;
        /** The JSON Schema itself. */
        schema: Record<string, unknown>;
        /** Turns on strict mode, which is required for reliable structured output. */
        strict?: boolean;
    };
}
313
+
314
/**
 * Structured-output options for chat responses.
 *
 * Supplying these makes the response carry a `structured` property holding
 * the validated, typed result. It is the recommended way to ask for
 * structured output through the chat() method.
 *
 * @example
 * ```typescript
 * const UserSchema = z.object({
 *   name: z.string(),
 *   age: z.number(),
 * });
 *
 * const response = await model.chat(messages, {
 *   output: { schema: UserSchema },
 * });
 *
 * // response.structured is typed as { name: string, age: number }
 * console.log(response.structured?.name);
 * ```
 */
export interface OutputOptions<T = unknown> {
    /**
     * Schema configuration for the structured output.
     * Build one from a Zod schema with `fromZod()` from
     * `universal-llm-client/zod`, or pass a raw SchemaConfig with
     * jsonSchema plus an optional validate function.
     */
    schema?: import('./structured-output.js').SchemaConfig<T>;

    /**
     * Raw JSON Schema for the structured output.
     * An alternative to `schema` for when a schema is already defined.
     */
    jsonSchema?: import('./structured-output.js').JSONSchema;

    /**
     * Optional schema name.
     * Providers such as OpenAI use it for better LLM guidance.
     */
    name?: string;

    /**
     * Optional schema description.
     * Providers such as OpenAI use it for better LLM guidance.
     */
    description?: string;

    /**
     * Strict mode for schema validation (OpenAI only).
     * When true, OpenAI enforces the schema exactly (no additional
     * properties, limited schema subset). Defaults to `true`.
     */
    strict?: boolean;
}
369
+
370
/** Per-call options that override model-level defaults for one chat request. */
export interface ChatOptions {
    /** Temperature override. */
    temperature?: number;
    /** Upper bound on generated tokens. */
    maxTokens?: number;
    /**
     * Thinking control for this request only; takes precedence over the
     * model-level `thinking` config. Either `true`/`false` or a level
     * ('minimal' | 'low' | 'medium' | 'high'). Per-provider mapping: Gemini
     * `thinkingLevel`/`thinkingBudget`, OpenAI `reasoning_effort`, vLLM
     * `enable_thinking`, Anthropic `budget_tokens`, Ollama `think`.
     */
    thinking?: boolean | ThinkingLevel;
    /** Tool definitions; filled in from the registry automatically when not set. */
    tools?: LLMToolDefinition[];
    /** Tool choice mode. */
    toolChoice?: 'none' | 'auto' | 'required';
    /** Extra provider-specific parameters. */
    parameters?: Record<string, unknown>;
    /** Abort signal used for cancellation; forwarded to the HTTP layer. */
    signal?: AbortSignal;
    /** Turns tool execution on or off for chatWithTools. */
    executeTools?: boolean;
    /**
     * Turns on provider-side prompt caching where it is supported.
     * - Anthropic: Adds cache_control: { type: 'ephemeral' } to the system prompt block (most common high-impact pattern).
     * - Other providers: May be passed through via parameters/headers or ignored; consult provider docs.
     */
    enablePromptCaching?: boolean;
    /** Cap on tool execution rounds (default: 10). */
    maxIterations?: number;
    /**
     * Selects the stream decoder. Accepted values:
     * - A built-in type name: 'passthrough' | 'standard-chat' | 'interleaved-reasoning'
     * - A custom type name registered via `registerDecoder()`
     * - A pre-built `StreamDecoder` instance for full control
     */
    decoder?:
        | import('./stream-decoder.js').DecoderType
        | string
        | import('./stream-decoder.js').StreamDecoder;

    // ========================================================================
    // Structured Output Options
    // ========================================================================

    /**
     * Structured-output options for the chat response.
     * Supplying them makes the response carry a `structured` property
     * holding the validated result.
     *
     * **Note**: `output` and `tools` cannot be used together.
     * If both are provided, an error will be thrown.
     *
     * @example
     * ```typescript
     * const response = await model.chat(messages, {
     *   output: { schema: UserSchema },
     * });
     * console.log(response.structured);
     * ```
     */
    output?: OutputOptions;

    /**
     * Schema configuration for structured output.
     * The response is validated against it when provided.
     *
     * @deprecated Use `output.schema` or `generateStructured()` instead.
     */
    schema?: import('./structured-output.js').SchemaConfig<unknown>;

    /**
     * Raw JSON Schema for structured output.
     * An alternative to `schema` for when a schema is already defined.
     *
     * @deprecated Use `output.jsonSchema` or `generateStructured()` instead.
     */
    jsonSchema?: import('./structured-output.js').JSONSchema;

    /**
     * Schema name (optional, used for LLM guidance).
     * Some providers require it (e.g., OpenAI strict mode).
     *
     * @deprecated Use `output.name` or `generateStructured()` instead.
     */
    schemaName?: string;

    /**
     * Schema description (optional, used for LLM guidance).
     *
     * @deprecated Use `output.description` or `generateStructured()` instead.
     */
    schemaDescription?: string;

    /**
     * Response format for structured output (legacy json_object mode).
     * New code should prefer `output` or `generateStructured()`.
     *
     * Use { type: 'json_object' } for legacy JSON mode without schema validation.
     */
    responseFormat?: ResponseFormat;

    // ========================================================================
    // Inference Tier Selection
    // ========================================================================

    /**
     * Inference tier selection (provider-specific; Google supports 'flex' and 'priority').
     * - 'flex': 50% cost reduction, best-effort, higher latency (background tasks)
     * - 'priority': Premium pricing, lowest latency, highest reliability (interactive)
     * - 'standard': Default behavior (omitted from request)
     */
    serviceTier?: 'flex' | 'priority' | 'standard';
}
480
+
481
+ // ============================================================================
482
+ // Token Usage
483
+ // ============================================================================
484
+
485
/** Token accounting and timing for one request. */
export interface TokenUsageInfo {
    inputTokens: number;
    /**
     * Visible output tokens, i.e. the streamed `text` content. Where a
     * provider bills thinking separately (Google Gemini), the reasoning
     * trace is excluded here — see `reasoningTokens`.
     */
    outputTokens: number;
    totalTokens: number;
    cachedTokens?: number;
    /**
     * Reasoning/thinking tokens generated on the server but not yielded as
     * visible text. Currently filled in by the Google provider from
     * `usageMetadata.thoughtsTokenCount` for thinking-enabled models.
     * Other providers may fold thinking into `outputTokens` (Ollama) or
     * stream it as `thinking` events (the universal client surfaces these
     * via `DecodedEvent { type: 'thinking' }`); consult the provider.
     */
    reasoningTokens?: number;
    /**
     * Total duration of the request in milliseconds. Measured by the server
     * where the provider reports it (Ollama `total_duration`), and
     * otherwise by client wall-clock (OpenAI-compatible / vLLM return no
     * timing in `usage`).
     */
    durationMs?: number;
    /**
     * Decode throughput, in output tokens per second. Server-precise for
     * Ollama (`eval_count / eval_duration`), and derived from
     * `outputTokens / durationMs` for providers without server-side timing
     * (OpenAI-compatible / vLLM).
     */
    tokensPerSecond?: number;
}
517
+
518
+ // ============================================================================
519
+ // Response Types
520
+ // ============================================================================
521
+
522
/** Result of a chat call; `T` is the type of the optional structured output. */
export interface LLMChatResponse<T = unknown> {
    message: LLMChatMessage;
    /** Finish reason from the provider, when available (e.g. Ollama done_reason, Google finishReason). */
    finishReason?: string;
    /** The model's reasoning/thinking content, if supported. */
    reasoning?: string;
    /** Token usage details. */
    usage?: TokenUsageInfo;
    /** Trace of tool executions (filled in by chatWithTools). */
    toolExecutions?: ToolExecutionResult[];
    /** The provider that served this response. */
    provider?: string;
    /**
     * Validated structured output, present when the `output` parameter was
     * passed to chat(). Its type is the one inferred from the schema given
     * in `output.schema`.
     *
     * Undefined when:
     * - No `output` parameter was provided
     * - Structured output validation failed (throws StructuredOutputError instead)
     *
     * @example
     * ```typescript
     * const response = await model.chat(messages, {
     *   output: { schema: UserSchema },
     * });
     * if (response.structured) {
     *   console.log(response.structured.name); // Fully typed!
     * }
     * ```
     */
    structured?: T;
    /** The inference tier that actually served this response (from provider response headers, e.g. x-gemini-service-tier). */
    serviceTier?: 'flex' | 'priority' | 'standard';
}
556
+
557
+ // ============================================================================
558
+ // Provider Response Types (internal)
559
+ // ============================================================================
560
+
561
/** Internal typing for an Ollama chat response. */
export interface OllamaResponse {
    model: string;
    created_at: string;
    message: {
        role: string;
        content: string;
        thinking?: string;
        tool_calls?: LLMToolCall[];
    };
    done: boolean;
    done_reason?: string;
    /** Time for the whole request, in nanoseconds. */
    total_duration?: number;
    /** Time spent loading the model, in nanoseconds. */
    load_duration?: number;
    prompt_eval_count?: number;
    eval_count?: number;
    /** Time spent evaluating the prompt, in nanoseconds. */
    prompt_eval_duration?: number;
    /** Time spent generating, in nanoseconds. */
    eval_duration?: number;
}
583
+
584
/** Internal typing for an OpenAI-style chat completion response. */
export interface OpenAIResponse {
    id: string;
    object: string;
    created: number;
    model: string;
    choices: {
        index: number;
        message: {
            role: string;
            content: string | null;
            /**
             * Chain-of-thought that reasoning models expose through a
             * dedicated field (vLLM `--reasoning-parser`, DeepSeek-R1, etc.).
             * vLLM names it `reasoning_content`; some gateways name it
             * `reasoning`.
             */
            reasoning?: string;
            reasoning_content?: string;
            tool_calls?: LLMToolCall[];
        };
        finish_reason: string;
    }[];
    usage?: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
        prompt_tokens_details?: {
            cached_tokens?: number;
            audio_tokens?: number;
        };
    };
}
615
+
616
/**
 * Internal typing for one Ollama model description.
 * NOTE(review): presumably an entry of Ollama's model listing — confirm
 * against the Ollama provider.
 */
export interface OllamaModelInfo {
    name: string;
    size: number;
    digest: string;
    details: {
        format: string;
        family: string;
        families: string[];
        parameter_size: string;
        quantization_level: string;
    };
    modified_at: string;
}
629
+
630
/**
 * Internal typing for one OpenAI-style model description.
 * NOTE(review): presumably an entry of the models listing — confirm
 * against the OpenAI provider.
 */
export interface OpenAIModelInfo {
    id: string;
    object: string;
    created: number;
    owned_by: string;
}
636
+
637
+ // ============================================================================
638
+ // Google API Types
639
+ // ============================================================================
640
+
641
/**
 * One part of a Google content entry. Every field is optional; a part may
 * carry text, a function call, a function response, or inline data.
 */
export interface GooglePart {
    text?: string;
    functionCall?: {
        name?: string;
        args?: Record<string, unknown>;
    };
    functionResponse?: {
        name: string;
        response: Record<string, unknown>;
    };
    inlineData?: {
        mimeType: string;
        data: string;
    };
    /** Set to true when the part is a reasoning summary (requires `includeThoughts`). */
    thought?: boolean;
    /** Thought signature from Gemini 3.x; has to be echoed back on functionCall parts. */
    thoughtSignature?: string;
}
660
+
661
/** A single conversation entry: the role that produced it plus its parts. */
export interface GoogleContent {
    role: 'user' | 'model' | 'function';
    parts: Array<GooglePart>;
}
665
+
666
/**
 * Generation settings for a Google request (the type of
 * `GoogleRequest.generationConfig`). All fields are optional.
 */
export interface GoogleGenerationConfig {
    responseMimeType?: string;
    temperature?: number;
    maxOutputTokens?: number;
    topK?: number;
    topP?: number;
    /** Controls for models that support thinking. */
    thinkingConfig?: {
        thinkingBudget?: number;
        /**
         * Ask the API to return thought-summary parts. `GooglePart.thought`
         * is only populated when this is enabled, so the request type has
         * to be able to express it.
         */
        includeThoughts?: boolean;
    };
}
676
+
677
/**
 * Declaration of one callable function: its name, a description, and an
 * object-typed parameter schema.
 */
export interface GoogleFunctionDeclaration {
    name: string;
    description: string;
    /** Parameter schema; the top-level `type` is always `'object'`. */
    parameters: {
        type: 'object';
        properties: Record<string, unknown>;
        required?: Array<string>;
    };
}
686
+
687
/** Tool-usage settings for a Google request. */
export interface GoogleToolConfig {
    /** Function-calling mode and an optional allow-list of function names. */
    functionCallingConfig?: {
        mode: 'AUTO' | 'ANY' | 'NONE';
        allowedFunctionNames?: Array<string>;
    };
}
693
+
694
/**
 * Request body for a Google generation call: the conversation contents
 * plus optional generation settings, system instruction, tools, tool
 * configuration and service tier.
 */
export interface GoogleRequest {
    contents: Array<GoogleContent>;
    generationConfig?: GoogleGenerationConfig;
    /** System instruction expressed as text-only parts. */
    systemInstruction?: { parts: { text: string }[] };
    /** Tool groups, each holding a list of function declarations. */
    tools?: {
        functionDeclarations: Array<GoogleFunctionDeclaration>;
    }[];
    toolConfig?: GoogleToolConfig;
    /**
     * Inference tier: FLEX (50% off, best-effort) or PRIORITY (premium,
     * highest reliability). The type also accepts STANDARD.
     */
    service_tier?: 'FLEX' | 'PRIORITY' | 'STANDARD';
}
705
+
706
/** One candidate inside a Google response. */
export interface GoogleCandidate {
    /** Candidate content: its parts and the role string returned with them. */
    content: {
        parts: Array<GooglePart>;
        role: string;
    };
    finishReason?: string;
    index: number;
}
714
+
715
/** Google response payload: the candidates plus optional usage metadata. */
export interface GoogleResponse {
    candidates: Array<GoogleCandidate>;
    /** Token accounting; the whole object is optional. */
    usageMetadata?: {
        promptTokenCount: number;
        candidatesTokenCount: number;
        totalTokenCount: number;
        cachedContentTokenCount?: number;
        /**
         * Server-side reasoning tokens emitted by Gemini thinking models
         * (e.g. 2.5 Pro / 3.x Pro). Counted toward billing as output but
         * not included in `candidatesTokenCount` and not streamed as text.
         */
        thoughtsTokenCount?: number;
    };
}
730
+
731
+ // ============================================================================
732
+ // Deep Research (Gemini interactions API)
733
+ // ============================================================================
734
+
735
/**
 * Options for an agentic Deep Research interaction (Gemini-only).
 * Every field is optional.
 */
export interface DeepResearchOptions {
    /** Research agent id (default 'deep-research-preview-04-2026'). */
    agent?: string;
    /** Tools the agent may use, e.g. 'google_search', 'url_context', 'code_execution'. */
    tools?: Array<string>;
    /** Emit intermediate reasoning ('auto') or not ('none'). Default 'auto'. */
    thinkingSummaries?: 'auto' | 'none';
    /** Continue a prior interaction (follow-up question). */
    previousInteractionId?: string;
    /** Poll interval in ms while awaiting completion (default 5000). */
    pollIntervalMs?: number;
    /** Overall timeout in ms before giving up the poll loop (default 600000). */
    timeoutMs?: number;
    /** Abort signal forwarded to every request. */
    signal?: AbortSignal;
}
752
+
753
/**
 * One intermediate step in a Deep Research interaction.
 * Both the step and its content entries are open-ended: unknown extra keys
 * are permitted through the index signatures.
 */
export interface DeepResearchStep {
    type?: string;
    content?: { text?: string; [k: string]: unknown }[];
    [k: string]: unknown;
}
759
+
760
/** Terminal (or last-polled) state of a Deep Research interaction. */
export interface DeepResearchResult {
    id: string;
    /**
     * Interaction status. The named literals are the known values; the
     * trailing `string` lets any other status type-check as well.
     */
    status: 'in_progress' | 'completed' | 'failed' | string;
    /** Final research report (`output_text`) when completed. */
    report?: string;
    steps?: Array<DeepResearchStep>;
    error?: unknown;
    /** The raw last interaction object from the API. */
    raw?: unknown;
}
771
+
772
/**
 * Streaming Deep Research event (from `step.delta` updates).
 * `type` is the discriminant: 'thought', 'text' and 'image' events carry
 * `content`, while 'status' events carry `status` instead.
 */
export type DeepResearchEvent =
    | {
          type: 'thought';
          content: string;
      }
    | {
          type: 'text';
          content: string;
      }
    | {
          type: 'image';
          content: unknown;
      }
    | {
          type: 'status';
          status: string;
      };
778
+
779
+ // ============================================================================
780
+ // Helper Functions
781
+ // ============================================================================
782
+
783
/**
 * Build a text content part.
 *
 * @param text - The text to wrap.
 * @returns A part of type `'text'` carrying `text` unchanged.
 */
export function textContent(text: string): LLMTextContent {
    const part: LLMTextContent = { type: 'text', text };
    return part;
}
787
+
788
/**
 * Create an image content part from base64 data or a URL.
 *
 * Input that already starts with `data:`, `http://` or `https://`
 * (matched case-insensitively) is used as the URL verbatim. Anything else
 * is treated as a raw base64 payload and wrapped in a
 * `data:<mimeType>;base64,` URL.
 *
 * @param base64DataOrUrl - Raw base64 payload, data URL, or http(s) URL.
 * @param mimeType - MIME type used only when wrapping a raw base64 payload.
 * @param detail - Optional detail hint, passed through unchanged.
 * @returns An `image_url` content part.
 */
export function imageContent(
    base64DataOrUrl: string,
    mimeType: string = 'image/jpeg',
    detail?: 'auto' | 'low' | 'high',
): LLMImageContent {
    // Require a full scheme prefix. A bare "http" prefix test also matches raw
    // base64 payloads that merely begin with those letters, and URI schemes
    // are case-insensitive, so "HTTPS://..." must be accepted as a URL too.
    const alreadyUrl = /^(?:data:|https?:\/\/)/i.test(base64DataOrUrl);
    const url = alreadyUrl
        ? base64DataOrUrl
        : `data:${mimeType};base64,${base64DataOrUrl}`;
    return {
        type: 'image_url',
        image_url: { url, detail },
    };
}
802
+
803
/**
 * Build a user message made of one text part followed by one image part
 * per entry in `images`.
 *
 * @param text - Text placed first in the message content.
 * @param images - Image inputs, each passed to `imageContent`.
 * @param mimeType - MIME type forwarded to `imageContent` for every image.
 * @returns A chat message with role `'user'`.
 */
export function multimodalMessage(
    text: string,
    images: string[],
    mimeType: string = 'image/jpeg',
): LLMChatMessage {
    const parts: LLMContentPart[] = [textContent(text)];
    for (const image of images) {
        parts.push(imageContent(image, mimeType));
    }
    return { role: 'user', content: parts };
}
815
+
816
/**
 * Extract the text from a message content value.
 *
 * @param content - Either a plain string or an array of content parts.
 * @returns The string itself, or the `text` of every `'text'` part
 *          concatenated in order with no separator. Other parts are ignored.
 */
export function extractTextContent(content: LLMMessageContent): string {
    if (typeof content === 'string') {
        return content;
    }
    const pieces: string[] = [];
    for (const part of content) {
        if (part.type === 'text') {
            pieces.push(part.text);
        }
    }
    return pieces.join('');
}
824
+
825
/**
 * Report whether message content includes at least one image part.
 *
 * @param content - Either a plain string or an array of content parts.
 * @returns `false` for string content; otherwise `true` when any part has
 *          type `'image_url'`.
 */
export function hasImages(content: LLMMessageContent): boolean {
    return typeof content !== 'string'
        && content.some(({ type }) => type === 'image_url');
}
830
+
831
/**
 * Build an audio content part from raw base64 data.
 *
 * @param base64Data - Base64 payload, stored as `audio.data` unchanged.
 * @param mimeType - MIME type stored as `audio.mimeType`.
 * @returns A part of type `'audio'`.
 */
export function audioContent(base64Data: string, mimeType: string): LLMAudioContent {
    const audio = { data: base64Data, mimeType };
    return { type: 'audio', audio };
}
838
+
839
/**
 * Report whether message content includes at least one audio part.
 *
 * @param content - Either a plain string or an array of content parts.
 * @returns `false` for string content; otherwise `true` when any part has
 *          type `'audio'`.
 */
export function hasAudio(content: LLMMessageContent): boolean {
    if (typeof content === 'string') {
        return false;
    }
    for (const part of content) {
        if (part.type === 'audio') {
            return true;
        }
    }
    return false;
}