universal-llm-client 4.5.0 → 4.5.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +2 -0
  3. package/dist/ai-model.d.ts +0 -1
  4. package/dist/ai-model.js +0 -1
  5. package/dist/auditor.d.ts +0 -1
  6. package/dist/auditor.js +0 -1
  7. package/dist/client.d.ts +0 -1
  8. package/dist/client.js +0 -1
  9. package/dist/gemma-channel.d.ts +0 -1
  10. package/dist/gemma-channel.js +0 -1
  11. package/dist/gemma-diffusion.d.ts +0 -1
  12. package/dist/gemma-diffusion.js +0 -1
  13. package/dist/http.d.ts +0 -1
  14. package/dist/http.js +0 -1
  15. package/dist/index.d.ts +0 -1
  16. package/dist/index.js +0 -1
  17. package/dist/interfaces.d.ts +0 -1
  18. package/dist/interfaces.js +0 -1
  19. package/dist/mcp.d.ts +0 -1
  20. package/dist/mcp.js +0 -1
  21. package/dist/providers/anthropic.d.ts +0 -1
  22. package/dist/providers/anthropic.js +0 -1
  23. package/dist/providers/google.d.ts +0 -1
  24. package/dist/providers/google.js +0 -1
  25. package/dist/providers/index.d.ts +0 -1
  26. package/dist/providers/index.js +0 -1
  27. package/dist/providers/ollama.d.ts +0 -1
  28. package/dist/providers/ollama.js +0 -1
  29. package/dist/providers/openai.d.ts +2 -1
  30. package/dist/providers/openai.js +303 -74
  31. package/dist/router.d.ts +0 -1
  32. package/dist/router.js +0 -1
  33. package/dist/stream-decoder.d.ts +0 -1
  34. package/dist/stream-decoder.js +0 -1
  35. package/dist/structured-output.d.ts +0 -1
  36. package/dist/structured-output.js +0 -1
  37. package/dist/thinking.d.ts +0 -1
  38. package/dist/thinking.js +0 -1
  39. package/dist/tools.d.ts +0 -1
  40. package/dist/tools.js +0 -1
  41. package/dist/zod-adapter.d.ts +0 -1
  42. package/dist/zod-adapter.js +0 -1
  43. package/package.json +1 -2
  44. package/dist/ai-model.d.ts.map +0 -1
  45. package/dist/ai-model.js.map +0 -1
  46. package/dist/auditor.d.ts.map +0 -1
  47. package/dist/auditor.js.map +0 -1
  48. package/dist/client.d.ts.map +0 -1
  49. package/dist/client.js.map +0 -1
  50. package/dist/gemma-channel.d.ts.map +0 -1
  51. package/dist/gemma-channel.js.map +0 -1
  52. package/dist/gemma-diffusion.d.ts.map +0 -1
  53. package/dist/gemma-diffusion.js.map +0 -1
  54. package/dist/http.d.ts.map +0 -1
  55. package/dist/http.js.map +0 -1
  56. package/dist/index.d.ts.map +0 -1
  57. package/dist/index.js.map +0 -1
  58. package/dist/interfaces.d.ts.map +0 -1
  59. package/dist/interfaces.js.map +0 -1
  60. package/dist/mcp.d.ts.map +0 -1
  61. package/dist/mcp.js.map +0 -1
  62. package/dist/providers/anthropic.d.ts.map +0 -1
  63. package/dist/providers/anthropic.js.map +0 -1
  64. package/dist/providers/google.d.ts.map +0 -1
  65. package/dist/providers/google.js.map +0 -1
  66. package/dist/providers/index.d.ts.map +0 -1
  67. package/dist/providers/index.js.map +0 -1
  68. package/dist/providers/ollama.d.ts.map +0 -1
  69. package/dist/providers/ollama.js.map +0 -1
  70. package/dist/providers/openai.d.ts.map +0 -1
  71. package/dist/providers/openai.js.map +0 -1
  72. package/dist/router.d.ts.map +0 -1
  73. package/dist/router.js.map +0 -1
  74. package/dist/stream-decoder.d.ts.map +0 -1
  75. package/dist/stream-decoder.js.map +0 -1
  76. package/dist/structured-output.d.ts.map +0 -1
  77. package/dist/structured-output.js.map +0 -1
  78. package/dist/thinking.d.ts.map +0 -1
  79. package/dist/thinking.js.map +0 -1
  80. package/dist/tools.d.ts.map +0 -1
  81. package/dist/tools.js.map +0 -1
  82. package/dist/zod-adapter.d.ts.map +0 -1
  83. package/dist/zod-adapter.js.map +0 -1
  84. package/src/ai-model.ts +0 -400
  85. package/src/auditor.ts +0 -213
  86. package/src/client.ts +0 -402
  87. package/src/debug/debug-google-streaming.ts +0 -97
  88. package/src/debug/debug-tool-execution.ts +0 -86
  89. package/src/debug/test-lmstudio-tools.ts +0 -155
  90. package/src/demos/README.md +0 -47
  91. package/src/demos/basic/universal-llm-examples.ts +0 -161
  92. package/src/demos/diffusion-gemma/.env +0 -29
  93. package/src/demos/diffusion-gemma/.env.example +0 -27
  94. package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
  95. package/src/demos/diffusion-gemma/README.md +0 -59
  96. package/src/demos/diffusion-gemma/canvas.ts +0 -1606
  97. package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
  98. package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
  99. package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
  100. package/src/demos/diffusion-gemma/server.ts +0 -1205
  101. package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
  102. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  103. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  104. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  105. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  106. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  107. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  108. package/src/demos/model-alias-demo.ts +0 -0
  109. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  110. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  111. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  112. package/src/demos/tools/astrid-production-memory.ts +0 -558
  113. package/src/demos/tools/basic-translation-test.ts +0 -66
  114. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  115. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  116. package/src/demos/tools/clean-translation-test.ts +0 -119
  117. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  118. package/src/demos/tools/complete-rag-demo.ts +0 -369
  119. package/src/demos/tools/complete-tool-demo.ts +0 -132
  120. package/src/demos/tools/demo-tool-calling.ts +0 -124
  121. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  122. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  123. package/src/demos/tools/memory-integration-test.ts +0 -420
  124. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  125. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  126. package/src/demos/tools/production-tool-demo.ts +0 -245
  127. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  128. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  129. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  130. package/src/demos/tools/translation-integration-guide.ts +0 -346
  131. package/src/demos/tools/universal-memory-system.ts +0 -560
  132. package/src/gemma-channel.ts +0 -47
  133. package/src/gemma-diffusion.ts +0 -167
  134. package/src/http.ts +0 -261
  135. package/src/index.ts +0 -180
  136. package/src/interfaces.ts +0 -843
  137. package/src/mcp.ts +0 -345
  138. package/src/providers/anthropic.ts +0 -796
  139. package/src/providers/google.ts +0 -840
  140. package/src/providers/index.ts +0 -8
  141. package/src/providers/ollama.ts +0 -503
  142. package/src/providers/openai.ts +0 -587
  143. package/src/router.ts +0 -785
  144. package/src/stream-decoder.ts +0 -535
  145. package/src/structured-output.ts +0 -759
  146. package/src/test-scripts/test-advanced-tools.ts +0 -310
  147. package/src/test-scripts/test-google-deep-research.ts +0 -33
  148. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  149. package/src/test-scripts/test-google-streaming.ts +0 -63
  150. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  151. package/src/test-scripts/test-google-thinking.ts +0 -46
  152. package/src/test-scripts/test-mcp-config.ts +0 -28
  153. package/src/test-scripts/test-mcp-connection.ts +0 -29
  154. package/src/test-scripts/test-system-message-positions.ts +0 -163
  155. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  156. package/src/test-scripts/test-tool-calling.ts +0 -231
  157. package/src/test-scripts/test-vllm-qwen36.ts +0 -256
  158. package/src/tests/ai-model.test.ts +0 -1614
  159. package/src/tests/auditor.test.ts +0 -224
  160. package/src/tests/gemma-diffusion.test.ts +0 -115
  161. package/src/tests/http.test.ts +0 -200
  162. package/src/tests/interfaces.test.ts +0 -117
  163. package/src/tests/providers/anthropic.test.ts +0 -118
  164. package/src/tests/providers/google.test.ts +0 -841
  165. package/src/tests/providers/ollama.test.ts +0 -1034
  166. package/src/tests/providers/openai.test.ts +0 -1511
  167. package/src/tests/router.test.ts +0 -254
  168. package/src/tests/stream-decoder.test.ts +0 -263
  169. package/src/tests/structured-output.test.ts +0 -1450
  170. package/src/tests/thinking.test.ts +0 -65
  171. package/src/tests/tools.test.ts +0 -175
  172. package/src/thinking.ts +0 -73
  173. package/src/tools.ts +0 -246
  174. package/src/zod-adapter.ts +0 -72
@@ -1,8 +0,0 @@
1
- /**
2
- * Universal LLM Client v3 — Provider Barrel Export
3
- */
4
-
5
- export { OllamaClient } from './ollama.js';
6
- export { OpenAICompatibleClient } from './openai.js';
7
- export { GoogleClient } from './google.js';
8
- export { AnthropicClient } from './anthropic.js';
@@ -1,503 +0,0 @@
1
- /**
2
- * Universal LLM Client v3 — Ollama Provider
3
- *
4
- * Implements BaseLLMClient for Ollama's native API.
5
- * Supports chat, streaming (NDJSON), embeddings, model discovery,
6
- * context length detection via /api/show, and structured output.
7
- *
8
- * Structured Output Assertions:
9
- * - VAL-PROVIDER-OLLAMA-001: format parameter with JSON Schema
10
- * - VAL-PROVIDER-OLLAMA-003: Vision with base64 extraction alongside format
11
- * - VAL-PROVIDER-OLLAMA-004: format "json" vs schema modes
12
- */
13
-
14
- import { BaseLLMClient } from '../client.js';
15
- import { resolveThinking } from '../thinking.js';
16
- import { httpRequest, httpStream, parseNDJSON, buildHeaders } from '../http.js';
17
- import { StandardChatDecoder } from '../stream-decoder.js';
18
- import {
19
- normalizeJsonSchema,
20
- getJsonSchemaFromConfig,
21
- } from '../structured-output.js';
22
- import { extractGemmaThoughtChannels } from '../gemma-channel.js';
23
- import type {
24
- LLMClientOptions,
25
- LLMChatMessage,
26
- LLMChatResponse,
27
- ChatOptions,
28
- ModelMetadata,
29
- OllamaResponse,
30
- OllamaModelInfo,
31
- LLMToolDefinition,
32
- LLMToolCall,
33
- TokenUsageInfo,
34
- } from '../interfaces.js';
35
- import type { DecodedEvent } from '../stream-decoder.js';
36
- import type { Auditor } from '../auditor.js';
37
-
38
- export class OllamaClient extends BaseLLMClient {
39
- constructor(options: LLMClientOptions, auditor?: Auditor) {
40
- super({
41
- ...options,
42
- url: (options.url || 'http://localhost:11434').replace(/\/+$/, ''),
43
- }, auditor);
44
- }
45
-
46
- // ========================================================================
47
- // Chat
48
- // ========================================================================
49
-
50
- async chat(
51
- messages: LLMChatMessage[],
52
- options?: ChatOptions,
53
- ): Promise<LLMChatResponse> {
54
- // Structured output and tools can now be used together.\n // The provider sends both format and tools in the request.\n // The Router handles skipping validation when the response contains tool calls.
55
-
56
- const url = `${this.options.url}/api/chat`;
57
- const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
58
-
59
- const body: Record<string, unknown> = {
60
- model: this.options.model,
61
- messages: this.convertMessages(messages),
62
- stream: false,
63
- options: this.buildOllamaOptions(options),
64
- };
65
-
66
- if (tools?.length) {
67
- body['tools'] = this.convertToolsToOllama(tools);
68
- }
69
-
70
- // Enable native thinking by default — thinking models produce better
71
- // tool selections and reasoning when allowed to think before acting.
72
- // Ollama `think` is on/off (no levels); default on for thinking models.
73
- body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
74
-
75
- // Handle structured output via format parameter
76
- const schemaOptions = this.extractSchemaOptions(options);
77
- if (schemaOptions) {
78
- body['format'] = this.buildFormatParameter(schemaOptions);
79
- } else if (options?.responseFormat) {
80
- // Legacy json_object mode - map to Ollama's "json" format
81
- body['format'] = 'json';
82
- }
83
-
84
- const start = Date.now();
85
- this.auditor.record({
86
- timestamp: start,
87
- type: 'request',
88
- provider: 'ollama',
89
- model: this.options.model,
90
- });
91
-
92
- const response = await httpRequest<OllamaResponse>(url, {
93
- method: 'POST',
94
- headers: buildHeaders(this.options),
95
- body,
96
- timeout: this.options.timeout ?? 30000,
97
- });
98
-
99
- const data = response.data;
100
- const usage: TokenUsageInfo | undefined = (data.prompt_eval_count || data.eval_count)
101
- ? {
102
- inputTokens: data.prompt_eval_count ?? 0,
103
- outputTokens: data.eval_count ?? 0,
104
- totalTokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0),
105
- // Ollama reports server-precise timing in nanoseconds.
106
- durationMs: data.total_duration ? data.total_duration / 1e6 : undefined,
107
- tokensPerSecond: data.eval_duration && data.eval_count
108
- ? data.eval_count / (data.eval_duration / 1e9)
109
- : undefined,
110
- }
111
- : undefined;
112
-
113
- // Normalize tool calls (Ollama sometimes omits IDs and empty args).
114
- const toolCalls = data.message.tool_calls?.map(tc => this.normalizeToolCall(tc));
115
-
116
- const gemmaContent = extractGemmaThoughtChannels(data.message.content || '');
117
- const reasoning = [data.message.thinking, gemmaContent.reasoning].filter(Boolean).join('\n\n') || undefined;
118
-
119
- const result: LLMChatResponse = {
120
- message: {
121
- role: 'assistant',
122
- content: gemmaContent.content,
123
- tool_calls: toolCalls,
124
- },
125
- finishReason: data.done_reason,
126
- reasoning,
127
- usage,
128
- provider: 'ollama',
129
- };
130
-
131
- this.auditor.record({
132
- timestamp: Date.now(),
133
- type: 'response',
134
- provider: 'ollama',
135
- model: this.options.model,
136
- duration: Date.now() - start,
137
- usage,
138
- });
139
-
140
- return result;
141
- }
142
-
143
- // ========================================================================
144
- // Streaming
145
- // ========================================================================
146
-
147
- async *chatStream(
148
- messages: LLMChatMessage[],
149
- options?: ChatOptions,
150
- ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
151
- const url = `${this.options.url}/api/chat`;
152
- const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
153
-
154
- const body: Record<string, unknown> = {
155
- model: this.options.model,
156
- messages: this.convertMessages(messages),
157
- stream: true,
158
- options: this.buildOllamaOptions(options),
159
- };
160
-
161
- if (tools?.length) {
162
- body['tools'] = this.convertToolsToOllama(tools);
163
- }
164
-
165
- // Ollama `think` is on/off (no levels); default on for thinking models.
166
- body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
167
-
168
- const start = Date.now();
169
- this.auditor.record({
170
- timestamp: start,
171
- type: 'stream_start',
172
- provider: 'ollama',
173
- model: this.options.model,
174
- });
175
-
176
- const decoderEvents: DecodedEvent[] = [];
177
- const decoder = new StandardChatDecoder(event => decoderEvents.push(event));
178
- let lastResponse: OllamaResponse | undefined;
179
- const streamedToolCalls: import('../interfaces.js').LLMToolCall[] = [];
180
-
181
- // Stream idle timeout: thinking models can pause for minutes between chunks.
182
- // Ensure at least 5 minutes regardless of the base request timeout.
183
- const streamTimeout = Math.max(this.options.timeout ?? 300000, 300000);
184
-
185
- const stream = httpStream(url, {
186
- method: 'POST',
187
- headers: buildHeaders(this.options),
188
- body,
189
- timeout: streamTimeout,
190
- });
191
-
192
- for await (const chunk of parseNDJSON<OllamaResponse>(stream)) {
193
- lastResponse = chunk;
194
-
195
- if (chunk.message?.thinking) {
196
- decoder.pushReasoning(chunk.message.thinking);
197
- const pending = decoderEvents.splice(0);
198
- for (const event of pending) {
199
- yield event;
200
- }
201
- }
202
-
203
- if (chunk.message?.content) {
204
- decoder.push(chunk.message.content);
205
- const pending = decoderEvents.splice(0);
206
- for (const event of pending) {
207
- yield event;
208
- }
209
- }
210
-
211
- if (chunk.message?.tool_calls?.length) {
212
- const normalized = chunk.message.tool_calls.map(tc => this.normalizeToolCall(tc));
213
- streamedToolCalls.push(...normalized);
214
- yield { type: 'tool_call', calls: normalized };
215
- }
216
- }
217
-
218
- decoder.flush();
219
- const pending = decoderEvents.splice(0);
220
- for (const event of pending) {
221
- yield event;
222
- }
223
-
224
- const usage: TokenUsageInfo | undefined = lastResponse?.prompt_eval_count
225
- ? {
226
- inputTokens: lastResponse.prompt_eval_count ?? 0,
227
- outputTokens: lastResponse.eval_count ?? 0,
228
- totalTokens: (lastResponse.prompt_eval_count ?? 0) + (lastResponse.eval_count ?? 0),
229
- durationMs: lastResponse.total_duration ? lastResponse.total_duration / 1e6 : undefined,
230
- tokensPerSecond: lastResponse.eval_duration && lastResponse.eval_count
231
- ? lastResponse.eval_count / (lastResponse.eval_duration / 1e9)
232
- : undefined,
233
- }
234
- : undefined;
235
-
236
- this.auditor.record({
237
- timestamp: Date.now(),
238
- type: 'stream_end',
239
- provider: 'ollama',
240
- model: this.options.model,
241
- duration: Date.now() - start,
242
- usage,
243
- });
244
-
245
- return {
246
- message: {
247
- role: 'assistant',
248
- content: decoder.getCleanContent(),
249
- tool_calls: streamedToolCalls.length > 0 ? streamedToolCalls : undefined,
250
- },
251
- finishReason: lastResponse?.done_reason,
252
- reasoning: decoder.getReasoning(),
253
- usage,
254
- provider: 'ollama',
255
- };
256
- }
257
-
258
- private normalizeToolCall(
259
- toolCall: Partial<LLMToolCall> & { function?: Partial<LLMToolCall['function']> },
260
- ): LLMToolCall {
261
- return {
262
- ...toolCall,
263
- id: toolCall.id || this.generateToolCallId(),
264
- type: 'function',
265
- function: {
266
- ...toolCall.function,
267
- name: toolCall.function?.name || '',
268
- arguments: this.normalizeToolArguments(toolCall.function?.arguments),
269
- },
270
- };
271
- }
272
-
273
- private normalizeToolArguments(args: unknown): string {
274
- if (typeof args === 'string') {
275
- return args.trim().length > 0 ? args : '{}';
276
- }
277
- if (args == null) {
278
- return '{}';
279
- }
280
- return JSON.stringify(args) ?? '{}';
281
- }
282
-
283
- // ========================================================================
284
- // Embeddings
285
- // ========================================================================
286
-
287
- async embed(text: string): Promise<number[]> {
288
- const url = `${this.options.url}/api/embed`;
289
- const response = await httpRequest<{ embeddings: number[][] }>(url, {
290
- method: 'POST',
291
- headers: buildHeaders(this.options),
292
- body: { model: this.options.model, input: text },
293
- timeout: this.options.timeout ?? 30000,
294
- });
295
- return response.data.embeddings[0] ?? [];
296
- }
297
-
298
- override async embedArray(texts: string[]): Promise<number[][]> {
299
- const url = `${this.options.url}/api/embed`;
300
- const response = await httpRequest<{ embeddings: number[][] }>(url, {
301
- method: 'POST',
302
- headers: buildHeaders(this.options),
303
- body: { model: this.options.model, input: texts },
304
- timeout: this.options.timeout ?? 30000,
305
- });
306
- return response.data.embeddings;
307
- }
308
-
309
- // ========================================================================
310
- // Model Discovery
311
- // ========================================================================
312
-
313
- async getModels(): Promise<string[]> {
314
- const url = `${this.options.url}/api/tags`;
315
- const response = await httpRequest<{ models: OllamaModelInfo[] }>(url, {
316
- timeout: 5000,
317
- });
318
- return response.data.models.map(m => m.name);
319
- }
320
-
321
- override async getModelInfo(modelName?: string): Promise<ModelMetadata> {
322
- const url = `${this.options.url}/api/show`;
323
- try {
324
- const targetModel = modelName ?? this.options.model;
325
- const response = await httpRequest<Record<string, unknown>>(url, {
326
- method: 'POST',
327
- body: { name: targetModel },
328
- timeout: 5000,
329
- });
330
-
331
- const modelInfo = response.data['model_info'] as Record<string, unknown> | undefined;
332
- if (!modelInfo) return { contextLength: 8192 };
333
-
334
- // Extract architecture-specific context length
335
- const arch = modelInfo['general.architecture'] as string | undefined;
336
- let contextLength = 8192;
337
-
338
- if (arch) {
339
- const ctxKey = `${arch}.context_length`;
340
- const ctxValue = modelInfo[ctxKey] as number | undefined;
341
- if (ctxValue) contextLength = ctxValue;
342
- }
343
-
344
- // Prefer the live deployment context when available. /api/show reports
345
- // the trained maximum; /api/ps reports what the daemon has actually loaded.
346
- try {
347
- const psResponse = await httpRequest<{ models?: Array<{ name?: string; context_length?: number }> }>(
348
- `${this.options.url}/api/ps`,
349
- { timeout: 5000 },
350
- );
351
- const liveModel = psResponse.data.models?.find(
352
- model => model.name?.toLowerCase() === targetModel.toLowerCase(),
353
- );
354
- if (liveModel?.context_length && liveModel.context_length > 0) {
355
- contextLength = Math.min(contextLength, liveModel.context_length);
356
- }
357
- } catch {
358
- // Ignore /api/ps failures — /api/show is still a valid fallback
359
- }
360
-
361
- const paramCountRaw = modelInfo['general.parameter_count'] as number | undefined;
362
- const capabilities = response.data['capabilities'] as string[] | undefined;
363
-
364
- return {
365
- model: targetModel,
366
- contextLength,
367
- architecture: arch,
368
- parameterCount: paramCountRaw,
369
- capabilities,
370
- };
371
- } catch {
372
- return { contextLength: 8192 };
373
- }
374
- }
375
-
376
- // ========================================================================
377
- // Readiness
378
- // ========================================================================
379
-
380
- /** Ensure model is available, pull if missing */
381
- async ensureReady(): Promise<void> {
382
- try {
383
- await this.getModelInfo();
384
- } catch {
385
- // Try pulling the model
386
- this.debugLog(`Model not found, attempting pull: ${this.options.model}`);
387
- await httpRequest(`${this.options.url}/api/pull`, {
388
- method: 'POST',
389
- body: { name: this.options.model },
390
- timeout: 300000, // 5 min for pull
391
- });
392
- }
393
- }
394
-
395
- // ========================================================================
396
- // Internals
397
- // ========================================================================
398
-
399
- private convertMessages(messages: LLMChatMessage[]): Record<string, unknown>[] {
400
- return messages.map(msg => {
401
- const converted: Record<string, unknown> = { role: msg.role };
402
-
403
- // Handle multimodal content (array of text + image parts)
404
- if (Array.isArray(msg.content)) {
405
- const textParts: string[] = [];
406
- const images: string[] = [];
407
-
408
- for (const part of msg.content) {
409
- if (part.type === 'text') {
410
- textParts.push(part.text);
411
- } else if (part.type === 'audio') {
412
- this.debugLog('Ollama: skipping audio content (not supported)');
413
- } else if (part.type === 'image_url' && part.image_url?.url) {
414
- // Extract base64 data from data URL or use raw base64
415
- const url = part.image_url.url;
416
- if (url.startsWith('data:')) {
417
- // data:image/jpeg;base64,XXXX → extract XXXX
418
- const base64Data = url.split(',')[1];
419
- if (base64Data) images.push(base64Data);
420
- } else if (url.startsWith('http')) {
421
- // Ollama doesn't support URLs directly — skip
422
- // (caller should download and convert to base64)
423
- this.debugLog('Ollama vision: skipping URL image, use base64 instead');
424
- } else {
425
- // Assume raw base64
426
- images.push(url);
427
- }
428
- }
429
- }
430
-
431
- converted['content'] = textParts.join('\n');
432
- if (images.length > 0) {
433
- converted['images'] = images;
434
- }
435
- } else {
436
- converted['content'] = msg.content ?? '';
437
- }
438
-
439
- // Ollama needs tool call arguments as objects, not strings
440
- if (msg.tool_calls?.length) {
441
- converted['tool_calls'] = msg.tool_calls.map(tc => ({
442
- ...tc,
443
- function: {
444
- ...tc.function,
445
- arguments: typeof tc.function.arguments === 'string'
446
- ? (() => { try { return JSON.parse(tc.function.arguments); } catch { return tc.function.arguments; } })()
447
- : tc.function.arguments,
448
- },
449
- }));
450
- }
451
-
452
- // Preserve tool_call_id for tool result messages
453
- if (msg.tool_call_id) {
454
- converted['tool_call_id'] = msg.tool_call_id;
455
- }
456
-
457
- return converted;
458
- });
459
- }
460
-
461
- private convertToolsToOllama(tools: LLMToolDefinition[]): unknown[] {
462
- return tools.map(t => ({
463
- type: 'function',
464
- function: {
465
- name: t.function.name,
466
- description: t.function.description,
467
- parameters: t.function.parameters,
468
- },
469
- }));
470
- }
471
-
472
- private buildOllamaOptions(options?: ChatOptions): Record<string, unknown> {
473
- const params: Record<string, unknown> = {
474
- ...this.options.defaultParameters,
475
- ...options?.parameters,
476
- };
477
- if (options?.temperature !== undefined) params['temperature'] = options.temperature;
478
- if (options?.maxTokens !== undefined) params['num_predict'] = options.maxTokens;
479
- return params;
480
- }
481
-
482
- // ========================================================================
483
- // Structured Output Helpers
484
- // ========================================================================
485
-
486
- /**
487
- * Build Ollama format parameter from schema options.
488
- * Ollama accepts:
489
- * - format: "json" for simple JSON mode
490
- * - format: { ...schema } for structured output with JSON Schema
491
- */
492
- private buildFormatParameter(options: { schemaConfig?: import('../structured-output.js').SchemaConfig<unknown>, jsonSchema?: import('../structured-output.js').JSONSchema }): string | import('../structured-output.js').JSONSchema {
493
- if (options.jsonSchema) {
494
- return normalizeJsonSchema(options.jsonSchema);
495
- }
496
-
497
- if (options.schemaConfig) {
498
- return getJsonSchemaFromConfig(options.schemaConfig);
499
- }
500
-
501
- return 'json';
502
- }
503
- }