universal-llm-client 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/CHANGELOG.md +139 -103
  2. package/LICENSE +21 -21
  3. package/README.md +591 -591
  4. package/dist/ai-model.js.map +1 -1
  5. package/dist/auditor.js.map +1 -1
  6. package/dist/client.js.map +1 -1
  7. package/dist/http.js.map +1 -1
  8. package/dist/index.d.ts +1 -1
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +1 -1
  11. package/dist/index.js.map +1 -1
  12. package/dist/interfaces.d.ts +20 -0
  13. package/dist/interfaces.d.ts.map +1 -1
  14. package/dist/interfaces.js.map +1 -1
  15. package/dist/mcp.js.map +1 -1
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/google.d.ts.map +1 -1
  18. package/dist/providers/google.js +2 -0
  19. package/dist/providers/google.js.map +1 -1
  20. package/dist/providers/index.js.map +1 -1
  21. package/dist/providers/ollama.js.map +1 -1
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/router.js.map +1 -1
  24. package/dist/stream-decoder.js.map +1 -1
  25. package/dist/structured-output.d.ts +24 -1
  26. package/dist/structured-output.d.ts.map +1 -1
  27. package/dist/structured-output.js +58 -5
  28. package/dist/structured-output.js.map +1 -1
  29. package/dist/tools.js.map +1 -1
  30. package/dist/zod-adapter.js.map +1 -1
  31. package/package.json +115 -116
  32. package/src/ai-model.ts +0 -350
  33. package/src/auditor.ts +0 -213
  34. package/src/client.ts +0 -402
  35. package/src/debug/debug-google-streaming.ts +0 -97
  36. package/src/debug/debug-tool-execution.ts +0 -86
  37. package/src/debug/test-lmstudio-tools.ts +0 -155
  38. package/src/demos/README.md +0 -47
  39. package/src/demos/basic/universal-llm-examples.ts +0 -161
  40. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  41. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  42. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  43. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  44. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  45. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  46. package/src/demos/model-alias-demo.ts +0 -0
  47. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  48. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  49. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  50. package/src/demos/tools/astrid-production-memory.ts +0 -558
  51. package/src/demos/tools/basic-translation-test.ts +0 -66
  52. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  53. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  54. package/src/demos/tools/clean-translation-test.ts +0 -119
  55. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  56. package/src/demos/tools/complete-rag-demo.ts +0 -369
  57. package/src/demos/tools/complete-tool-demo.ts +0 -132
  58. package/src/demos/tools/demo-tool-calling.ts +0 -124
  59. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  60. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  61. package/src/demos/tools/memory-integration-test.ts +0 -420
  62. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  63. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  64. package/src/demos/tools/production-tool-demo.ts +0 -245
  65. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  66. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  67. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  68. package/src/demos/tools/translation-integration-guide.ts +0 -346
  69. package/src/demos/tools/universal-memory-system.ts +0 -560
  70. package/src/http.ts +0 -247
  71. package/src/index.ts +0 -160
  72. package/src/interfaces.ts +0 -657
  73. package/src/mcp.ts +0 -345
  74. package/src/providers/anthropic.ts +0 -762
  75. package/src/providers/google.ts +0 -620
  76. package/src/providers/index.ts +0 -8
  77. package/src/providers/ollama.ts +0 -469
  78. package/src/providers/openai.ts +0 -392
  79. package/src/router.ts +0 -780
  80. package/src/stream-decoder.ts +0 -361
  81. package/src/structured-output.ts +0 -702
  82. package/src/test-scripts/test-advanced-tools.ts +0 -310
  83. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  84. package/src/test-scripts/test-google-streaming.ts +0 -63
  85. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  86. package/src/test-scripts/test-mcp-config.ts +0 -28
  87. package/src/test-scripts/test-mcp-connection.ts +0 -29
  88. package/src/test-scripts/test-system-message-positions.ts +0 -163
  89. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  90. package/src/test-scripts/test-tool-calling.ts +0 -231
  91. package/src/tests/ai-model.test.ts +0 -1614
  92. package/src/tests/auditor.test.ts +0 -224
  93. package/src/tests/http.test.ts +0 -200
  94. package/src/tests/interfaces.test.ts +0 -117
  95. package/src/tests/providers/google.test.ts +0 -660
  96. package/src/tests/providers/ollama.test.ts +0 -954
  97. package/src/tests/providers/openai.test.ts +0 -1122
  98. package/src/tests/router.test.ts +0 -254
  99. package/src/tests/stream-decoder.test.ts +0 -179
  100. package/src/tests/structured-output.test.ts +0 -1340
  101. package/src/tests/tools.test.ts +0 -175
  102. package/src/tools.ts +0 -246
  103. package/src/zod-adapter.ts +0 -72
@@ -1,469 +0,0 @@
1
- /**
2
- * Universal LLM Client v3 — Ollama Provider
3
- *
4
- * Implements BaseLLMClient for Ollama's native API.
5
- * Supports chat, streaming (NDJSON), embeddings, model discovery,
6
- * context length detection via /api/show, and structured output.
7
- *
8
- * Structured Output Assertions:
9
- * - VAL-PROVIDER-OLLAMA-001: format parameter with JSON Schema
10
- * - VAL-PROVIDER-OLLAMA-003: Vision with base64 extraction alongside format
11
- * - VAL-PROVIDER-OLLAMA-004: format "json" vs schema modes
12
- */
13
-
14
- import { BaseLLMClient } from '../client.js';
15
- import { httpRequest, httpStream, parseNDJSON, buildHeaders } from '../http.js';
16
- import { StandardChatDecoder } from '../stream-decoder.js';
17
- import {
18
- normalizeJsonSchema,
19
- getJsonSchemaFromConfig,
20
- } from '../structured-output.js';
21
- import type {
22
- LLMClientOptions,
23
- LLMChatMessage,
24
- LLMChatResponse,
25
- ChatOptions,
26
- ModelMetadata,
27
- OllamaResponse,
28
- OllamaModelInfo,
29
- LLMToolDefinition,
30
- TokenUsageInfo,
31
- } from '../interfaces.js';
32
- import type { DecodedEvent } from '../stream-decoder.js';
33
- import type { Auditor } from '../auditor.js';
34
-
35
- export class OllamaClient extends BaseLLMClient {
36
/**
 * Create an Ollama client.
 *
 * Uses `http://localhost:11434` when `options.url` is empty or missing, and
 * strips trailing slashes so endpoint paths can be appended directly.
 *
 * @param options Client options; `url` is normalized before being passed on.
 * @param auditor Optional auditor forwarded unchanged to the base class.
 */
constructor(options: LLMClientOptions, auditor?: Auditor) {
    const baseUrl = (options.url || 'http://localhost:11434').replace(/\/+$/, '');
    super({ ...options, url: baseUrl }, auditor);
}
42
-
43
- // ========================================================================
44
- // Chat
45
- // ========================================================================
46
-
47
/**
 * Send a non-streaming chat request to Ollama's `/api/chat` endpoint.
 *
 * Structured output and tools can be used together: the request carries both
 * `format` and `tools` when they apply.
 *
 * @param messages Conversation to send; converted with `convertMessages`.
 * @param options  Per-call options (tools, sampling parameters, schema / response format).
 * @returns The assistant message with normalized tool calls, optional
 *          reasoning text, and token usage when the server reported any.
 */
async chat(
    messages: LLMChatMessage[],
    options?: ChatOptions,
): Promise<LLMChatResponse> {
    const endpoint = `${this.options.url}/api/chat`;

    // Per-call tools take precedence; registered tools are only consulted
    // when the caller supplied none.
    const registeredTools = () =>
        Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined;
    const tools = options?.tools ?? registeredTools();

    const payload: Record<string, unknown> = {
        model: this.options.model,
        messages: this.convertMessages(messages),
        stream: false,
        options: this.buildOllamaOptions(options),
        ...(tools?.length ? { tools: this.convertToolsToOllama(tools) } : {}),
        // Native thinking is on unless explicitly configured otherwise.
        think: this.options.thinking ?? true,
    };

    // Structured output: a schema wins; any other responseFormat maps to
    // Ollama's plain "json" mode (legacy json_object behaviour).
    const schemaOptions = this.extractSchemaOptions(options);
    if (schemaOptions) {
        payload['format'] = this.buildFormatParameter(schemaOptions);
    } else if (options?.responseFormat) {
        payload['format'] = 'json';
    }

    const startedAt = Date.now();
    this.auditor.record({
        timestamp: startedAt,
        type: 'request',
        provider: 'ollama',
        model: this.options.model,
    });

    const { data } = await httpRequest<OllamaResponse>(endpoint, {
        method: 'POST',
        headers: buildHeaders(this.options),
        body: payload,
        timeout: this.options.timeout ?? 30000,
    });

    // Usage is reported only when at least one counter is non-zero.
    let usage: TokenUsageInfo | undefined;
    if (data.prompt_eval_count || data.eval_count) {
        const inputTokens = data.prompt_eval_count ?? 0;
        const outputTokens = data.eval_count ?? 0;
        usage = { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
    }

    // Fill in missing tool call IDs and serialize object arguments to strings.
    const toolCalls = data.message.tool_calls?.map(call => {
        const rawArgs = call.function.arguments;
        return {
            ...call,
            id: call.id || this.generateToolCallId(),
            function: {
                ...call.function,
                arguments: typeof rawArgs === 'string' ? rawArgs : JSON.stringify(rawArgs),
            },
        };
    });

    // When the model produced no content, its thinking text is used as the
    // content; in that case it is not repeated as `reasoning`.
    const answer = data.message.content;
    const thinking = data.message.thinking;

    const result: LLMChatResponse = {
        message: {
            role: 'assistant',
            content: answer || thinking || '',
            tool_calls: toolCalls,
        },
        reasoning: answer ? thinking : undefined,
        usage,
        provider: 'ollama',
    };

    this.auditor.record({
        timestamp: Date.now(),
        type: 'response',
        provider: 'ollama',
        model: this.options.model,
        duration: Date.now() - startedAt,
        usage,
    });

    return result;
}
141
-
142
- // ========================================================================
143
- // Streaming
144
- // ========================================================================
145
-
146
/**
 * Stream a chat completion from Ollama's `/api/chat` endpoint (NDJSON).
 *
 * For every chunk, yields a `thinking`, `text`, and/or `tool_call` event for
 * the fields that are present, then returns the assembled response once the
 * stream ends.
 *
 * NOTE(review): unlike chat(), no `format` (structured output) parameter is
 * added to the request here — confirm whether that is intentional.
 *
 * @param messages Conversation to send; converted with `convertMessages`.
 * @param options  Per-call options (tools, sampling parameters).
 * @returns The final assistant message, accumulated reasoning, and token
 *          usage when the last chunk reported any.
 */
async *chatStream(
    messages: LLMChatMessage[],
    options?: ChatOptions,
): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
    const url = `${this.options.url}/api/chat`;
    const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);

    const body: Record<string, unknown> = {
        model: this.options.model,
        messages: this.convertMessages(messages),
        stream: true,
        options: this.buildOllamaOptions(options),
    };

    if (tools?.length) {
        body['tools'] = this.convertToolsToOllama(tools);
    }

    // Native thinking is on unless explicitly configured otherwise.
    body['think'] = this.options.thinking ?? true;

    const start = Date.now();
    this.auditor.record({
        timestamp: start,
        type: 'stream_start',
        provider: 'ollama',
        model: this.options.model,
    });

    const decoder = new StandardChatDecoder(() => {});
    let lastResponse: OllamaResponse | undefined;
    const streamedToolCalls: import('../interfaces.js').LLMToolCall[] = [];

    // Stream idle timeout: thinking models can pause for minutes between chunks.
    // Ensure at least 5 minutes regardless of the base request timeout.
    const streamTimeout = Math.max(this.options.timeout ?? 300000, 300000);

    const stream = httpStream(url, {
        method: 'POST',
        headers: buildHeaders(this.options),
        body,
        timeout: streamTimeout,
    });

    for await (const chunk of parseNDJSON<OllamaResponse>(stream)) {
        // Remember the most recent chunk; usage counters are read from it below.
        lastResponse = chunk;

        if (chunk.message?.thinking) {
            decoder.pushReasoning(chunk.message.thinking);
            yield { type: 'thinking', content: chunk.message.thinking };
        }

        if (chunk.message?.content) {
            decoder.push(chunk.message.content);
            yield { type: 'text', content: chunk.message.content };
        }

        if (chunk.message?.tool_calls?.length) {
            // Fill in missing tool call IDs and serialize object arguments to strings.
            const normalized = chunk.message.tool_calls.map(tc => ({
                ...tc,
                id: tc.id || this.generateToolCallId(),
                function: {
                    ...tc.function,
                    arguments: typeof tc.function.arguments === 'string'
                        ? tc.function.arguments
                        : JSON.stringify(tc.function.arguments),
                },
            }));
            streamedToolCalls.push(...normalized);
            yield { type: 'tool_call', calls: normalized };
        }
    }

    decoder.flush();

    // Report usage when either counter is present, matching chat(). Testing
    // only prompt_eval_count discarded usage whenever that counter was 0 or
    // absent even though eval_count had been reported.
    const promptTokens = lastResponse?.prompt_eval_count;
    const completionTokens = lastResponse?.eval_count;
    const usage: TokenUsageInfo | undefined = (promptTokens || completionTokens)
        ? {
            inputTokens: promptTokens ?? 0,
            outputTokens: completionTokens ?? 0,
            totalTokens: (promptTokens ?? 0) + (completionTokens ?? 0),
        }
        : undefined;

    this.auditor.record({
        timestamp: Date.now(),
        type: 'stream_end',
        provider: 'ollama',
        model: this.options.model,
        duration: Date.now() - start,
        usage,
    });

    return {
        message: {
            role: 'assistant',
            content: decoder.getCleanContent(),
            tool_calls: streamedToolCalls.length > 0 ? streamedToolCalls : undefined,
        },
        reasoning: decoder.getReasoning(),
        usage,
        provider: 'ollama',
    };
}
248
-
249
- // ========================================================================
250
- // Embeddings
251
- // ========================================================================
252
-
253
/**
 * Embed a single text via Ollama's `/api/embed` endpoint.
 *
 * @param text Text to embed with the configured model.
 * @returns The first embedding in the response, or an empty array when the
 *          response contains none.
 */
async embed(text: string): Promise<number[]> {
    const { data } = await httpRequest<{ embeddings: number[][] }>(
        `${this.options.url}/api/embed`,
        {
            method: 'POST',
            headers: buildHeaders(this.options),
            body: { model: this.options.model, input: text },
            timeout: this.options.timeout ?? 30000,
        },
    );
    const [vector] = data.embeddings;
    return vector ?? [];
}
263
-
264
/**
 * Embed several texts in one `/api/embed` request.
 *
 * @param texts Texts to embed with the configured model.
 * @returns The `embeddings` array exactly as returned by the server.
 */
override async embedArray(texts: string[]): Promise<number[][]> {
    const { data } = await httpRequest<{ embeddings: number[][] }>(
        `${this.options.url}/api/embed`,
        {
            method: 'POST',
            headers: buildHeaders(this.options),
            body: { model: this.options.model, input: texts },
            timeout: this.options.timeout ?? 30000,
        },
    );
    return data.embeddings;
}
274
-
275
- // ========================================================================
276
- // Model Discovery
277
- // ========================================================================
278
-
279
/**
 * List the names of the models reported by `/api/tags`.
 *
 * NOTE(review): unlike chat()/embed(), this request does not pass
 * `buildHeaders(this.options)` — confirm that is intended for deployments
 * that rely on those headers.
 *
 * @returns The `name` of every entry in the response's `models` array.
 */
async getModels(): Promise<string[]> {
    const { data } = await httpRequest<{ models: OllamaModelInfo[] }>(
        `${this.options.url}/api/tags`,
        { timeout: 5000 },
    );

    const names: string[] = [];
    for (const entry of data.models) {
        names.push(entry.name);
    }
    return names;
}
286
-
287
/**
 * Look up metadata for a model via `/api/show`, refined by `/api/ps`.
 *
 * The context length starts at a default of 8192, is replaced by the
 * `<architecture>.context_length` entry of `model_info` when present, and is
 * then capped by the `context_length` that `/api/ps` reports for the model
 * if it is listed there.
 *
 * Never rejects: a failing `/api/show` request (or a response without
 * `model_info`) yields `{ contextLength: 8192 }`, and `/api/ps` failures are
 * ignored.
 *
 * NOTE(review): unlike chat()/embed(), these requests do not pass
 * `buildHeaders(this.options)` — confirm that is intended.
 *
 * @param modelName Model to inspect; defaults to the configured model.
 * @returns Model name, context length, architecture, parameter count and
 *          capabilities as far as the server reported them.
 */
override async getModelInfo(modelName?: string): Promise<ModelMetadata> {
    const url = `${this.options.url}/api/show`;
    try {
        const targetModel = modelName ?? this.options.model;
        const response = await httpRequest<Record<string, unknown>>(url, {
            method: 'POST',
            body: { name: targetModel },
            timeout: 5000,
        });

        const modelInfo = response.data['model_info'] as Record<string, unknown> | undefined;
        if (!modelInfo) return { contextLength: 8192 };

        // Extract architecture-specific context length
        const arch = modelInfo['general.architecture'] as string | undefined;
        let contextLength = 8192;

        if (arch) {
            const ctxKey = `${arch}.context_length`;
            const ctxValue = modelInfo[ctxKey] as number | undefined;
            if (ctxValue) contextLength = ctxValue;
        }

        // Prefer the live deployment context when available. /api/show reports
        // the trained maximum; /api/ps reports what the daemon has actually loaded.
        try {
            const psResponse = await httpRequest<{ models?: Array<{ name?: string; context_length?: number }> }>(
                `${this.options.url}/api/ps`,
                { timeout: 5000 },
            );

            // Ollama's documented /api/ps responses list names with an explicit
            // tag (e.g. "llama3:latest"), while the configured name is often
            // the bare "llama3". Compare both sides with an implicit ":latest"
            // so the bare form still finds its loaded model. Only the last
            // path segment is inspected so a registry "host:port/" prefix is
            // not mistaken for a tag.
            const withDefaultTag = (name: string): string => {
                const lower = name.toLowerCase();
                const lastSegment = lower.slice(lower.lastIndexOf('/') + 1);
                return lastSegment.includes(':') ? lower : `${lower}:latest`;
            };
            const wanted = withDefaultTag(targetModel);
            const liveModel = psResponse.data.models?.find(
                model => model.name !== undefined && withDefaultTag(model.name) === wanted,
            );
            if (liveModel?.context_length && liveModel.context_length > 0) {
                contextLength = Math.min(contextLength, liveModel.context_length);
            }
        } catch {
            // Ignore /api/ps failures — /api/show is still a valid fallback
        }

        const paramCountRaw = modelInfo['general.parameter_count'] as number | undefined;
        const capabilities = response.data['capabilities'] as string[] | undefined;

        return {
            model: targetModel,
            contextLength,
            architecture: arch,
            parameterCount: paramCountRaw,
            capabilities,
        };
    } catch {
        return { contextLength: 8192 };
    }
}
341
-
342
- // ========================================================================
343
- // Readiness
344
- // ========================================================================
345
-
346
/**
 * Ensure the configured model is available, pulling it if missing.
 *
 * Probes `/api/show` directly instead of going through `getModelInfo()`:
 * that method catches every error and returns a default, so awaiting it can
 * never reach the pull fallback.
 *
 * Rejects if the pull request itself fails.
 */
async ensureReady(): Promise<void> {
    try {
        // Assumes httpRequest rejects on a failed /api/show lookup — the
        // original fallback design relied on the same behaviour; TODO confirm.
        await httpRequest(`${this.options.url}/api/show`, {
            method: 'POST',
            body: { name: this.options.model },
            timeout: 5000,
        });
    } catch {
        // Try pulling the model
        this.debugLog(`Model not found, attempting pull: ${this.options.model}`);
        await httpRequest(`${this.options.url}/api/pull`, {
            method: 'POST',
            // stream: false asks Ollama for a single JSON response object
            // instead of a stream of NDJSON progress objects, which suits a
            // one-shot request helper.
            body: { name: this.options.model, stream: false },
            timeout: 300000, // 5 min for pull
        });
    }
}
360
-
361
- // ========================================================================
362
- // Internals
363
- // ========================================================================
364
-
365
/**
 * Convert chat messages into the message objects sent to Ollama.
 *
 * - Multimodal content arrays are flattened: text parts are joined with
 *   newlines, base64 image data is collected into `images`, and audio parts
 *   and http(s) image URLs are skipped with a debug log.
 * - Tool call arguments given as JSON strings are parsed into values;
 *   strings that fail to parse are passed through unchanged.
 * - `tool_call_id` is preserved when present.
 *
 * @param messages Messages to convert.
 * @returns One converted object per input message, in the same order.
 */
private convertMessages(messages: LLMChatMessage[]): Record<string, unknown>[] {
    // Ollama needs tool call arguments as objects, not strings.
    const parseArguments = (raw: unknown): unknown => {
        if (typeof raw !== 'string') return raw;
        try {
            return JSON.parse(raw);
        } catch {
            return raw;
        }
    };

    const converted: Record<string, unknown>[] = [];

    for (const message of messages) {
        const entry: Record<string, unknown> = { role: message.role };

        if (!Array.isArray(message.content)) {
            entry['content'] = message.content ?? '';
        } else {
            // Multimodal content: separate text from images.
            const texts: string[] = [];
            const encodedImages: string[] = [];

            for (const part of message.content) {
                if (part.type === 'text') {
                    texts.push(part.text);
                } else if (part.type === 'audio') {
                    this.debugLog('Ollama: skipping audio content (not supported)');
                } else if (part.type === 'image_url' && part.image_url?.url) {
                    const source = part.image_url.url;
                    if (source.startsWith('http')) {
                        // Remote URLs are not forwarded; the caller should
                        // download the image and supply base64 instead.
                        this.debugLog('Ollama vision: skipping URL image, use base64 instead');
                    } else if (source.startsWith('data:')) {
                        // data:image/jpeg;base64,XXXX → keep only XXXX
                        const payload = source.split(',')[1];
                        if (payload) encodedImages.push(payload);
                    } else {
                        // Anything else is assumed to be raw base64.
                        encodedImages.push(source);
                    }
                }
            }

            entry['content'] = texts.join('\n');
            if (encodedImages.length > 0) {
                entry['images'] = encodedImages;
            }
        }

        if (message.tool_calls?.length) {
            entry['tool_calls'] = message.tool_calls.map(call => ({
                ...call,
                function: {
                    ...call.function,
                    arguments: parseArguments(call.function.arguments),
                },
            }));
        }

        // Preserve tool_call_id for tool result messages
        if (message.tool_call_id) {
            entry['tool_call_id'] = message.tool_call_id;
        }

        converted.push(entry);
    }

    return converted;
}
426
-
427
/**
 * Map tool definitions to the `{ type: 'function', function: {...} }` objects
 * placed in the request's `tools` array.
 *
 * Only `name`, `description` and `parameters` of each function are copied.
 *
 * @param tools Tool definitions to convert.
 * @returns One converted entry per tool, in the same order.
 */
private convertToolsToOllama(tools: LLMToolDefinition[]): unknown[] {
    return tools.map(({ function: fn }) => {
        const { name, description, parameters } = fn;
        return {
            type: 'function',
            function: { name, description, parameters },
        };
    });
}
437
-
438
/**
 * Build the `options` object of an Ollama request.
 *
 * Precedence, lowest to highest: client `defaultParameters`, per-call
 * `parameters`, then the explicit `temperature` and `maxTokens` options
 * (the latter is sent as `num_predict`).
 *
 * @param options Per-call chat options, if any.
 * @returns The merged parameter object.
 */
private buildOllamaOptions(options?: ChatOptions): Record<string, unknown> {
    const temperature = options?.temperature;
    const maxTokens = options?.maxTokens;

    return {
        ...this.options.defaultParameters,
        ...options?.parameters,
        ...(temperature !== undefined ? { temperature } : {}),
        ...(maxTokens !== undefined ? { num_predict: maxTokens } : {}),
    };
}
447
-
448
- // ========================================================================
449
- // Structured Output Helpers
450
- // ========================================================================
451
-
452
/**
 * Choose the value of Ollama's `format` request parameter.
 *
 * Ollama accepts either the string "json" (plain JSON mode) or a JSON Schema
 * object (structured output). A raw `jsonSchema` takes precedence and is
 * normalized; otherwise a `schemaConfig` is converted to a JSON Schema;
 * with neither present, plain "json" is used.
 *
 * @param options Schema options with an optional `jsonSchema` and/or `schemaConfig`.
 * @returns A JSON Schema object, or the string "json".
 */
private buildFormatParameter(options: { schemaConfig?: import('../structured-output.js').SchemaConfig<unknown>, jsonSchema?: import('../structured-output.js').JSONSchema }): string | import('../structured-output.js').JSONSchema {
    const { jsonSchema, schemaConfig } = options;

    if (jsonSchema) {
        return normalizeJsonSchema(jsonSchema);
    }

    return schemaConfig ? getJsonSchemaFromConfig(schemaConfig) : 'json';
}
469
- }