universal-llm-client 4.3.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -19
- package/README.md +62 -11
- package/dist/ai-model.d.ts +12 -2
- package/dist/ai-model.js +36 -2
- package/dist/auditor.d.ts +0 -1
- package/dist/auditor.js +0 -1
- package/dist/client.d.ts +0 -1
- package/dist/client.js +0 -1
- package/dist/gemma-channel.d.ts +13 -0
- package/dist/gemma-channel.js +37 -0
- package/dist/gemma-diffusion.d.ts +48 -0
- package/dist/gemma-diffusion.js +146 -0
- package/dist/http.d.ts +4 -1
- package/dist/http.js +14 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +4 -1
- package/dist/interfaces.d.ts +163 -8
- package/dist/interfaces.js +0 -1
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +0 -1
- package/dist/providers/anthropic.d.ts +0 -1
- package/dist/providers/anthropic.js +28 -4
- package/dist/providers/google.d.ts +22 -2
- package/dist/providers/google.js +223 -14
- package/dist/providers/index.d.ts +0 -1
- package/dist/providers/index.js +0 -1
- package/dist/providers/ollama.d.ts +2 -1
- package/dist/providers/ollama.js +59 -31
- package/dist/providers/openai.d.ts +16 -1
- package/dist/providers/openai.js +488 -81
- package/dist/router.d.ts +2 -1
- package/dist/router.js +4 -1
- package/dist/stream-decoder.d.ts +12 -1
- package/dist/stream-decoder.js +182 -6
- package/dist/structured-output.d.ts +0 -1
- package/dist/structured-output.js +0 -1
- package/dist/thinking.d.ts +35 -0
- package/dist/thinking.js +51 -0
- package/dist/tools.d.ts +0 -1
- package/dist/tools.js +0 -1
- package/dist/zod-adapter.d.ts +0 -1
- package/dist/zod-adapter.js +0 -1
- package/package.json +3 -1
- package/dist/ai-model.d.ts.map +0 -1
- package/dist/ai-model.js.map +0 -1
- package/dist/auditor.d.ts.map +0 -1
- package/dist/auditor.js.map +0 -1
- package/dist/client.d.ts.map +0 -1
- package/dist/client.js.map +0 -1
- package/dist/http.d.ts.map +0 -1
- package/dist/http.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/interfaces.d.ts.map +0 -1
- package/dist/interfaces.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/providers/anthropic.d.ts.map +0 -1
- package/dist/providers/anthropic.js.map +0 -1
- package/dist/providers/google.d.ts.map +0 -1
- package/dist/providers/google.js.map +0 -1
- package/dist/providers/index.d.ts.map +0 -1
- package/dist/providers/index.js.map +0 -1
- package/dist/providers/ollama.d.ts.map +0 -1
- package/dist/providers/ollama.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/router.d.ts.map +0 -1
- package/dist/router.js.map +0 -1
- package/dist/stream-decoder.d.ts.map +0 -1
- package/dist/stream-decoder.js.map +0 -1
- package/dist/structured-output.d.ts.map +0 -1
- package/dist/structured-output.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/zod-adapter.d.ts.map +0 -1
- package/dist/zod-adapter.js.map +0 -1
package/dist/providers/google.js
CHANGED
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
* streaming, embeddings, and system prompt handling.
|
|
7
7
|
*/
|
|
8
8
|
import { BaseLLMClient } from '../client.js';
|
|
9
|
-
import {
|
|
9
|
+
import { resolveThinking, geminiThinkingBudget } from '../thinking.js';
|
|
10
|
+
import { httpRequest, httpStream, parseSSE } from '../http.js';
|
|
10
11
|
import { StandardChatDecoder } from '../stream-decoder.js';
|
|
11
12
|
import { normalizeJsonSchema, stripUnsupportedFeatures, getJsonSchemaFromConfig, } from '../structured-output.js';
|
|
12
13
|
export class GoogleClient extends BaseLLMClient {
|
|
@@ -132,6 +133,7 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
132
133
|
});
|
|
133
134
|
// Google streams SSE with JSON payloads
|
|
134
135
|
let buffer = '';
|
|
136
|
+
let reasoningBuffer = '';
|
|
135
137
|
for await (const chunk of stream) {
|
|
136
138
|
buffer += chunk;
|
|
137
139
|
// Google SSE uses "data: " prefix
|
|
@@ -159,8 +161,14 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
159
161
|
continue;
|
|
160
162
|
for (const part of candidate.content.parts) {
|
|
161
163
|
if (part.text) {
|
|
162
|
-
|
|
163
|
-
|
|
164
|
+
if (part.thought) {
|
|
165
|
+
reasoningBuffer += part.text;
|
|
166
|
+
yield { type: 'thinking', content: part.text };
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
decoder.push(part.text);
|
|
170
|
+
yield { type: 'text', content: part.text };
|
|
171
|
+
}
|
|
164
172
|
}
|
|
165
173
|
if (part.functionCall) {
|
|
166
174
|
const toolCall = this.convertFunctionCallToToolCall(part.functionCall, part.thoughtSignature);
|
|
@@ -189,12 +197,170 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
189
197
|
content: decoder.getCleanContent(),
|
|
190
198
|
tool_calls: allToolCalls.length > 0 ? allToolCalls : undefined,
|
|
191
199
|
},
|
|
192
|
-
reasoning: decoder.getReasoning(),
|
|
200
|
+
reasoning: reasoningBuffer || decoder.getReasoning(),
|
|
193
201
|
usage,
|
|
194
202
|
provider: this.isVertex ? 'vertex' : 'google',
|
|
195
203
|
};
|
|
196
204
|
}
|
|
197
205
|
// ========================================================================
|
|
206
|
+
// Deep Research (Gemini interactions API)
|
|
207
|
+
// ========================================================================
|
|
208
|
+
/** Deep Research is available via Google AI Studio only (not Vertex AI). */
|
|
209
|
+
supportsDeepResearch() {
|
|
210
|
+
return !this.isVertex;
|
|
211
|
+
}
|
|
212
|
+
interactionsBase() {
|
|
213
|
+
if (this.isVertex) {
|
|
214
|
+
throw new Error('Deep Research is only available via Google AI Studio, not Vertex AI.');
|
|
215
|
+
}
|
|
216
|
+
return `https://generativelanguage.googleapis.com/${this.apiVersion}/interactions`;
|
|
217
|
+
}
|
|
218
|
+
deepResearchHeaders() {
|
|
219
|
+
return {
|
|
220
|
+
'Content-Type': 'application/json',
|
|
221
|
+
'x-goog-api-key': this.options.apiKey ?? '',
|
|
222
|
+
'Api-Revision': '2026-05-20',
|
|
223
|
+
};
|
|
224
|
+
}
|
|
225
|
+
buildInteractionBody(input, opts, background) {
|
|
226
|
+
return {
|
|
227
|
+
input,
|
|
228
|
+
agent: opts.agent ?? 'deep-research-preview-04-2026',
|
|
229
|
+
background,
|
|
230
|
+
agent_config: {
|
|
231
|
+
type: 'deep-research',
|
|
232
|
+
thinking_summaries: opts.thinkingSummaries ?? 'auto',
|
|
233
|
+
},
|
|
234
|
+
...(opts.tools?.length ? { tools: opts.tools.map(t => ({ type: t })) } : {}),
|
|
235
|
+
...(opts.previousInteractionId ? { previous_interaction_id: opts.previousInteractionId } : {}),
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
toDeepResearchResult(i) {
|
|
239
|
+
const obj = i ?? {};
|
|
240
|
+
const steps = obj['steps'];
|
|
241
|
+
let report = (obj['output_text'] ?? obj['outputText'] ?? obj['output']);
|
|
242
|
+
// Some responses carry the final report only inside the steps' content
|
|
243
|
+
// blocks (the last step is typically the answer) — concatenate text there.
|
|
244
|
+
if (!report && Array.isArray(steps)) {
|
|
245
|
+
const text = steps
|
|
246
|
+
.flatMap(s => (Array.isArray(s.content) ? s.content : []))
|
|
247
|
+
.map(c => (c && typeof c === 'object' && typeof c.text === 'string'
|
|
248
|
+
? c.text
|
|
249
|
+
: ''))
|
|
250
|
+
.filter(Boolean)
|
|
251
|
+
.join('\n\n');
|
|
252
|
+
report = text || undefined;
|
|
253
|
+
}
|
|
254
|
+
return {
|
|
255
|
+
id: obj['id'] ?? '',
|
|
256
|
+
status: obj['status'] ?? 'in_progress',
|
|
257
|
+
report,
|
|
258
|
+
steps,
|
|
259
|
+
error: obj['error'],
|
|
260
|
+
raw: obj,
|
|
261
|
+
};
|
|
262
|
+
}
|
|
263
|
+
/** httpRequest with small backoff retries — the preview interactions API is flaky (503s). */
|
|
264
|
+
async drRequest(url, init, retries = 3) {
|
|
265
|
+
let lastErr;
|
|
266
|
+
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
267
|
+
try {
|
|
268
|
+
const res = await httpRequest(url, init);
|
|
269
|
+
return res.data;
|
|
270
|
+
}
|
|
271
|
+
catch (e) {
|
|
272
|
+
lastErr = e;
|
|
273
|
+
if (attempt < retries)
|
|
274
|
+
await this.delay(1500 * (attempt + 1), init.signal);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
throw lastErr;
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Run an agentic Deep Research interaction: create it, then poll until it
|
|
281
|
+
* completes/fails or the timeout elapses. Returns the final report + steps.
|
|
282
|
+
*/
|
|
283
|
+
async deepResearch(input, opts = {}) {
|
|
284
|
+
const base = this.interactionsBase();
|
|
285
|
+
const headers = this.deepResearchHeaders();
|
|
286
|
+
const pollInterval = opts.pollIntervalMs ?? 5000;
|
|
287
|
+
const deadline = Date.now() + (opts.timeoutMs ?? 600_000);
|
|
288
|
+
let interaction = await this.drRequest(base, {
|
|
289
|
+
method: 'POST',
|
|
290
|
+
headers,
|
|
291
|
+
body: this.buildInteractionBody(input, opts, true),
|
|
292
|
+
timeout: this.options.timeout ?? 60_000,
|
|
293
|
+
signal: opts.signal,
|
|
294
|
+
});
|
|
295
|
+
const id = interaction?.['id'];
|
|
296
|
+
if (!id)
|
|
297
|
+
return this.toDeepResearchResult(interaction);
|
|
298
|
+
while ((interaction?.['status'] ?? 'in_progress') === 'in_progress') {
|
|
299
|
+
if (Date.now() > deadline)
|
|
300
|
+
break;
|
|
301
|
+
await this.delay(pollInterval, opts.signal);
|
|
302
|
+
try {
|
|
303
|
+
interaction = await this.drRequest(`${base}/${id}`, { method: 'GET', headers, timeout: this.options.timeout ?? 60_000, signal: opts.signal }, 2);
|
|
304
|
+
}
|
|
305
|
+
catch {
|
|
306
|
+
// Tolerate transient errors during a long poll; keep trying until the deadline.
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
return this.toDeepResearchResult(interaction);
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Stream a Deep Research interaction's intermediate updates (`step.delta`
|
|
313
|
+
* thought/text/image events) and return the final result. Best-effort:
|
|
314
|
+
* falls back to the created interaction object if the stream ends early.
|
|
315
|
+
*/
|
|
316
|
+
async *deepResearchStream(input, opts = {}) {
|
|
317
|
+
const base = this.interactionsBase();
|
|
318
|
+
const headers = this.deepResearchHeaders();
|
|
319
|
+
// Streaming long-running research requires background:true AND stream:true
|
|
320
|
+
// in the create body (per the Deep Research Interactions API docs).
|
|
321
|
+
const stream = httpStream(base, {
|
|
322
|
+
method: 'POST',
|
|
323
|
+
headers,
|
|
324
|
+
body: { ...this.buildInteractionBody(input, opts, true), stream: true },
|
|
325
|
+
timeout: opts.timeoutMs ?? 600_000,
|
|
326
|
+
signal: opts.signal,
|
|
327
|
+
});
|
|
328
|
+
let last;
|
|
329
|
+
for await (const { data } of parseSSE(stream)) {
|
|
330
|
+
if (!data || data === '[DONE]')
|
|
331
|
+
continue;
|
|
332
|
+
let parsed;
|
|
333
|
+
try {
|
|
334
|
+
parsed = JSON.parse(data);
|
|
335
|
+
}
|
|
336
|
+
catch {
|
|
337
|
+
continue;
|
|
338
|
+
}
|
|
339
|
+
last = parsed;
|
|
340
|
+
const delta = (parsed['delta'] ?? parsed['step']?.['delta']);
|
|
341
|
+
if (delta) {
|
|
342
|
+
const dtype = delta['type'];
|
|
343
|
+
if (dtype === 'thought')
|
|
344
|
+
yield { type: 'thought', content: String(delta['text'] ?? delta['content'] ?? '') };
|
|
345
|
+
else if (dtype === 'text')
|
|
346
|
+
yield { type: 'text', content: String(delta['text'] ?? delta['content'] ?? '') };
|
|
347
|
+
else if (dtype === 'image')
|
|
348
|
+
yield { type: 'image', content: delta['image'] ?? delta['content'] };
|
|
349
|
+
}
|
|
350
|
+
if (typeof parsed['status'] === 'string')
|
|
351
|
+
yield { type: 'status', status: parsed['status'] };
|
|
352
|
+
}
|
|
353
|
+
return this.toDeepResearchResult(last);
|
|
354
|
+
}
|
|
355
|
+
delay(ms, signal) {
|
|
356
|
+
return new Promise((resolve, reject) => {
|
|
357
|
+
if (signal?.aborted)
|
|
358
|
+
return reject(new Error('aborted'));
|
|
359
|
+
const t = setTimeout(resolve, ms);
|
|
360
|
+
signal?.addEventListener('abort', () => { clearTimeout(t); reject(new Error('aborted')); }, { once: true });
|
|
361
|
+
});
|
|
362
|
+
}
|
|
363
|
+
// ========================================================================
|
|
198
364
|
// Embeddings
|
|
199
365
|
// ========================================================================
|
|
200
366
|
async embed(text) {
|
|
@@ -268,8 +434,29 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
268
434
|
config['temperature'] = options.temperature;
|
|
269
435
|
if (options?.maxTokens !== undefined)
|
|
270
436
|
config['maxOutputTokens'] = options.maxTokens;
|
|
271
|
-
|
|
272
|
-
|
|
437
|
+
// Unified thinking flag → Gemini thinkingConfig. Per-call overrides model
|
|
438
|
+
// config. Gemini 3.x uses `thinkingLevel`; 2.5/2.0 use `thinkingBudget`
|
|
439
|
+
// (0 = off, -1 = dynamic). `includeThoughts` surfaces the reasoning text.
|
|
440
|
+
// A user-supplied thinkingConfig (via parameters) is left untouched.
|
|
441
|
+
const thinking = resolveThinking(options?.thinking, this.options.thinking);
|
|
442
|
+
if (thinking && config['thinkingConfig'] === undefined) {
|
|
443
|
+
if (/gemini-3/i.test(this.options.model)) {
|
|
444
|
+
const tc = {};
|
|
445
|
+
if (!thinking.enabled) {
|
|
446
|
+
tc['thinkingLevel'] = 'MINIMAL';
|
|
447
|
+
}
|
|
448
|
+
else {
|
|
449
|
+
if (thinking.level)
|
|
450
|
+
tc['thinkingLevel'] = thinking.level.toUpperCase();
|
|
451
|
+
tc['includeThoughts'] = true;
|
|
452
|
+
}
|
|
453
|
+
config['thinkingConfig'] = tc;
|
|
454
|
+
}
|
|
455
|
+
else {
|
|
456
|
+
config['thinkingConfig'] = thinking.enabled
|
|
457
|
+
? { thinkingBudget: geminiThinkingBudget(thinking.level), includeThoughts: true }
|
|
458
|
+
: { thinkingBudget: 0 };
|
|
459
|
+
}
|
|
273
460
|
}
|
|
274
461
|
// Structured output: add responseMimeType and responseSchema
|
|
275
462
|
const schemaOptions = this.extractSchemaOptions(options);
|
|
@@ -346,9 +533,7 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
346
533
|
const part = {
|
|
347
534
|
functionCall: {
|
|
348
535
|
name: tc.function.name,
|
|
349
|
-
args:
|
|
350
|
-
? JSON.parse(tc.function.arguments)
|
|
351
|
-
: tc.function.arguments,
|
|
536
|
+
args: this.parseToolArguments(tc.function.arguments),
|
|
352
537
|
},
|
|
353
538
|
};
|
|
354
539
|
// Echo thought signature back (required by Gemini 3.x)
|
|
@@ -430,8 +615,8 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
430
615
|
id: this.generateToolCallId(),
|
|
431
616
|
type: 'function',
|
|
432
617
|
function: {
|
|
433
|
-
name: fc.name,
|
|
434
|
-
arguments: JSON.stringify(fc.args),
|
|
618
|
+
name: fc.name || '',
|
|
619
|
+
arguments: JSON.stringify(fc.args ?? {}),
|
|
435
620
|
},
|
|
436
621
|
};
|
|
437
622
|
if (thoughtSignature) {
|
|
@@ -439,6 +624,23 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
439
624
|
}
|
|
440
625
|
return toolCall;
|
|
441
626
|
}
|
|
627
|
+
parseToolArguments(args) {
|
|
628
|
+
if (typeof args !== 'string') {
|
|
629
|
+
return args ?? {};
|
|
630
|
+
}
|
|
631
|
+
if (args.length === 0) {
|
|
632
|
+
return {};
|
|
633
|
+
}
|
|
634
|
+
try {
|
|
635
|
+
const parsed = JSON.parse(args);
|
|
636
|
+
return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
637
|
+
? parsed
|
|
638
|
+
: {};
|
|
639
|
+
}
|
|
640
|
+
catch {
|
|
641
|
+
return {};
|
|
642
|
+
}
|
|
643
|
+
}
|
|
442
644
|
// ========================================================================
|
|
443
645
|
// Response Parsing
|
|
444
646
|
// ========================================================================
|
|
@@ -451,10 +653,17 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
451
653
|
};
|
|
452
654
|
}
|
|
453
655
|
let textContent = '';
|
|
656
|
+
let reasoningText = '';
|
|
454
657
|
const toolCalls = [];
|
|
455
658
|
for (const part of candidate.content.parts) {
|
|
456
|
-
if (part.text)
|
|
457
|
-
|
|
659
|
+
if (part.text) {
|
|
660
|
+
// Thought summaries (includeThoughts) carry the reasoning trace;
|
|
661
|
+
// keep them out of `content` and surface them as `reasoning`.
|
|
662
|
+
if (part.thought)
|
|
663
|
+
reasoningText += part.text;
|
|
664
|
+
else
|
|
665
|
+
textContent += part.text;
|
|
666
|
+
}
|
|
458
667
|
if (part.functionCall) {
|
|
459
668
|
toolCalls.push(this.convertFunctionCallToToolCall(part.functionCall, part.thoughtSignature));
|
|
460
669
|
}
|
|
@@ -474,6 +683,7 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
474
683
|
content: textContent,
|
|
475
684
|
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
476
685
|
},
|
|
686
|
+
reasoning: reasoningText || undefined,
|
|
477
687
|
usage,
|
|
478
688
|
provider: this.isVertex ? 'vertex' : 'google',
|
|
479
689
|
};
|
|
@@ -502,4 +712,3 @@ export class GoogleClient extends BaseLLMClient {
|
|
|
502
712
|
throw new Error('Unreachable');
|
|
503
713
|
}
|
|
504
714
|
}
|
|
505
|
-
//# sourceMappingURL=google.js.map
|
package/dist/providers/index.js
CHANGED
package/dist/providers/ollama.d.ts
CHANGED
|
@@ -18,6 +18,8 @@ export declare class OllamaClient extends BaseLLMClient {
|
|
|
18
18
|
constructor(options: LLMClientOptions, auditor?: Auditor);
|
|
19
19
|
chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
|
|
20
20
|
chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
|
|
21
|
+
private normalizeToolCall;
|
|
22
|
+
private normalizeToolArguments;
|
|
21
23
|
embed(text: string): Promise<number[]>;
|
|
22
24
|
embedArray(texts: string[]): Promise<number[][]>;
|
|
23
25
|
getModels(): Promise<string[]>;
|
|
@@ -35,4 +37,3 @@ export declare class OllamaClient extends BaseLLMClient {
|
|
|
35
37
|
*/
|
|
36
38
|
private buildFormatParameter;
|
|
37
39
|
}
|
|
38
|
-
//# sourceMappingURL=ollama.d.ts.map
|
package/dist/providers/ollama.js
CHANGED
|
@@ -11,9 +11,11 @@
|
|
|
11
11
|
* - VAL-PROVIDER-OLLAMA-004: format "json" vs schema modes
|
|
12
12
|
*/
|
|
13
13
|
import { BaseLLMClient } from '../client.js';
|
|
14
|
+
import { resolveThinking } from '../thinking.js';
|
|
14
15
|
import { httpRequest, httpStream, parseNDJSON, buildHeaders } from '../http.js';
|
|
15
16
|
import { StandardChatDecoder } from '../stream-decoder.js';
|
|
16
17
|
import { normalizeJsonSchema, getJsonSchemaFromConfig, } from '../structured-output.js';
|
|
18
|
+
import { extractGemmaThoughtChannels } from '../gemma-channel.js';
|
|
17
19
|
export class OllamaClient extends BaseLLMClient {
|
|
18
20
|
constructor(options, auditor) {
|
|
19
21
|
super({
|
|
@@ -39,7 +41,8 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
39
41
|
}
|
|
40
42
|
// Enable native thinking by default — thinking models produce better
|
|
41
43
|
// tool selections and reasoning when allowed to think before acting.
|
|
42
|
-
|
|
44
|
+
// Ollama `think` is on/off (no levels); default on for thinking models.
|
|
45
|
+
body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
|
|
43
46
|
// Handle structured output via format parameter
|
|
44
47
|
const schemaOptions = this.extractSchemaOptions(options);
|
|
45
48
|
if (schemaOptions) {
|
|
@@ -68,28 +71,25 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
68
71
|
inputTokens: data.prompt_eval_count ?? 0,
|
|
69
72
|
outputTokens: data.eval_count ?? 0,
|
|
70
73
|
totalTokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0),
|
|
74
|
+
// Ollama reports server-precise timing in nanoseconds.
|
|
75
|
+
durationMs: data.total_duration ? data.total_duration / 1e6 : undefined,
|
|
76
|
+
tokensPerSecond: data.eval_duration && data.eval_count
|
|
77
|
+
? data.eval_count / (data.eval_duration / 1e9)
|
|
78
|
+
: undefined,
|
|
71
79
|
}
|
|
72
80
|
: undefined;
|
|
73
|
-
// Normalize tool
|
|
74
|
-
const toolCalls = data.message.tool_calls?.map(tc => (
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
function: {
|
|
78
|
-
...tc.function,
|
|
79
|
-
arguments: typeof tc.function.arguments === 'string'
|
|
80
|
-
? tc.function.arguments
|
|
81
|
-
: JSON.stringify(tc.function.arguments),
|
|
82
|
-
},
|
|
83
|
-
}));
|
|
84
|
-
// Get content, handling potential null
|
|
85
|
-
const content = data.message.content || data.message.thinking || '';
|
|
81
|
+
// Normalize tool calls (Ollama sometimes omits IDs and empty args).
|
|
82
|
+
const toolCalls = data.message.tool_calls?.map(tc => this.normalizeToolCall(tc));
|
|
83
|
+
const gemmaContent = extractGemmaThoughtChannels(data.message.content || '');
|
|
84
|
+
const reasoning = [data.message.thinking, gemmaContent.reasoning].filter(Boolean).join('\n\n') || undefined;
|
|
86
85
|
const result = {
|
|
87
86
|
message: {
|
|
88
87
|
role: 'assistant',
|
|
89
|
-
content,
|
|
88
|
+
content: gemmaContent.content,
|
|
90
89
|
tool_calls: toolCalls,
|
|
91
90
|
},
|
|
92
|
-
|
|
91
|
+
finishReason: data.done_reason,
|
|
92
|
+
reasoning,
|
|
93
93
|
usage,
|
|
94
94
|
provider: 'ollama',
|
|
95
95
|
};
|
|
@@ -118,7 +118,8 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
118
118
|
if (tools?.length) {
|
|
119
119
|
body['tools'] = this.convertToolsToOllama(tools);
|
|
120
120
|
}
|
|
121
|
-
|
|
121
|
+
// Ollama `think` is on/off (no levels); default on for thinking models.
|
|
122
|
+
body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
|
|
122
123
|
const start = Date.now();
|
|
123
124
|
this.auditor.record({
|
|
124
125
|
timestamp: start,
|
|
@@ -126,7 +127,8 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
126
127
|
provider: 'ollama',
|
|
127
128
|
model: this.options.model,
|
|
128
129
|
});
|
|
129
|
-
const
|
|
130
|
+
const decoderEvents = [];
|
|
131
|
+
const decoder = new StandardChatDecoder(event => decoderEvents.push(event));
|
|
130
132
|
let lastResponse;
|
|
131
133
|
const streamedToolCalls = [];
|
|
132
134
|
// Stream idle timeout: thinking models can pause for minutes between chunks.
|
|
@@ -142,33 +144,38 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
142
144
|
lastResponse = chunk;
|
|
143
145
|
if (chunk.message?.thinking) {
|
|
144
146
|
decoder.pushReasoning(chunk.message.thinking);
|
|
145
|
-
|
|
147
|
+
const pending = decoderEvents.splice(0);
|
|
148
|
+
for (const event of pending) {
|
|
149
|
+
yield event;
|
|
150
|
+
}
|
|
146
151
|
}
|
|
147
152
|
if (chunk.message?.content) {
|
|
148
153
|
decoder.push(chunk.message.content);
|
|
149
|
-
|
|
154
|
+
const pending = decoderEvents.splice(0);
|
|
155
|
+
for (const event of pending) {
|
|
156
|
+
yield event;
|
|
157
|
+
}
|
|
150
158
|
}
|
|
151
159
|
if (chunk.message?.tool_calls?.length) {
|
|
152
|
-
const normalized = chunk.message.tool_calls.map(tc => (
|
|
153
|
-
...tc,
|
|
154
|
-
id: tc.id || this.generateToolCallId(),
|
|
155
|
-
function: {
|
|
156
|
-
...tc.function,
|
|
157
|
-
arguments: typeof tc.function.arguments === 'string'
|
|
158
|
-
? tc.function.arguments
|
|
159
|
-
: JSON.stringify(tc.function.arguments),
|
|
160
|
-
},
|
|
161
|
-
}));
|
|
160
|
+
const normalized = chunk.message.tool_calls.map(tc => this.normalizeToolCall(tc));
|
|
162
161
|
streamedToolCalls.push(...normalized);
|
|
163
162
|
yield { type: 'tool_call', calls: normalized };
|
|
164
163
|
}
|
|
165
164
|
}
|
|
166
165
|
decoder.flush();
|
|
166
|
+
const pending = decoderEvents.splice(0);
|
|
167
|
+
for (const event of pending) {
|
|
168
|
+
yield event;
|
|
169
|
+
}
|
|
167
170
|
const usage = lastResponse?.prompt_eval_count
|
|
168
171
|
? {
|
|
169
172
|
inputTokens: lastResponse.prompt_eval_count ?? 0,
|
|
170
173
|
outputTokens: lastResponse.eval_count ?? 0,
|
|
171
174
|
totalTokens: (lastResponse.prompt_eval_count ?? 0) + (lastResponse.eval_count ?? 0),
|
|
175
|
+
durationMs: lastResponse.total_duration ? lastResponse.total_duration / 1e6 : undefined,
|
|
176
|
+
tokensPerSecond: lastResponse.eval_duration && lastResponse.eval_count
|
|
177
|
+
? lastResponse.eval_count / (lastResponse.eval_duration / 1e9)
|
|
178
|
+
: undefined,
|
|
172
179
|
}
|
|
173
180
|
: undefined;
|
|
174
181
|
this.auditor.record({
|
|
@@ -185,11 +192,33 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
185
192
|
content: decoder.getCleanContent(),
|
|
186
193
|
tool_calls: streamedToolCalls.length > 0 ? streamedToolCalls : undefined,
|
|
187
194
|
},
|
|
195
|
+
finishReason: lastResponse?.done_reason,
|
|
188
196
|
reasoning: decoder.getReasoning(),
|
|
189
197
|
usage,
|
|
190
198
|
provider: 'ollama',
|
|
191
199
|
};
|
|
192
200
|
}
|
|
201
|
+
normalizeToolCall(toolCall) {
|
|
202
|
+
return {
|
|
203
|
+
...toolCall,
|
|
204
|
+
id: toolCall.id || this.generateToolCallId(),
|
|
205
|
+
type: 'function',
|
|
206
|
+
function: {
|
|
207
|
+
...toolCall.function,
|
|
208
|
+
name: toolCall.function?.name || '',
|
|
209
|
+
arguments: this.normalizeToolArguments(toolCall.function?.arguments),
|
|
210
|
+
},
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
normalizeToolArguments(args) {
|
|
214
|
+
if (typeof args === 'string') {
|
|
215
|
+
return args.trim().length > 0 ? args : '{}';
|
|
216
|
+
}
|
|
217
|
+
if (args == null) {
|
|
218
|
+
return '{}';
|
|
219
|
+
}
|
|
220
|
+
return JSON.stringify(args) ?? '{}';
|
|
221
|
+
}
|
|
193
222
|
// ========================================================================
|
|
194
223
|
// Embeddings
|
|
195
224
|
// ========================================================================
|
|
@@ -397,4 +426,3 @@ export class OllamaClient extends BaseLLMClient {
|
|
|
397
426
|
return 'json';
|
|
398
427
|
}
|
|
399
428
|
}
|
|
400
|
-
//# sourceMappingURL=ollama.js.map
|
package/dist/providers/openai.d.ts
CHANGED
|
@@ -9,9 +9,25 @@ import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions } f
|
|
|
9
9
|
import type { DecodedEvent } from '../stream-decoder.js';
|
|
10
10
|
import type { Auditor } from '../auditor.js';
|
|
11
11
|
export declare class OpenAICompatibleClient extends BaseLLMClient {
|
|
12
|
+
private warnedVllmToolFallback;
|
|
13
|
+
/**
|
|
14
|
+
* DiffusionGemma on trimmed vLLM builds has no server-side reasoning or
|
|
15
|
+
* tool-call parser — the native channel protocol is handled client-side
|
|
16
|
+
* (see gemma-diffusion.ts). Auto-detected from the model name; override
|
|
17
|
+
* with `gemmaNativeProtocol` in LLMClientOptions.
|
|
18
|
+
*/
|
|
19
|
+
private get gemmaNative();
|
|
20
|
+
/**
|
|
21
|
+
* Build a full endpoint URL, respecting apiBasePath (already baked into this.options.url)
|
|
22
|
+
* and any queryParams provided at the provider config level.
|
|
23
|
+
*/
|
|
24
|
+
private buildUrl;
|
|
12
25
|
constructor(options: LLMClientOptions, auditor?: Auditor);
|
|
26
|
+
private warnVllmToolFallback;
|
|
13
27
|
chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
|
|
14
28
|
chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
|
|
29
|
+
private normalizeToolCall;
|
|
30
|
+
private normalizeToolArguments;
|
|
15
31
|
embed(text: string): Promise<number[]>;
|
|
16
32
|
getModels(): Promise<string[]>;
|
|
17
33
|
private convertMessages;
|
|
@@ -21,4 +37,3 @@ export declare class OpenAICompatibleClient extends BaseLLMClient {
|
|
|
21
37
|
*/
|
|
22
38
|
private buildResponseFormat;
|
|
23
39
|
}
|
|
24
|
-
//# sourceMappingURL=openai.d.ts.map
|