@providerprotocol/ai 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/anthropic/index.js +4 -2
- package/dist/anthropic/index.js.map +1 -1
- package/dist/{chunk-FTFX2VET.js → chunk-SUNYWHTH.js} +2 -89
- package/dist/chunk-SUNYWHTH.js.map +1 -0
- package/dist/chunk-X5G4EHL7.js +90 -0
- package/dist/chunk-X5G4EHL7.js.map +1 -0
- package/dist/google/index.js +4 -2
- package/dist/google/index.js.map +1 -1
- package/dist/http/index.js +5 -3
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/ollama/index.d.ts +108 -0
- package/dist/ollama/index.js +537 -0
- package/dist/ollama/index.js.map +1 -0
- package/dist/openai/index.js +4 -2
- package/dist/openai/index.js.map +1 -1
- package/dist/openrouter/index.d.ts +235 -0
- package/dist/openrouter/index.js +1342 -0
- package/dist/openrouter/index.js.map +1 -0
- package/package.json +16 -1
- package/src/ollama/index.ts +3 -0
- package/src/openrouter/index.ts +10 -0
- package/src/providers/ollama/index.ts +43 -0
- package/src/providers/ollama/llm.ts +272 -0
- package/src/providers/ollama/transform.ts +456 -0
- package/src/providers/ollama/types.ts +260 -0
- package/src/providers/openrouter/index.ts +173 -0
- package/src/providers/openrouter/llm.completions.ts +201 -0
- package/src/providers/openrouter/llm.responses.ts +211 -0
- package/src/providers/openrouter/transform.completions.ts +605 -0
- package/src/providers/openrouter/transform.responses.ts +755 -0
- package/src/providers/openrouter/types.ts +723 -0
- package/dist/chunk-FTFX2VET.js.map +0 -1

package/src/providers/ollama/transform.ts
@@ -0,0 +1,456 @@
+import type { LLMRequest, LLMResponse } from '../../types/llm.ts';
+import type { Message } from '../../types/messages.ts';
+import type { StreamEvent } from '../../types/stream.ts';
+import type { Tool, ToolCall } from '../../types/tool.ts';
+import type { TokenUsage } from '../../types/turn.ts';
+import type { ContentBlock, TextBlock, ImageBlock } from '../../types/content.ts';
+import {
+  AssistantMessage,
+  isUserMessage,
+  isAssistantMessage,
+  isToolResultMessage,
+} from '../../types/messages.ts';
+import type {
+  OllamaLLMParams,
+  OllamaRequest,
+  OllamaMessage,
+  OllamaTool,
+  OllamaResponse,
+  OllamaStreamChunk,
+  OllamaToolCall,
+  OllamaOptions,
+} from './types.ts';
+
+/**
+ * Transform UPP request to Ollama format
+ */
+export function transformRequest<TParams extends OllamaLLMParams>(
+  request: LLMRequest<TParams>,
+  modelId: string
+): OllamaRequest {
+  const params = (request.params ?? {}) as OllamaLLMParams;
+
+  const ollamaRequest: OllamaRequest = {
+    model: modelId,
+    messages: transformMessages(request.messages, request.system),
+  };
+
+  // Build options object for runtime parameters
+  const options: OllamaOptions = {};
+
+  if (params.num_predict !== undefined) options.num_predict = params.num_predict;
+  if (params.temperature !== undefined) options.temperature = params.temperature;
+  if (params.top_p !== undefined) options.top_p = params.top_p;
+  if (params.top_k !== undefined) options.top_k = params.top_k;
+  if (params.min_p !== undefined) options.min_p = params.min_p;
+  if (params.typical_p !== undefined) options.typical_p = params.typical_p;
+  if (params.repeat_penalty !== undefined) options.repeat_penalty = params.repeat_penalty;
+  if (params.repeat_last_n !== undefined) options.repeat_last_n = params.repeat_last_n;
+  if (params.presence_penalty !== undefined) options.presence_penalty = params.presence_penalty;
+  if (params.frequency_penalty !== undefined) options.frequency_penalty = params.frequency_penalty;
+  if (params.mirostat !== undefined) options.mirostat = params.mirostat;
+  if (params.mirostat_eta !== undefined) options.mirostat_eta = params.mirostat_eta;
+  if (params.mirostat_tau !== undefined) options.mirostat_tau = params.mirostat_tau;
+  if (params.penalize_newline !== undefined) options.penalize_newline = params.penalize_newline;
+  if (params.stop !== undefined) options.stop = params.stop;
+  if (params.seed !== undefined) options.seed = params.seed;
+  if (params.num_keep !== undefined) options.num_keep = params.num_keep;
+  if (params.num_ctx !== undefined) options.num_ctx = params.num_ctx;
+  if (params.num_batch !== undefined) options.num_batch = params.num_batch;
+  if (params.num_thread !== undefined) options.num_thread = params.num_thread;
+  if (params.num_gpu !== undefined) options.num_gpu = params.num_gpu;
+  if (params.main_gpu !== undefined) options.main_gpu = params.main_gpu;
+  if (params.low_vram !== undefined) options.low_vram = params.low_vram;
+  if (params.f16_kv !== undefined) options.f16_kv = params.f16_kv;
+  if (params.use_mmap !== undefined) options.use_mmap = params.use_mmap;
+  if (params.use_mlock !== undefined) options.use_mlock = params.use_mlock;
+  if (params.vocab_only !== undefined) options.vocab_only = params.vocab_only;
+  if (params.numa !== undefined) options.numa = params.numa;
+  if (params.tfs_z !== undefined) options.tfs_z = params.tfs_z;
+
+  if (Object.keys(options).length > 0) {
+    ollamaRequest.options = options;
+  }
+
+  // Top-level parameters
+  if (params.keep_alive !== undefined) {
+    ollamaRequest.keep_alive = params.keep_alive;
+  }
+  if (params.think !== undefined) {
+    ollamaRequest.think = params.think;
+  }
+  if (params.logprobs !== undefined) {
+    ollamaRequest.logprobs = params.logprobs;
+  }
+  if (params.top_logprobs !== undefined) {
+    ollamaRequest.top_logprobs = params.top_logprobs;
+  }
+
+  // Tools
+  if (request.tools && request.tools.length > 0) {
+    ollamaRequest.tools = request.tools.map(transformTool);
+  }
+
+  // Structured output via format field
+  if (request.structure) {
+    ollamaRequest.format = request.structure as unknown as Record<string, unknown>;
+  }
+
+  return ollamaRequest;
+}
+
+/**
+ * Transform UPP Messages to Ollama messages
+ */
+function transformMessages(messages: Message[], system?: string): OllamaMessage[] {
+  const ollamaMessages: OllamaMessage[] = [];
+
+  // System prompt as first message
+  if (system) {
+    ollamaMessages.push({
+      role: 'system',
+      content: system,
+    });
+  }
+
+  for (const msg of messages) {
+    if (isUserMessage(msg)) {
+      const textContent: string[] = [];
+      const images: string[] = [];
+
+      for (const block of msg.content) {
+        if (block.type === 'text') {
+          textContent.push(block.text);
+        } else if (block.type === 'image') {
+          const imageBlock = block as ImageBlock;
+          if (imageBlock.source.type === 'base64') {
+            images.push(imageBlock.source.data);
+          } else if (imageBlock.source.type === 'bytes') {
+            // Convert bytes to base64
+            const base64 = btoa(
+              Array.from(imageBlock.source.data)
+                .map((b) => String.fromCharCode(b))
+                .join('')
+            );
+            images.push(base64);
+          } else if (imageBlock.source.type === 'url') {
+            // Ollama doesn't support URL images directly
+            // Would need to fetch and convert, for now just add as text
+            textContent.push(`[Image: ${imageBlock.source.url}]`);
+          }
+        }
+      }
+
+      const message: OllamaMessage = {
+        role: 'user',
+        content: textContent.join('\n'),
+      };
+
+      if (images.length > 0) {
+        message.images = images;
+      }
+
+      ollamaMessages.push(message);
+    } else if (isAssistantMessage(msg)) {
+      const textContent = msg.content
+        .filter((block): block is TextBlock => block.type === 'text')
+        .map((block) => block.text)
+        .join('\n');
+
+      const message: OllamaMessage = {
+        role: 'assistant',
+        content: textContent,
+      };
+
+      // Add tool calls if present
+      if (msg.toolCalls && msg.toolCalls.length > 0) {
+        message.tool_calls = msg.toolCalls.map((call) => ({
+          function: {
+            name: call.toolName,
+            arguments: call.arguments,
+          },
+        }));
+      }
+
+      ollamaMessages.push(message);
+    } else if (isToolResultMessage(msg)) {
+      // Tool results are sent as 'tool' role messages
+      for (const result of msg.results) {
+        ollamaMessages.push({
+          role: 'tool',
+          tool_name: result.toolCallId, // In our UPP, toolCallId maps to tool name for Ollama
+          content:
+            typeof result.result === 'string'
+              ? result.result
+              : JSON.stringify(result.result),
+        });
+      }
+    }
+  }
+
+  return ollamaMessages;
+}
+
+/**
+ * Transform a UPP Tool to Ollama format
+ */
+function transformTool(tool: Tool): OllamaTool {
+  return {
+    type: 'function',
+    function: {
+      name: tool.name,
+      description: tool.description,
+      parameters: {
+        type: 'object',
+        properties: tool.parameters.properties,
+        required: tool.parameters.required,
+      },
+    },
+  };
+}
+
+/**
+ * Transform Ollama response to UPP LLMResponse
+ */
+export function transformResponse(data: OllamaResponse): LLMResponse {
+  const textContent: TextBlock[] = [];
+  const toolCalls: ToolCall[] = [];
+  let structuredData: unknown;
+
+  // Add main content
+  if (data.message.content) {
+    textContent.push({ type: 'text', text: data.message.content });
+
+    // Try to parse as JSON for structured output
+    try {
+      structuredData = JSON.parse(data.message.content);
+    } catch {
+      // Not valid JSON - that's fine, might not be structured output
+    }
+  }
+
+  // Extract tool calls
+  if (data.message.tool_calls) {
+    for (const call of data.message.tool_calls) {
+      toolCalls.push({
+        toolCallId: call.function.name, // Ollama doesn't have separate IDs, use name
+        toolName: call.function.name,
+        arguments: call.function.arguments,
+      });
+    }
+  }
+
+  const message = new AssistantMessage(
+    textContent,
+    toolCalls.length > 0 ? toolCalls : undefined,
+    {
+      metadata: {
+        ollama: {
+          model: data.model,
+          created_at: data.created_at,
+          done_reason: data.done_reason,
+          thinking: data.message.thinking,
+          total_duration: data.total_duration,
+          load_duration: data.load_duration,
+          prompt_eval_duration: data.prompt_eval_duration,
+          eval_duration: data.eval_duration,
+          logprobs: data.logprobs,
+        },
+      },
+    }
+  );
+
+  // Calculate token usage
+  const usage: TokenUsage = {
+    inputTokens: data.prompt_eval_count ?? 0,
+    outputTokens: data.eval_count ?? 0,
+    totalTokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0),
+  };
+
+  // Map done_reason to standard stop reason
+  let stopReason = 'end_turn';
+  if (data.done_reason === 'length') {
+    stopReason = 'max_tokens';
+  } else if (data.done_reason === 'stop') {
+    stopReason = 'end_turn';
+  } else if (toolCalls.length > 0) {
+    stopReason = 'tool_use';
+  }
+
+  return {
+    message,
+    usage,
+    stopReason,
+    data: structuredData,
+  };
+}
+
+/**
+ * State for accumulating streaming response
+ */
+export interface StreamState {
+  model: string;
+  content: string;
+  thinking: string;
+  toolCalls: Array<{ name: string; args: Record<string, unknown> }>;
+  doneReason: string | null;
+  promptEvalCount: number;
+  evalCount: number;
+  totalDuration: number;
+  isFirstChunk: boolean;
+  createdAt: string;
+}
+
+/**
+ * Create initial stream state
+ */
+export function createStreamState(): StreamState {
+  return {
+    model: '',
+    content: '',
+    thinking: '',
+    toolCalls: [],
+    doneReason: null,
+    promptEvalCount: 0,
+    evalCount: 0,
+    totalDuration: 0,
+    isFirstChunk: true,
+    createdAt: '',
+  };
+}
+
+/**
+ * Transform Ollama stream chunk to UPP StreamEvents
+ */
+export function transformStreamChunk(
+  chunk: OllamaStreamChunk,
+  state: StreamState
+): StreamEvent[] {
+  const events: StreamEvent[] = [];
+
+  // First chunk - emit message start
+  if (state.isFirstChunk) {
+    state.model = chunk.model;
+    state.createdAt = chunk.created_at;
+    events.push({ type: 'message_start', index: 0, delta: {} });
+    state.isFirstChunk = false;
+  }
+
+  // Process message content
+  if (chunk.message) {
+    // Text content delta
+    if (chunk.message.content) {
+      state.content += chunk.message.content;
+      events.push({
+        type: 'text_delta',
+        index: 0,
+        delta: { text: chunk.message.content },
+      });
+    }
+
+    // Thinking content delta
+    if (chunk.message.thinking) {
+      state.thinking += chunk.message.thinking;
+      events.push({
+        type: 'reasoning_delta',
+        index: 0,
+        delta: { text: chunk.message.thinking },
+      });
+    }
+
+    // Tool calls (typically come in final chunk)
+    if (chunk.message.tool_calls) {
+      for (const call of chunk.message.tool_calls) {
+        state.toolCalls.push({
+          name: call.function.name,
+          args: call.function.arguments,
+        });
+        events.push({
+          type: 'tool_call_delta',
+          index: state.toolCalls.length - 1,
+          delta: {
+            toolCallId: call.function.name,
+            toolName: call.function.name,
+            argumentsJson: JSON.stringify(call.function.arguments),
+          },
+        });
+      }
+    }
+  }
+
+  // Final chunk with metrics
+  if (chunk.done) {
+    state.doneReason = chunk.done_reason ?? null;
+    state.promptEvalCount = chunk.prompt_eval_count ?? 0;
+    state.evalCount = chunk.eval_count ?? 0;
+    state.totalDuration = chunk.total_duration ?? 0;
+    events.push({ type: 'message_stop', index: 0, delta: {} });
+  }
+
+  return events;
+}
+
+/**
+ * Build LLMResponse from accumulated stream state
+ */
+export function buildResponseFromState(state: StreamState): LLMResponse {
+  const textContent: TextBlock[] = [];
+  const toolCalls: ToolCall[] = [];
+  let structuredData: unknown;
+
+  if (state.content) {
+    textContent.push({ type: 'text', text: state.content });
+
+    // Try to parse as JSON for structured output
+    try {
+      structuredData = JSON.parse(state.content);
+    } catch {
+      // Not valid JSON - that's fine
+    }
+  }
+
+  for (const tc of state.toolCalls) {
+    toolCalls.push({
+      toolCallId: tc.name,
+      toolName: tc.name,
+      arguments: tc.args,
+    });
+  }
+
+  const message = new AssistantMessage(
+    textContent,
+    toolCalls.length > 0 ? toolCalls : undefined,
+    {
+      metadata: {
+        ollama: {
+          model: state.model,
+          created_at: state.createdAt,
+          done_reason: state.doneReason,
+          thinking: state.thinking || undefined,
+          total_duration: state.totalDuration,
+        },
+      },
+    }
+  );
+
+  const usage: TokenUsage = {
+    inputTokens: state.promptEvalCount,
+    outputTokens: state.evalCount,
+    totalTokens: state.promptEvalCount + state.evalCount,
+  };
+
+  // Map done_reason to standard stop reason
+  let stopReason = 'end_turn';
+  if (state.doneReason === 'length') {
+    stopReason = 'max_tokens';
+  } else if (toolCalls.length > 0) {
+    stopReason = 'tool_use';
+  }
+
+  return {
+    message,
+    usage,
+    stopReason,
+    data: structuredData,
+  };
+}
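For orientation, here is a minimal, non-streaming usage sketch of the new helpers. Only `transformRequest`, `transformResponse`, the `./types.ts` exports, and Ollama's `POST /api/chat` endpoint come from the code above; the request literal, its cast, and the `runOnce`/`llama3.1` names are illustrative assumptions.

```ts
// Hedged sketch: drive transformRequest / transformResponse against a local Ollama server.
import { transformRequest, transformResponse } from './transform.ts';
import type { LLMRequest } from '../../types/llm.ts';
import type { OllamaLLMParams, OllamaResponse } from './types.ts';

async function runOnce() {
  // Assumed LLMRequest shape, based only on the fields transformRequest reads
  // (messages, system, params, tools, structure).
  const request = {
    system: 'You are terse.',
    messages: [],
    params: { temperature: 0.2, num_ctx: 8192 },
  } as unknown as LLMRequest<OllamaLLMParams>;

  const body = transformRequest(request, 'llama3.1');

  const res = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ ...body, stream: false }),
  });

  const data = (await res.json()) as OllamaResponse;
  return transformResponse(data); // { message, usage, stopReason, data }
}
```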

package/src/providers/ollama/types.ts
@@ -0,0 +1,260 @@
+/**
+ * Ollama-specific LLM parameters
+ * These map to Ollama's runtime options
+ */
+export interface OllamaLLMParams {
+  /** Maximum number of tokens to predict (default: -1 = infinite) */
+  num_predict?: number;
+
+  /** Temperature for randomness (default: 0.8) */
+  temperature?: number;
+
+  /** Top-p (nucleus) sampling (default: 0.9) */
+  top_p?: number;
+
+  /** Top-k sampling (default: 40) */
+  top_k?: number;
+
+  /** Minimum probability for a token to be considered (default: 0.0) */
+  min_p?: number;
+
+  /** Typical p sampling (default: 1.0 = disabled) */
+  typical_p?: number;
+
+  /** Repeat penalty (default: 1.1) */
+  repeat_penalty?: number;
+
+  /** Number of tokens to look back for repeat penalty (default: 64) */
+  repeat_last_n?: number;
+
+  /** Presence penalty (default: 0.0) */
+  presence_penalty?: number;
+
+  /** Frequency penalty (default: 0.0) */
+  frequency_penalty?: number;
+
+  /** Mirostat sampling mode (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) */
+  mirostat?: 0 | 1 | 2;
+
+  /** Mirostat learning rate (default: 0.1) */
+  mirostat_eta?: number;
+
+  /** Mirostat target entropy (default: 5.0) */
+  mirostat_tau?: number;
+
+  /** Penalize newlines (default: true) */
+  penalize_newline?: boolean;
+
+  /** Stop sequences */
+  stop?: string[];
+
+  /** Seed for deterministic sampling (default: random) */
+  seed?: number;
+
+  /** Number of tokens to keep from initial prompt (default: 4) */
+  num_keep?: number;
+
+  /** Context window size (default: model-dependent) */
+  num_ctx?: number;
+
+  /** Number of batches (default: 512) */
+  num_batch?: number;
+
+  /** Number of threads (default: auto) */
+  num_thread?: number;
+
+  /** Number of layers to offload to GPU (default: auto) */
+  num_gpu?: number;
+
+  /** Main GPU to use (default: 0) */
+  main_gpu?: number;
+
+  /** Enable low VRAM mode */
+  low_vram?: boolean;
+
+  /** Enable f16 KV cache */
+  f16_kv?: boolean;
+
+  /** Use mmap for model loading */
+  use_mmap?: boolean;
+
+  /** Use mlock for memory locking */
+  use_mlock?: boolean;
+
+  /** Vocabulary only mode */
+  vocab_only?: boolean;
+
+  /** NUMA support */
+  numa?: boolean;
+
+  /** TFS-Z sampling (default: 1.0 = disabled) */
+  tfs_z?: number;
+
+  /** Enable thinking mode (for models that support it) */
+  think?: boolean | 'high' | 'medium' | 'low';
+
+  /** Keep model loaded in memory (string duration like "5m" or number of seconds) */
+  keep_alive?: string | number;
+
+  /** Return log probabilities */
+  logprobs?: boolean;
+
+  /** Number of top log probabilities to return */
+  top_logprobs?: number;
+}
+
+/**
+ * Ollama chat message format
+ */
+export interface OllamaMessage {
+  role: 'system' | 'user' | 'assistant' | 'tool';
+  content: string;
+  /** Base64 encoded images for vision models */
+  images?: string[];
+  /** Tool calls made by the assistant */
+  tool_calls?: OllamaToolCall[];
+  /** Tool name when role is 'tool' */
+  tool_name?: string;
+}
+
+/**
+ * Ollama tool call format
+ */
+export interface OllamaToolCall {
+  function: {
+    name: string;
+    arguments: Record<string, unknown>;
+  };
+}
+
+/**
+ * Ollama tool definition format
+ */
+export interface OllamaTool {
+  type: 'function';
+  function: {
+    name: string;
+    description: string;
+    parameters: {
+      type: 'object';
+      properties: Record<string, unknown>;
+      required?: string[];
+    };
+  };
+}
+
+/**
+ * Ollama API request body for chat endpoint
+ */
+export interface OllamaRequest {
+  model: string;
+  messages: OllamaMessage[];
+  stream?: boolean;
+  format?: 'json' | Record<string, unknown>;
+  options?: OllamaOptions;
+  tools?: OllamaTool[];
+  keep_alive?: string | number;
+  think?: boolean | 'high' | 'medium' | 'low';
+  logprobs?: boolean;
+  top_logprobs?: number;
+}
+
+/**
+ * Ollama runtime options (passed in options field)
+ */
+export interface OllamaOptions {
+  num_predict?: number;
+  temperature?: number;
+  top_p?: number;
+  top_k?: number;
+  min_p?: number;
+  typical_p?: number;
+  repeat_penalty?: number;
+  repeat_last_n?: number;
+  presence_penalty?: number;
+  frequency_penalty?: number;
+  mirostat?: 0 | 1 | 2;
+  mirostat_eta?: number;
+  mirostat_tau?: number;
+  penalize_newline?: boolean;
+  stop?: string[];
+  seed?: number;
+  num_keep?: number;
+  num_ctx?: number;
+  num_batch?: number;
+  num_thread?: number;
+  num_gpu?: number;
+  main_gpu?: number;
+  low_vram?: boolean;
+  f16_kv?: boolean;
+  use_mmap?: boolean;
+  use_mlock?: boolean;
+  vocab_only?: boolean;
+  numa?: boolean;
+  tfs_z?: number;
+}
+
+/**
+ * Ollama API response format
+ */
+export interface OllamaResponse {
+  model: string;
+  created_at: string;
+  message: OllamaResponseMessage;
+  done: boolean;
+  done_reason?: 'stop' | 'length' | 'load' | 'unload';
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  logprobs?: OllamaLogprob[];
+}
+
+/**
+ * Ollama response message format
+ */
+export interface OllamaResponseMessage {
+  role: 'assistant';
+  content: string;
+  /** Thinking content (if think mode enabled) */
+  thinking?: string;
+  /** Tool calls requested by the model */
+  tool_calls?: OllamaToolCall[];
+  /** Images (for multimodal responses) */
+  images?: string[];
+}
+
+/**
+ * Ollama log probability format
+ */
+export interface OllamaLogprob {
+  token: string;
+  logprob: number;
+  bytes?: number[];
+  top_logprobs?: Array<{
+    token: string;
+    logprob: number;
+    bytes?: number[];
+  }>;
+}
+
+/**
+ * Ollama streaming response chunk
+ * Same structure as regular response but partial
+ */
+export interface OllamaStreamChunk {
+  model: string;
+  created_at: string;
+  message: OllamaResponseMessage;
+  done: boolean;
+  done_reason?: 'stop' | 'length' | 'load' | 'unload';
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  logprobs?: OllamaLogprob[];
+}
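The streaming path is the other half of the new surface. Ollama's chat endpoint streams newline-delimited JSON, which is what `createStreamState` / `transformStreamChunk` / `buildResponseFromState` are built to consume. The sketch below is an assumption about the surrounding wiring (the fetch call, NDJSON splitting, and the `streamOnce` name are not from the package); only the three helpers and the types come from the files above.

```ts
// Hedged streaming sketch: accumulate Ollama NDJSON chunks into a StreamState,
// emit UPP StreamEvents per chunk, then collapse the state into an LLMResponse.
import {
  createStreamState,
  transformStreamChunk,
  buildResponseFromState,
} from './transform.ts';
import type { OllamaRequest, OllamaStreamChunk } from './types.ts';

async function streamOnce(body: OllamaRequest) {
  const res = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ ...body, stream: true }),
  });

  const state = createStreamState();
  const decoder = new TextDecoder();
  let buffered = '';

  // res.body is async-iterable in Node 18+; the cast keeps TS agnostic about the runtime.
  for await (const part of res.body as unknown as AsyncIterable<Uint8Array>) {
    buffered += decoder.decode(part, { stream: true });
    const lines = buffered.split('\n');
    buffered = lines.pop() ?? '';
    for (const line of lines) {
      if (!line.trim()) continue;
      const chunk = JSON.parse(line) as OllamaStreamChunk;
      for (const event of transformStreamChunk(chunk, state)) {
        // Forward UPP StreamEvents (message_start, text_delta, ...) to the caller here.
        console.log(event.type);
      }
    }
  }

  // Collapse the accumulated state into a single LLMResponse.
  return buildResponseFromState(state);
}
```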