@providerprotocol/ai 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,456 @@
+ import type { LLMRequest, LLMResponse } from '../../types/llm.ts';
+ import type { Message } from '../../types/messages.ts';
+ import type { StreamEvent } from '../../types/stream.ts';
+ import type { Tool, ToolCall } from '../../types/tool.ts';
+ import type { TokenUsage } from '../../types/turn.ts';
+ import type { ContentBlock, TextBlock, ImageBlock } from '../../types/content.ts';
+ import {
+   AssistantMessage,
+   isUserMessage,
+   isAssistantMessage,
+   isToolResultMessage,
+ } from '../../types/messages.ts';
+ import type {
+   OllamaLLMParams,
+   OllamaRequest,
+   OllamaMessage,
+   OllamaTool,
+   OllamaResponse,
+   OllamaStreamChunk,
+   OllamaToolCall,
+   OllamaOptions,
+ } from './types.ts';
+
+ /**
+  * Transform UPP request to Ollama format
+  */
+ export function transformRequest<TParams extends OllamaLLMParams>(
+   request: LLMRequest<TParams>,
+   modelId: string
+ ): OllamaRequest {
+   const params = (request.params ?? {}) as OllamaLLMParams;
+
+   const ollamaRequest: OllamaRequest = {
+     model: modelId,
+     messages: transformMessages(request.messages, request.system),
+   };
+
+   // Build options object for runtime parameters
+   const options: OllamaOptions = {};
+
+   if (params.num_predict !== undefined) options.num_predict = params.num_predict;
+   if (params.temperature !== undefined) options.temperature = params.temperature;
+   if (params.top_p !== undefined) options.top_p = params.top_p;
+   if (params.top_k !== undefined) options.top_k = params.top_k;
+   if (params.min_p !== undefined) options.min_p = params.min_p;
+   if (params.typical_p !== undefined) options.typical_p = params.typical_p;
+   if (params.repeat_penalty !== undefined) options.repeat_penalty = params.repeat_penalty;
+   if (params.repeat_last_n !== undefined) options.repeat_last_n = params.repeat_last_n;
+   if (params.presence_penalty !== undefined) options.presence_penalty = params.presence_penalty;
+   if (params.frequency_penalty !== undefined) options.frequency_penalty = params.frequency_penalty;
+   if (params.mirostat !== undefined) options.mirostat = params.mirostat;
+   if (params.mirostat_eta !== undefined) options.mirostat_eta = params.mirostat_eta;
+   if (params.mirostat_tau !== undefined) options.mirostat_tau = params.mirostat_tau;
+   if (params.penalize_newline !== undefined) options.penalize_newline = params.penalize_newline;
+   if (params.stop !== undefined) options.stop = params.stop;
+   if (params.seed !== undefined) options.seed = params.seed;
+   if (params.num_keep !== undefined) options.num_keep = params.num_keep;
+   if (params.num_ctx !== undefined) options.num_ctx = params.num_ctx;
+   if (params.num_batch !== undefined) options.num_batch = params.num_batch;
+   if (params.num_thread !== undefined) options.num_thread = params.num_thread;
+   if (params.num_gpu !== undefined) options.num_gpu = params.num_gpu;
+   if (params.main_gpu !== undefined) options.main_gpu = params.main_gpu;
+   if (params.low_vram !== undefined) options.low_vram = params.low_vram;
+   if (params.f16_kv !== undefined) options.f16_kv = params.f16_kv;
+   if (params.use_mmap !== undefined) options.use_mmap = params.use_mmap;
+   if (params.use_mlock !== undefined) options.use_mlock = params.use_mlock;
+   if (params.vocab_only !== undefined) options.vocab_only = params.vocab_only;
+   if (params.numa !== undefined) options.numa = params.numa;
+   if (params.tfs_z !== undefined) options.tfs_z = params.tfs_z;
+
+   if (Object.keys(options).length > 0) {
+     ollamaRequest.options = options;
+   }
+
+   // Top-level parameters
+   if (params.keep_alive !== undefined) {
+     ollamaRequest.keep_alive = params.keep_alive;
+   }
+   if (params.think !== undefined) {
+     ollamaRequest.think = params.think;
+   }
+   if (params.logprobs !== undefined) {
+     ollamaRequest.logprobs = params.logprobs;
+   }
+   if (params.top_logprobs !== undefined) {
+     ollamaRequest.top_logprobs = params.top_logprobs;
+   }
+
+   // Tools
+   if (request.tools && request.tools.length > 0) {
+     ollamaRequest.tools = request.tools.map(transformTool);
+   }
+
+   // Structured output via format field
+   if (request.structure) {
+     ollamaRequest.format = request.structure as unknown as Record<string, unknown>;
+   }
+
+   return ollamaRequest;
+ }
+
+ /**
+  * Transform UPP Messages to Ollama messages
+  */
+ function transformMessages(messages: Message[], system?: string): OllamaMessage[] {
+   const ollamaMessages: OllamaMessage[] = [];
+
+   // System prompt as first message
+   if (system) {
+     ollamaMessages.push({
+       role: 'system',
+       content: system,
+     });
+   }
+
+   for (const msg of messages) {
+     if (isUserMessage(msg)) {
+       const textContent: string[] = [];
+       const images: string[] = [];
+
+       for (const block of msg.content) {
+         if (block.type === 'text') {
+           textContent.push(block.text);
+         } else if (block.type === 'image') {
+           const imageBlock = block as ImageBlock;
+           if (imageBlock.source.type === 'base64') {
+             images.push(imageBlock.source.data);
+           } else if (imageBlock.source.type === 'bytes') {
+             // Convert bytes to base64
+             const base64 = btoa(
+               Array.from(imageBlock.source.data)
+                 .map((b) => String.fromCharCode(b))
+                 .join('')
+             );
+             images.push(base64);
+           } else if (imageBlock.source.type === 'url') {
+             // Ollama doesn't support URL images directly
+             // Would need to fetch and convert, for now just add as text
+             textContent.push(`[Image: ${imageBlock.source.url}]`);
+           }
+         }
+       }
+
+       const message: OllamaMessage = {
+         role: 'user',
+         content: textContent.join('\n'),
+       };
+
+       if (images.length > 0) {
+         message.images = images;
+       }
+
+       ollamaMessages.push(message);
+     } else if (isAssistantMessage(msg)) {
+       const textContent = msg.content
+         .filter((block): block is TextBlock => block.type === 'text')
+         .map((block) => block.text)
+         .join('\n');
+
+       const message: OllamaMessage = {
+         role: 'assistant',
+         content: textContent,
+       };
+
+       // Add tool calls if present
+       if (msg.toolCalls && msg.toolCalls.length > 0) {
+         message.tool_calls = msg.toolCalls.map((call) => ({
+           function: {
+             name: call.toolName,
+             arguments: call.arguments,
+           },
+         }));
+       }
+
+       ollamaMessages.push(message);
+     } else if (isToolResultMessage(msg)) {
+       // Tool results are sent as 'tool' role messages
+       for (const result of msg.results) {
+         ollamaMessages.push({
+           role: 'tool',
+           tool_name: result.toolCallId, // In our UPP, toolCallId maps to tool name for Ollama
+           content:
+             typeof result.result === 'string'
+               ? result.result
+               : JSON.stringify(result.result),
+         });
+       }
+     }
+   }
+
+   return ollamaMessages;
+ }
+
+ /**
+  * Transform a UPP Tool to Ollama format
+  */
+ function transformTool(tool: Tool): OllamaTool {
+   return {
+     type: 'function',
+     function: {
+       name: tool.name,
+       description: tool.description,
+       parameters: {
+         type: 'object',
+         properties: tool.parameters.properties,
+         required: tool.parameters.required,
+       },
+     },
+   };
+ }
+
+ /**
+  * Transform Ollama response to UPP LLMResponse
+  */
+ export function transformResponse(data: OllamaResponse): LLMResponse {
+   const textContent: TextBlock[] = [];
+   const toolCalls: ToolCall[] = [];
+   let structuredData: unknown;
+
+   // Add main content
+   if (data.message.content) {
+     textContent.push({ type: 'text', text: data.message.content });
+
+     // Try to parse as JSON for structured output
+     try {
+       structuredData = JSON.parse(data.message.content);
+     } catch {
+       // Not valid JSON - that's fine, might not be structured output
+     }
+   }
+
+   // Extract tool calls
+   if (data.message.tool_calls) {
+     for (const call of data.message.tool_calls) {
+       toolCalls.push({
+         toolCallId: call.function.name, // Ollama doesn't have separate IDs, use name
+         toolName: call.function.name,
+         arguments: call.function.arguments,
+       });
+     }
+   }
+
+   const message = new AssistantMessage(
+     textContent,
+     toolCalls.length > 0 ? toolCalls : undefined,
+     {
+       metadata: {
+         ollama: {
+           model: data.model,
+           created_at: data.created_at,
+           done_reason: data.done_reason,
+           thinking: data.message.thinking,
+           total_duration: data.total_duration,
+           load_duration: data.load_duration,
+           prompt_eval_duration: data.prompt_eval_duration,
+           eval_duration: data.eval_duration,
+           logprobs: data.logprobs,
+         },
+       },
+     }
+   );
+
+   // Calculate token usage
+   const usage: TokenUsage = {
+     inputTokens: data.prompt_eval_count ?? 0,
+     outputTokens: data.eval_count ?? 0,
+     totalTokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0),
+   };
+
+   // Map done_reason to standard stop reason
+   let stopReason = 'end_turn';
+   if (data.done_reason === 'length') {
+     stopReason = 'max_tokens';
+   } else if (data.done_reason === 'stop') {
+     stopReason = 'end_turn';
+   } else if (toolCalls.length > 0) {
+     stopReason = 'tool_use';
+   }
+
+   return {
+     message,
+     usage,
+     stopReason,
+     data: structuredData,
+   };
+ }
+
+ /**
+  * State for accumulating streaming response
+  */
+ export interface StreamState {
+   model: string;
+   content: string;
+   thinking: string;
+   toolCalls: Array<{ name: string; args: Record<string, unknown> }>;
+   doneReason: string | null;
+   promptEvalCount: number;
+   evalCount: number;
+   totalDuration: number;
+   isFirstChunk: boolean;
+   createdAt: string;
+ }
+
+ /**
+  * Create initial stream state
+  */
+ export function createStreamState(): StreamState {
+   return {
+     model: '',
+     content: '',
+     thinking: '',
+     toolCalls: [],
+     doneReason: null,
+     promptEvalCount: 0,
+     evalCount: 0,
+     totalDuration: 0,
+     isFirstChunk: true,
+     createdAt: '',
+   };
+ }
+
+ /**
+  * Transform Ollama stream chunk to UPP StreamEvents
+  */
+ export function transformStreamChunk(
+   chunk: OllamaStreamChunk,
+   state: StreamState
+ ): StreamEvent[] {
+   const events: StreamEvent[] = [];
+
+   // First chunk - emit message start
+   if (state.isFirstChunk) {
+     state.model = chunk.model;
+     state.createdAt = chunk.created_at;
+     events.push({ type: 'message_start', index: 0, delta: {} });
+     state.isFirstChunk = false;
+   }
+
+   // Process message content
+   if (chunk.message) {
+     // Text content delta
+     if (chunk.message.content) {
+       state.content += chunk.message.content;
+       events.push({
+         type: 'text_delta',
+         index: 0,
+         delta: { text: chunk.message.content },
+       });
+     }
+
+     // Thinking content delta
+     if (chunk.message.thinking) {
+       state.thinking += chunk.message.thinking;
+       events.push({
+         type: 'reasoning_delta',
+         index: 0,
+         delta: { text: chunk.message.thinking },
+       });
+     }
+
+     // Tool calls (typically come in final chunk)
+     if (chunk.message.tool_calls) {
+       for (const call of chunk.message.tool_calls) {
+         state.toolCalls.push({
+           name: call.function.name,
+           args: call.function.arguments,
+         });
+         events.push({
+           type: 'tool_call_delta',
+           index: state.toolCalls.length - 1,
+           delta: {
+             toolCallId: call.function.name,
+             toolName: call.function.name,
+             argumentsJson: JSON.stringify(call.function.arguments),
+           },
+         });
+       }
+     }
+   }
+
+   // Final chunk with metrics
+   if (chunk.done) {
+     state.doneReason = chunk.done_reason ?? null;
+     state.promptEvalCount = chunk.prompt_eval_count ?? 0;
+     state.evalCount = chunk.eval_count ?? 0;
+     state.totalDuration = chunk.total_duration ?? 0;
+     events.push({ type: 'message_stop', index: 0, delta: {} });
+   }
+
+   return events;
+ }
+
+ /**
+  * Build LLMResponse from accumulated stream state
+  */
+ export function buildResponseFromState(state: StreamState): LLMResponse {
+   const textContent: TextBlock[] = [];
+   const toolCalls: ToolCall[] = [];
+   let structuredData: unknown;
+
+   if (state.content) {
+     textContent.push({ type: 'text', text: state.content });
+
+     // Try to parse as JSON for structured output
+     try {
+       structuredData = JSON.parse(state.content);
+     } catch {
+       // Not valid JSON - that's fine
+     }
+   }
+
+   for (const tc of state.toolCalls) {
+     toolCalls.push({
+       toolCallId: tc.name,
+       toolName: tc.name,
+       arguments: tc.args,
+     });
+   }
+
+   const message = new AssistantMessage(
+     textContent,
+     toolCalls.length > 0 ? toolCalls : undefined,
+     {
+       metadata: {
+         ollama: {
+           model: state.model,
+           created_at: state.createdAt,
+           done_reason: state.doneReason,
+           thinking: state.thinking || undefined,
+           total_duration: state.totalDuration,
+         },
+       },
+     }
+   );
+
+   const usage: TokenUsage = {
+     inputTokens: state.promptEvalCount,
+     outputTokens: state.evalCount,
+     totalTokens: state.promptEvalCount + state.evalCount,
+   };
+
+   // Map done_reason to standard stop reason
+   let stopReason = 'end_turn';
+   if (state.doneReason === 'length') {
+     stopReason = 'max_tokens';
+   } else if (toolCalls.length > 0) {
+     stopReason = 'tool_use';
+   }
+
+   return {
+     message,
+     usage,
+     stopReason,
+     data: structuredData,
+   };
+ }
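
The transforms above are pure functions, so a provider runtime can pair them with a plain fetch call against Ollama's /api/chat endpoint. The sketch below is not part of the package: the transform module's file name ('./transform.ts'), the model id 'llama3.1', and the placeholder LLMRequest are assumptions made for illustration; only the exported functions and the UPP type paths appear in this diff.

// Minimal non-streaming round trip (sketch, not package code).
import type { LLMRequest } from '../../types/llm.ts';
import { transformRequest, transformResponse } from './transform.ts'; // assumed file name
import type { OllamaLLMParams, OllamaResponse } from './types.ts';

declare const request: LLMRequest<OllamaLLMParams>; // assumed to be built elsewhere by the UPP runtime

const body = transformRequest(request, 'llama3.1');

// Ollama streams by default, so ask for a single JSON body on the non-streaming path.
const res = await fetch('http://localhost:11434/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ ...body, stream: false }),
});

const data = (await res.json()) as OllamaResponse;
const { message, usage, stopReason } = transformResponse(data);

Because transformResponse also attempts JSON.parse on the text content, the returned `data` field carries structured output when the request set a `format` schema.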
@@ -0,0 +1,260 @@
+ /**
+  * Ollama-specific LLM parameters
+  * These map to Ollama's runtime options
+  */
+ export interface OllamaLLMParams {
+   /** Maximum number of tokens to predict (default: -1 = infinite) */
+   num_predict?: number;
+
+   /** Temperature for randomness (default: 0.8) */
+   temperature?: number;
+
+   /** Top-p (nucleus) sampling (default: 0.9) */
+   top_p?: number;
+
+   /** Top-k sampling (default: 40) */
+   top_k?: number;
+
+   /** Minimum probability for a token to be considered (default: 0.0) */
+   min_p?: number;
+
+   /** Typical p sampling (default: 1.0 = disabled) */
+   typical_p?: number;
+
+   /** Repeat penalty (default: 1.1) */
+   repeat_penalty?: number;
+
+   /** Number of tokens to look back for repeat penalty (default: 64) */
+   repeat_last_n?: number;
+
+   /** Presence penalty (default: 0.0) */
+   presence_penalty?: number;
+
+   /** Frequency penalty (default: 0.0) */
+   frequency_penalty?: number;
+
+   /** Mirostat sampling mode (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) */
+   mirostat?: 0 | 1 | 2;
+
+   /** Mirostat learning rate (default: 0.1) */
+   mirostat_eta?: number;
+
+   /** Mirostat target entropy (default: 5.0) */
+   mirostat_tau?: number;
+
+   /** Penalize newlines (default: true) */
+   penalize_newline?: boolean;
+
+   /** Stop sequences */
+   stop?: string[];
+
+   /** Seed for deterministic sampling (default: random) */
+   seed?: number;
+
+   /** Number of tokens to keep from initial prompt (default: 4) */
+   num_keep?: number;
+
+   /** Context window size (default: model-dependent) */
+   num_ctx?: number;
+
+   /** Number of batches (default: 512) */
+   num_batch?: number;
+
+   /** Number of threads (default: auto) */
+   num_thread?: number;
+
+   /** Number of layers to offload to GPU (default: auto) */
+   num_gpu?: number;
+
+   /** Main GPU to use (default: 0) */
+   main_gpu?: number;
+
+   /** Enable low VRAM mode */
+   low_vram?: boolean;
+
+   /** Enable f16 KV cache */
+   f16_kv?: boolean;
+
+   /** Use mmap for model loading */
+   use_mmap?: boolean;
+
+   /** Use mlock for memory locking */
+   use_mlock?: boolean;
+
+   /** Vocabulary only mode */
+   vocab_only?: boolean;
+
+   /** NUMA support */
+   numa?: boolean;
+
+   /** TFS-Z sampling (default: 1.0 = disabled) */
+   tfs_z?: number;
+
+   /** Enable thinking mode (for models that support it) */
+   think?: boolean | 'high' | 'medium' | 'low';
+
+   /** Keep model loaded in memory (string duration like "5m" or number of seconds) */
+   keep_alive?: string | number;
+
+   /** Return log probabilities */
+   logprobs?: boolean;
+
+   /** Number of top log probabilities to return */
+   top_logprobs?: number;
+ }
+
+ /**
+  * Ollama chat message format
+  */
+ export interface OllamaMessage {
+   role: 'system' | 'user' | 'assistant' | 'tool';
+   content: string;
+   /** Base64 encoded images for vision models */
+   images?: string[];
+   /** Tool calls made by the assistant */
+   tool_calls?: OllamaToolCall[];
+   /** Tool name when role is 'tool' */
+   tool_name?: string;
+ }
+
+ /**
+  * Ollama tool call format
+  */
+ export interface OllamaToolCall {
+   function: {
+     name: string;
+     arguments: Record<string, unknown>;
+   };
+ }
+
+ /**
+  * Ollama tool definition format
+  */
+ export interface OllamaTool {
+   type: 'function';
+   function: {
+     name: string;
+     description: string;
+     parameters: {
+       type: 'object';
+       properties: Record<string, unknown>;
+       required?: string[];
+     };
+   };
+ }
+
+ /**
+  * Ollama API request body for chat endpoint
+  */
+ export interface OllamaRequest {
+   model: string;
+   messages: OllamaMessage[];
+   stream?: boolean;
+   format?: 'json' | Record<string, unknown>;
+   options?: OllamaOptions;
+   tools?: OllamaTool[];
+   keep_alive?: string | number;
+   think?: boolean | 'high' | 'medium' | 'low';
+   logprobs?: boolean;
+   top_logprobs?: number;
+ }
+
+ /**
+  * Ollama runtime options (passed in options field)
+  */
+ export interface OllamaOptions {
+   num_predict?: number;
+   temperature?: number;
+   top_p?: number;
+   top_k?: number;
+   min_p?: number;
+   typical_p?: number;
+   repeat_penalty?: number;
+   repeat_last_n?: number;
+   presence_penalty?: number;
+   frequency_penalty?: number;
+   mirostat?: 0 | 1 | 2;
+   mirostat_eta?: number;
+   mirostat_tau?: number;
+   penalize_newline?: boolean;
+   stop?: string[];
+   seed?: number;
+   num_keep?: number;
+   num_ctx?: number;
+   num_batch?: number;
+   num_thread?: number;
+   num_gpu?: number;
+   main_gpu?: number;
+   low_vram?: boolean;
+   f16_kv?: boolean;
+   use_mmap?: boolean;
+   use_mlock?: boolean;
+   vocab_only?: boolean;
+   numa?: boolean;
+   tfs_z?: number;
+ }
+
+ /**
+  * Ollama API response format
+  */
+ export interface OllamaResponse {
+   model: string;
+   created_at: string;
+   message: OllamaResponseMessage;
+   done: boolean;
+   done_reason?: 'stop' | 'length' | 'load' | 'unload';
+   total_duration?: number;
+   load_duration?: number;
+   prompt_eval_count?: number;
+   prompt_eval_duration?: number;
+   eval_count?: number;
+   eval_duration?: number;
+   logprobs?: OllamaLogprob[];
+ }
+
+ /**
+  * Ollama response message format
+  */
+ export interface OllamaResponseMessage {
+   role: 'assistant';
+   content: string;
+   /** Thinking content (if think mode enabled) */
+   thinking?: string;
+   /** Tool calls requested by the model */
+   tool_calls?: OllamaToolCall[];
+   /** Images (for multimodal responses) */
+   images?: string[];
+ }
+
+ /**
+  * Ollama log probability format
+  */
+ export interface OllamaLogprob {
+   token: string;
+   logprob: number;
+   bytes?: number[];
+   top_logprobs?: Array<{
+     token: string;
+     logprob: number;
+     bytes?: number[];
+   }>;
+ }
+
+ /**
+  * Ollama streaming response chunk
+  * Same structure as regular response but partial
+  */
+ export interface OllamaStreamChunk {
+   model: string;
+   created_at: string;
+   message: OllamaResponseMessage;
+   done: boolean;
+   done_reason?: 'stop' | 'length' | 'load' | 'unload';
+   total_duration?: number;
+   load_duration?: number;
+   prompt_eval_count?: number;
+   prompt_eval_duration?: number;
+   eval_count?: number;
+   eval_duration?: number;
+   logprobs?: OllamaLogprob[];
+ }
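
The stream helpers from the first file (createStreamState, transformStreamChunk, buildResponseFromState) are written to be driven by Ollama's newline-delimited JSON chat stream, where each line is an OllamaStreamChunk and the final chunk has done: true. The following sketch shows one way a caller might feed them; it is not package code, and the './transform.ts' file name and placeholder LLMRequest are again assumptions.

// Streaming round trip (sketch, not package code).
import type { LLMRequest } from '../../types/llm.ts';
import {
  transformRequest,
  createStreamState,
  transformStreamChunk,
  buildResponseFromState,
} from './transform.ts'; // assumed file name
import type { OllamaLLMParams, OllamaStreamChunk } from './types.ts';

declare const request: LLMRequest<OllamaLLMParams>; // assumed to be built by the UPP runtime

const body = { ...transformRequest(request, 'llama3.1'), stream: true };

const res = await fetch('http://localhost:11434/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(body),
});

const state = createStreamState();
const reader = res.body!.getReader();
const decoder = new TextDecoder();
let buffered = '';

while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  buffered += decoder.decode(value, { stream: true });

  // Ollama emits one JSON object per line (NDJSON); split on newlines as they arrive.
  let newline: number;
  while ((newline = buffered.indexOf('\n')) >= 0) {
    const line = buffered.slice(0, newline).trim();
    buffered = buffered.slice(newline + 1);
    if (!line) continue;
    const chunk = JSON.parse(line) as OllamaStreamChunk;
    for (const event of transformStreamChunk(chunk, state)) {
      // forward each StreamEvent to the caller, e.g. yield it from an async generator
    }
  }
}

// Once the stream ends, the accumulated state folds into a normal LLMResponse.
const finalResponse = buildResponseFromState(state);

Because transformStreamChunk mutates the shared StreamState as a side effect, buildResponseFromState can recover the full text, thinking trace, tool calls, and token counts without re-reading the stream.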