kader-0.1.5-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
+ """
+ Ollama LLM Provider implementation.
+
+ Provides synchronous and asynchronous access to Ollama models.
+ """
+
+ from typing import AsyncIterator, Iterator
+
+ from ollama import AsyncClient, Client
+ from ollama._types import Options
+
+ from .base import (
+     BaseLLMProvider,
+     CostInfo,
+     LLMResponse,
+     Message,
+     ModelConfig,
+     ModelInfo,
+     StreamChunk,
+     Usage,
+ )
+
+
+ class OllamaProvider(BaseLLMProvider):
+     """
+     Ollama LLM Provider.
+
+     Provides access to locally-running Ollama models with full support
+     for synchronous and asynchronous operations, including streaming.
+
+     Example:
+         provider = OllamaProvider(model="llama3.2")
+         response = provider.invoke([Message.user("Hello!")])
+         print(response.content)
+     """
+
+     def __init__(
+         self,
+         model: str,
+         host: str | None = None,
+         default_config: ModelConfig | None = None,
+     ) -> None:
+         """
+         Initialize the Ollama provider.
+
+         Args:
+             model: The Ollama model identifier (e.g., "llama3.2", "gpt-oss:120b-cloud")
+             host: Optional Ollama server host (default: http://localhost:11434)
+             default_config: Default configuration for all requests
+         """
+         super().__init__(model=model, default_config=default_config)
+         self._host = host
+         self._client = Client(host=host) if host else Client()
+         self._async_client = AsyncClient(host=host) if host else AsyncClient()
+
+     def _convert_messages(self, messages: list[Message]) -> list[dict]:
+         """Convert Message objects to Ollama format."""
+         return [msg.to_dict() for msg in messages]
+
+     def _convert_config_to_options(self, config: ModelConfig) -> Options:
+         """Convert ModelConfig to Ollama Options."""
+         return Options(
+             temperature=config.temperature if config.temperature != 1.0 else None,
+             num_predict=config.max_tokens,
+             top_p=config.top_p if config.top_p != 1.0 else None,
+             top_k=config.top_k,
+             frequency_penalty=config.frequency_penalty
+             if config.frequency_penalty != 0.0
+             else None,
+             presence_penalty=config.presence_penalty
+             if config.presence_penalty != 0.0
+             else None,
+             stop=config.stop_sequences,
+             seed=config.seed,
+         )
+
+     def _parse_response(self, response) -> LLMResponse:
+         """Parse Ollama ChatResponse to LLMResponse."""
+         # Extract usage information
+         usage = Usage(
+             prompt_tokens=getattr(response, "prompt_eval_count", 0) or 0,
+             completion_tokens=getattr(response, "eval_count", 0) or 0,
+         )
+
+         # Extract content from message
+         content = ""
+         tool_calls = None
+         if hasattr(response, "message"):
+             content = response.message.content or ""
+             if hasattr(response.message, "tool_calls") and response.message.tool_calls:
+                 tool_calls = [
+                     {
+                         "id": f"call_{i}",
+                         "type": "function",
+                         "function": {
+                             "name": tc.function.name,
+                             "arguments": tc.function.arguments,
+                         },
+                     }
+                     for i, tc in enumerate(response.message.tool_calls)
+                 ]
+
+         # Determine finish reason
+         finish_reason = "stop"
+         if getattr(response, "done_reason", None):
+             done_reason = response.done_reason
+             if done_reason == "stop":
+                 finish_reason = "stop"
+             elif done_reason == "length":
+                 finish_reason = "length"
+
+         return LLMResponse(
+             content=content,
+             model=getattr(response, "model", self._model),
+             usage=usage,
+             finish_reason=finish_reason,
+             tool_calls=tool_calls,
+             raw_response=response,
+             created=getattr(response, "created_at", None),
+         )
+
+     def _parse_stream_chunk(self, chunk, accumulated_content: str) -> StreamChunk:
+         """Parse streaming chunk to StreamChunk."""
+         delta = ""
+         if hasattr(chunk, "message") and chunk.message.content:
+             delta = chunk.message.content
+
+         usage = None
+         if getattr(chunk, "done", False):
+             usage = Usage(
+                 prompt_tokens=getattr(chunk, "prompt_eval_count", 0) or 0,
+                 completion_tokens=getattr(chunk, "eval_count", 0) or 0,
+             )
+
+         finish_reason = None
+         if getattr(chunk, "done", False):
+             finish_reason = "stop"
+             done_reason = getattr(chunk, "done_reason", None)
+             if done_reason == "length":
+                 finish_reason = "length"
+
+         return StreamChunk(
+             content=accumulated_content + delta,
+             delta=delta,
+             finish_reason=finish_reason,
+             usage=usage,
+         )
+
+     # -------------------------------------------------------------------------
+     # Synchronous Methods
+     # -------------------------------------------------------------------------
+
+     def invoke(
+         self,
+         messages: list[Message],
+         config: ModelConfig | None = None,
+     ) -> LLMResponse:
+         """
+         Synchronously invoke the Ollama model.
+
+         Args:
+             messages: List of messages in the conversation
+             config: Optional configuration overrides
+
+         Returns:
+             LLMResponse with the model's response
+         """
+         merged_config = self._merge_config(config)
+         options = self._convert_config_to_options(merged_config)
+
+         # Handle response format properly for Ollama
+         format_param = None
+         if merged_config.response_format:
+             resp_format_type = merged_config.response_format.get("type")
+             if resp_format_type == "json_object":
+                 format_param = "json"
+             elif resp_format_type == "text":
+                 format_param = ""  # Default, no special format
+             # For 'object' type and other types, don't set format (Ollama doesn't support all OpenAI formats)
+
+         response = self._client.chat(
+             model=self._model,
+             messages=self._convert_messages(messages),
+             options=options,
+             tools=merged_config.tools,
+             format=format_param,
+             stream=False,
+         )
+
+         llm_response = self._parse_response(response)
+         self._update_tracking(llm_response)
+         return llm_response
+
+     def stream(
+         self,
+         messages: list[Message],
+         config: ModelConfig | None = None,
+     ) -> Iterator[StreamChunk]:
+         """
+         Synchronously stream the Ollama model response.
+
+         Args:
+             messages: List of messages in the conversation
+             config: Optional configuration overrides
+
+         Yields:
+             StreamChunk objects as they arrive
+         """
+         merged_config = self._merge_config(config)
+         options = self._convert_config_to_options(merged_config)
+
+         # Handle response format properly for Ollama
+         format_param = None
+         if merged_config.response_format:
+             resp_format_type = merged_config.response_format.get("type")
+             if resp_format_type == "json_object":
+                 format_param = "json"
+             elif resp_format_type == "text":
+                 format_param = ""  # Default, no special format
+             # For 'object' type and other types, don't set format (Ollama doesn't support all OpenAI formats)
+
+         response_stream = self._client.chat(
+             model=self._model,
+             messages=self._convert_messages(messages),
+             options=options,
+             tools=merged_config.tools,
+             format=format_param,
+             stream=True,
+         )
+
+         accumulated_content = ""
+         for chunk in response_stream:
+             stream_chunk = self._parse_stream_chunk(chunk, accumulated_content)
+             accumulated_content = stream_chunk.content
+             yield stream_chunk
+
+             # Update tracking on final chunk
+             if stream_chunk.is_final and stream_chunk.usage:
+                 final_response = LLMResponse(
+                     content=accumulated_content,
+                     model=self._model,
+                     usage=stream_chunk.usage,
+                     finish_reason=stream_chunk.finish_reason,
+                 )
+                 self._update_tracking(final_response)
+
+     # -------------------------------------------------------------------------
+     # Asynchronous Methods
+     # -------------------------------------------------------------------------
+
+     async def ainvoke(
+         self,
+         messages: list[Message],
+         config: ModelConfig | None = None,
+     ) -> LLMResponse:
+         """
+         Asynchronously invoke the Ollama model.
+
+         Args:
+             messages: List of messages in the conversation
+             config: Optional configuration overrides
+
+         Returns:
+             LLMResponse with the model's response
+         """
+         merged_config = self._merge_config(config)
+         options = self._convert_config_to_options(merged_config)
+
+         # Handle response format properly for Ollama
+         format_param = None
+         if merged_config.response_format:
+             resp_format_type = merged_config.response_format.get("type")
+             if resp_format_type == "json_object":
+                 format_param = "json"
+             elif resp_format_type == "text":
+                 format_param = ""  # Default, no special format
+             # For 'object' type and other types, don't set format (Ollama doesn't support all OpenAI formats)
+
+         response = await self._async_client.chat(
+             model=self._model,
+             messages=self._convert_messages(messages),
+             options=options,
+             tools=merged_config.tools,
+             format=format_param,
+             stream=False,
+         )
+
+         llm_response = self._parse_response(response)
+         self._update_tracking(llm_response)
+         return llm_response
+
+     async def astream(
+         self,
+         messages: list[Message],
+         config: ModelConfig | None = None,
+     ) -> AsyncIterator[StreamChunk]:
+         """
+         Asynchronously stream the Ollama model response.
+
+         Args:
+             messages: List of messages in the conversation
+             config: Optional configuration overrides
+
+         Yields:
+             StreamChunk objects as they arrive
+         """
+         merged_config = self._merge_config(config)
+         options = self._convert_config_to_options(merged_config)
+
+         # Handle response format properly for Ollama
+         format_param = None
+         if merged_config.response_format:
+             resp_format_type = merged_config.response_format.get("type")
+             if resp_format_type == "json_object":
+                 format_param = "json"
+             elif resp_format_type == "text":
+                 format_param = ""  # Default, no special format
+             # For 'object' type and other types, don't set format (Ollama doesn't support all OpenAI formats)
+
+         response_stream = await self._async_client.chat(
+             model=self._model,
+             messages=self._convert_messages(messages),
+             options=options,
+             tools=merged_config.tools,
+             format=format_param,
+             stream=True,
+         )
+
+         accumulated_content = ""
+         async for chunk in response_stream:
+             stream_chunk = self._parse_stream_chunk(chunk, accumulated_content)
+             accumulated_content = stream_chunk.content
+             yield stream_chunk
+
+             # Update tracking on final chunk
+             if stream_chunk.is_final and stream_chunk.usage:
+                 final_response = LLMResponse(
+                     content=accumulated_content,
+                     model=self._model,
+                     usage=stream_chunk.usage,
+                     finish_reason=stream_chunk.finish_reason,
+                 )
+                 self._update_tracking(final_response)
+
+     # -------------------------------------------------------------------------
+     # Token & Cost Methods
+     # -------------------------------------------------------------------------
+
+     def count_tokens(
+         self,
+         text: str | list[Message],
+     ) -> int:
+         """
+         Estimate token count for text or messages.
+
+         Note: Ollama doesn't provide a direct tokenization API,
+         so this is an approximation based on character count.
+
+         Args:
+             text: A string or list of messages to count tokens for
+
+         Returns:
+             Estimated number of tokens (approx 4 chars per token)
+         """
+         if isinstance(text, str):
+             # Rough estimate: ~4 characters per token
+             return len(text) // 4
+         else:
+             total_chars = sum(len(msg.content) for msg in text)
+             return total_chars // 4
+
+     def estimate_cost(
+         self,
+         usage: Usage,
+     ) -> CostInfo:
+         """
+         Estimate cost for usage.
+
+         Note: Ollama runs locally, so there's no API cost.
+
+         Args:
+             usage: Token usage information
+
+         Returns:
+             CostInfo with zero cost (Ollama is free/local)
+         """
+         return CostInfo(
+             input_cost=0.0,
+             output_cost=0.0,
+             total_cost=0.0,
+             currency="USD",
+         )
+
+     # -------------------------------------------------------------------------
+     # Utility Methods
+     # -------------------------------------------------------------------------
+
+     def get_model_info(self) -> ModelInfo | None:
+         """Get information about the current model."""
+         try:
+             info = self._client.show(self._model)
+             return ModelInfo(
+                 name=self._model,
+                 provider="ollama",
+                 context_window=info.get("model_info", {}).get("context_length", 4096),
+                 max_output_tokens=info.get("model_info", {}).get("max_output_tokens"),
+                 supports_tools=True,
+                 supports_streaming=True,
+                 capabilities={
+                     "family": info.get("details", {}).get("family"),
+                     "parameter_size": info.get("details", {}).get("parameter_size"),
+                     "quantization": info.get("details", {}).get("quantization_level"),
+                 },
+             )
+         except Exception:
+             return None
+
+     @classmethod
+     def get_supported_models(cls, host: str | None = None) -> list[str]:
+         """
+         Get list of models available on the Ollama server.
+
+         Args:
+             host: Optional Ollama server host
+
+         Returns:
+             List of available model names
+         """
+         try:
+             client = Client(host=host) if host else Client()
+             response = client.list()
+             models = [model.model for model in response.models]
+             models_config = {}
+             for model in models:
+                 models_config[model] = client.show(model)
+             return [
+                 model
+                 for model, config in models_config.items()
+                 if config.capabilities
+                 in [["completion", "tools", "thinking"], ["completion", "tools"]]
+             ]
+         except Exception:
+             return []
+
+     def list_models(self) -> list[str]:
+         """List all available models on the Ollama server."""
+         return self.get_supported_models(self._host)
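
To make the added module easier to evaluate, here is a minimal usage sketch for the synchronous path. The import paths (`kader.providers.ollama`, `kader.providers.base`) are guesses, since the diff only shows that the module imports from a sibling `.base`; the `ModelConfig(...)` keyword arguments are assumed from the fields read by `_convert_config_to_options`; and a local Ollama server with the named model pulled is also assumed.

    # Hypothetical import paths; the actual package layout of kader 0.1.5 may differ.
    from kader.providers.ollama import OllamaProvider
    from kader.providers.base import Message, ModelConfig

    provider = OllamaProvider(model="llama3.2")

    # Blocking call, mirroring the class docstring's own example.
    response = provider.invoke(
        [Message.user("Give me one sentence about llamas.")],
        config=ModelConfig(temperature=0.2, max_tokens=128),
    )
    print(response.content)
    print(response.usage)                          # token counts reported by Ollama
    print(provider.estimate_cost(response.usage))  # always zero cost for local models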
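
Streaming works the same way but yields `StreamChunk` objects. Based on `_parse_stream_chunk` above, `delta` carries the newly generated text, `content` the accumulated text, and `finish_reason`/`usage` are only populated on the final chunk, so a consumer can print deltas as they arrive and read usage at the end:

    for chunk in provider.stream([Message.user("Count to five.")]):
        print(chunk.delta, end="", flush=True)
        if chunk.finish_reason is not None:  # final chunk
            print()
            print(chunk.usage)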
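
The asynchronous path mirrors the synchronous one through `ainvoke` and `astream`, backed by the `ollama.AsyncClient` held by the provider. A sketch under the same assumptions as above:

    import asyncio

    async def main() -> None:
        provider = OllamaProvider(model="llama3.2")

        # Single async request
        response = await provider.ainvoke([Message.user("Hello!")])
        print(response.content)

        # Async streaming
        async for chunk in provider.astream([Message.user("Hello again!")]):
            print(chunk.delta, end="", flush=True)

    asyncio.run(main())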