lollms-client 1.5.6__py3-none-any.whl → 1.7.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -35
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -15
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
  10. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  11. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  12. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  13. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  14. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  15. lollms_client/llm_bindings/ollama/__init__.py +345 -89
  16. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  17. lollms_client/llm_bindings/openai/__init__.py +81 -20
  18. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  19. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  20. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  21. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  22. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  23. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  24. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  25. lollms_client/lollms_agentic.py +4 -2
  26. lollms_client/lollms_base_binding.py +61 -0
  27. lollms_client/lollms_core.py +512 -1890
  28. lollms_client/lollms_discussion.py +65 -39
  29. lollms_client/lollms_llm_binding.py +126 -261
  30. lollms_client/lollms_mcp_binding.py +49 -77
  31. lollms_client/lollms_stt_binding.py +99 -52
  32. lollms_client/lollms_tti_binding.py +38 -38
  33. lollms_client/lollms_ttm_binding.py +38 -42
  34. lollms_client/lollms_tts_binding.py +43 -18
  35. lollms_client/lollms_ttv_binding.py +38 -42
  36. lollms_client/lollms_types.py +4 -2
  37. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  38. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  39. lollms_client/tti_bindings/diffusers/__init__.py +464 -803
  40. lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
  41. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  42. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  43. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  44. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  45. lollms_client/tti_bindings/openai/__init__.py +10 -11
  46. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  47. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  48. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  49. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  50. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  51. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  52. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  53. lollms_client/tts_bindings/bark/__init__.py +7 -10
  54. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  55. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  56. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  57. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  58. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
  59. lollms_client-1.7.13.dist-info/RECORD +90 -0
  60. lollms_client-1.5.6.dist-info/RECORD +0 -87
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
  63. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0
@@ -7,13 +7,18 @@ from lollms_client.lollms_types import MSG_TYPE
  # from lollms_client.lollms_utilities import encode_image
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
  from lollms_client.lollms_discussion import LollmsDiscussion
- from typing import Optional, Callable, List, Union, Dict
+ from typing import Optional, Callable, List, Union, Dict, Any

  from ascii_colors import ASCIIColors, trace_exception
  import pipmaster as pm
  from lollms_client.lollms_utilities import ImageTokenizer
  pm.ensure_packages(["ollama","pillow","tiktoken"])
  import re
+ import platform
+ import subprocess
+ import urllib.request
+ import zipfile
+ import os

  import ollama
  import tiktoken
@@ -57,7 +62,9 @@ def count_tokens_ollama(
  res = ollama_client.chat(
  model=model_name,
  messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
- stream=False,options={"num_predict":1}
+ stream=False,
+ think=False,
+ options={"num_predict":1}
  )

  return res.prompt_eval_count-5
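Illustrative aside (not part of the package diff): the helper above counts tokens by sending the text as a one-token probe generation and reading prompt_eval_count from the response; the -5 offset presumably discounts the chat-template tokens wrapped around the probe. A minimal standalone sketch of the same trick, assuming a reachable local Ollama server; the model name is an assumption:

    import ollama

    client = ollama.Client(host="http://localhost:11434")
    res = client.chat(
        model="llama3",  # illustrative model name
        messages=[{"role": "system", "content": ""},
                  {"role": "user", "content": "Count my tokens please"}],
        stream=False,
        think=False,                 # skip any thinking phase, as the helper now does
        options={"num_predict": 1},  # generate a single token; only prompt_eval_count is needed
    )
    print(res.prompt_eval_count - 5)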
@@ -108,24 +115,28 @@ class OllamaBinding(LollmsLLMBinding):
  raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e

  def generate_text(self,
- prompt: str,
- images: Optional[List[str]] = None,
- system_prompt: str = "",
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
- top_k: int = 40, # Ollama default is 40
- top_p: float = 0.9, # Ollama default is 0.9
- repeat_penalty: float = 1.1, # Ollama default is 1.1
- repeat_last_n: int = 64, # Ollama default is 64
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
- split:Optional[bool]=False, # put to true if the prompt is a discussion
- user_keyword:Optional[str]="!@>user:",
- ai_keyword:Optional[str]="!@>assistant:",
- ) -> Union[str, dict]:
+ prompt: str,
+ images: Optional[List[str]] = None,
+ system_prompt: str = "",
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+ top_k: int = 40, # Ollama default is 40
+ top_p: float = 0.9, # Ollama default is 0.9
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
+ repeat_last_n: int = 64, # Ollama default is 64
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+ ) -> Union[str, dict]:
  """
  Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -168,6 +179,8 @@ class OllamaBinding(LollmsLLMBinding):
  if ctx_size is not None: options['num_ctx'] = ctx_size

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  if images: # Multimodal
@@ -176,6 +189,8 @@ class OllamaBinding(LollmsLLMBinding):
  for img_path in images:
  # Assuming img_path is a file path. ollama-python will read and encode it.
  # If images were base64 strings, they would need decoding to bytes first.
+ if img_path.startswith("data:image/png;base64,"):
+ img_path = img_path[len("data:image/png;base64,"):]
  processed_images.append(img_path)

  messages = [
@@ -192,24 +207,37 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.get('message', {}).get('content', '')
- if chunk_content: # Ensure there is content to process
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
  break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
  else: # Text-only
  messages = [
  {'role': 'system', 'content':system_prompt},
@@ -224,24 +252,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.message.content
- if chunk_content:
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.message.content
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
+
  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
  ASCIIColors.error(error_message)
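Illustrative aside (not part of the package diff): the hunks above introduce a <think> tag convention for reasoning-capable models. A minimal sketch of the non-streaming behaviour plus hypothetical usage; the helper name, the constructor arguments and the model name are assumptions for illustration only:

    from typing import Optional

    def wrap_with_think(thinking: Optional[str], content: str, think: bool) -> str:
        """Prefix the answer with the model's thinking, fenced in <think> tags."""
        if think and thinking:
            return "<think>\n" + thinking + "\n</think>\n" + content
        return content

    # Hypothetical usage against a local Ollama server with a thinking-capable model:
    # binding = OllamaBinding(model_name="qwen3", host_address="http://localhost:11434")
    # text = binding.generate_text("Why is the sky blue?", stream=False, think=True)
    # -> "<think>\n...model reasoning...\n</think>\nThe sky appears blue because..."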
@@ -268,6 +310,9 @@ class OllamaBinding(LollmsLLMBinding):
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
  **kwargs
  ) -> Union[str, dict]:
  if not self.ollama_client:
@@ -296,12 +341,23 @@ class OllamaBinding(LollmsLLMBinding):
  for item in content:
  if item.get("type") == "text":
  text_parts.append(item.get("text", ""))
- elif item.get("type") == "input_image":
+ elif item.get("type") == "input_image" or item.get("type") == "image_url":
  base64_data = item.get("image_url")
  if base64_data:
- # ⚠️ remove prefix "data:image/...;base64,"
- cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
- images.append(cleaned)
+ if isinstance(base64_data, str):
+ # ⚠️ remove prefix "data:image/...;base64,"
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
+ images.append(cleaned)
+ elif base64_data and isinstance(base64_data, dict) :
+ if "base64" in base64_data:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["base64"])
+ images.append(cleaned)
+ elif "url" in base64_data :
+ if "http" in base64_data["url"]:
+ images.append(base64_data["url"])
+ else:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["url"])
+ images.append(cleaned)


  return {
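Illustrative aside (not part of the package diff): the updated parser above now accepts both string and dictionary image payloads. Hypothetical examples of the content-item shapes it handles; all values are made up for illustration:

    items = [
        {"type": "text", "text": "Describe this image"},
        {"type": "input_image", "image_url": "data:image/png;base64,iVBORw0KGgo..."},          # data-URL string, prefix stripped
        {"type": "image_url", "image_url": {"base64": "data:image/jpeg;base64,/9j/4AA..."}},    # dict carrying a base64 payload
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},             # dict carrying a remote URL, passed through
    ]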
@@ -333,6 +389,7 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=ollama_messages,
  stream=True,
+ think = think,
  options=options if options else None
  )
  for chunk_dict in response_stream:
@@ -344,13 +401,17 @@ class OllamaBinding(LollmsLLMBinding):
  break
  return full_response_text
  else:
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=ollama_messages,
  stream=False,
+ think=think if "gpt-oss" not in self.model_name else reasoning_effort,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -364,37 +425,28 @@ class OllamaBinding(LollmsLLMBinding):
  error_message = f"An unexpected error occurred: {str(ex)}"
  trace_exception(ex)
  return {"status": False, "error": error_message}
-
-
- except ollama.ResponseError as e:
- error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message, "status_code": e.status_code}
- except ollama.RequestError as e: # Covers connection errors, timeouts during request
- error_message = f"Ollama API RequestError: {str(e)}"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message}
- except Exception as ex:
- error_message = f"An unexpected error occurred: {str(ex)}"
- trace_exception(ex)
- return {"status": False, "error": error_message}


  def chat(self,
- discussion: LollmsDiscussion,
- branch_tip_id: Optional[str] = None,
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7,
- top_k: int = 40,
- top_p: float = 0.9,
- repeat_penalty: float = 1.1,
- repeat_last_n: int = 64,
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: Optional[int] = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
- ) -> Union[str, dict]:
+ discussion: LollmsDiscussion,
+ branch_tip_id: Optional[str] = None,
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: Optional[int] = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+
+ ) -> Union[str, dict]:
  """
  Conduct a chat session with the Ollama model using a LollmsDiscussion object.

@@ -439,6 +491,8 @@ class OllamaBinding(LollmsLLMBinding):
  options = {k: v for k, v in options.items() if v is not None}

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  # 3. Call the Ollama API
@@ -447,24 +501,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
+ in_thinking = False
  for chunk in response_stream:
- chunk_content = chunk.get('message', {}).get('content', '')
- if chunk_content:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
+
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -595,7 +663,182 @@ class OllamaBinding(LollmsLLMBinding):
  "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
  }

- def listModels(self) -> List[Dict[str, str]]:
+ def pull_model(self, model_name: str, progress_callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Pulls a model from the Ollama library.
+
+ Args:
+ model_name (str): The name of the model to pull.
+ progress_callback (Callable[[dict], None], optional): A callback function that receives progress updates.
+ The dict typically contains 'status', 'completed', 'total'.
+
+ Returns:
+ dict: Dictionary with status (bool) and message (str).
+ """
+ if not self.ollama_client:
+ msg = "Ollama client not initialized. Cannot pull model."
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+
+ try:
+ ASCIIColors.info(f"Pulling model {model_name}...")
+ # Stream the pull progress
+ for progress in self.ollama_client.pull(model_name, stream=True):
+ # Send raw progress to callback if provided
+ if progress_callback:
+ progress_callback(progress)
+
+ # Default console logging
+ status = progress.get('status', '')
+ completed = progress.get('completed')
+ total = progress.get('total')
+
+ if completed and total:
+ percent = (completed / total) * 100
+ print(f"\r{status}: {percent:.2f}%", end="", flush=True)
+ else:
+ print(f"\r{status}", end="", flush=True)
+
+ print() # Clear line
+ msg = f"Model {model_name} pulled successfully."
+ ASCIIColors.success(msg)
+ return {"status": True, "message": msg}
+
+ except ollama.ResponseError as e:
+ msg = f"Ollama API Pull Error: {e.error or 'Unknown error'} (status code: {e.status_code})"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except ollama.RequestError as e:
+ msg = f"Ollama API Request Error: {str(e)}"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except Exception as ex:
+ msg = f"An unexpected error occurred while pulling model: {str(ex)}"
+ ASCIIColors.error(msg)
+ trace_exception(ex)
+ return {"status": False, "message": msg}
+
+ def get_zoo(self) -> List[Dict[str, Any]]:
+ """
+ Returns a list of models available for download.
+ each entry is a dict with:
+ name, description, size, type, link
+ """
+ return [
+ {"name": "Llama3 8B", "description": "Meta's Llama 3 8B model. Good for general purpose chat.", "size": "4.7GB", "type": "model", "link": "llama3"},
+ {"name": "Llama3 70B", "description": "Meta's Llama 3 70B model. High capability.", "size": "40GB", "type": "model", "link": "llama3:70b"},
+ {"name": "Phi-3 Mini", "description": "Microsoft's Phi-3 Mini 3.8B model. Lightweight and capable.", "size": "2.3GB", "type": "model", "link": "phi3"},
+ {"name": "Phi-3 Medium", "description": "Microsoft's Phi-3 Medium 14B model.", "size": "7.9GB", "type": "model", "link": "phi3:medium"},
+ {"name": "Mistral 7B", "description": "Mistral AI's 7B model v0.3.", "size": "4.1GB", "type": "model", "link": "mistral"},
+ {"name": "Mixtral 8x7B", "description": "Mistral AI's Mixture of Experts model.", "size": "26GB", "type": "model", "link": "mixtral"},
+ {"name": "Gemma 2 9B", "description": "Google's Gemma 2 9B model.", "size": "5.4GB", "type": "model", "link": "gemma2"},
+ {"name": "Gemma 2 27B", "description": "Google's Gemma 2 27B model.", "size": "16GB", "type": "model", "link": "gemma2:27b"},
+ {"name": "Qwen 2.5 7B", "description": "Alibaba Cloud's Qwen2.5 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5"},
+ {"name": "Qwen 2.5 Coder 7B", "description": "Alibaba Cloud's Qwen2.5 Coder 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5-coder"},
+ {"name": "CodeLlama 7B", "description": "Meta's CodeLlama 7B model.", "size": "3.8GB", "type": "model", "link": "codellama"},
+ {"name": "LLaVA 7B", "description": "Visual instruction tuning model (Vision).", "size": "4.5GB", "type": "model", "link": "llava"},
+ {"name": "Nomic Embed Text", "description": "A high-performing open embedding model.", "size": "274MB", "type": "embedding", "link": "nomic-embed-text"},
+ {"name": "DeepSeek Coder V2", "description": "DeepSeek Coder V2 model.", "size": "8.9GB", "type": "model", "link": "deepseek-coder-v2"},
+ {"name": "OpenHermes 2.5 Mistral", "description": "High quality finetune of Mistral 7B.", "size": "4.1GB", "type": "model", "link": "openhermes"},
+ {"name": "Dolphin Phi", "description": "Uncensored Dolphin fine-tune of Phi-2.", "size": "1.6GB", "type": "model", "link": "dolphin-phi"},
+ {"name": "TinyLlama", "description": "A compact 1.1B model.", "size": "637MB", "type": "model", "link": "tinyllama"},
+ ]
+
+ def download_from_zoo(self, index: int, progress_callback: Callable[[dict], None] = None) -> dict:
+ """
+ Downloads a model from the zoo using its index.
+ """
+ zoo = self.get_zoo()
+ if index < 0 or index >= len(zoo):
+ msg = "Index out of bounds"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ item = zoo[index]
+ return self.pull_model(item["link"], progress_callback=progress_callback)
+
+ def install_ollama(self, callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Installs Ollama based on the operating system.
+ """
+ system = platform.system()
+
+ def report_progress(status, message, completed=0, total=100):
+ if callback:
+ callback({"status": status, "message": message, "completed": completed, "total": total})
+ else:
+ print(f"{status}: {message}")
+
+ try:
+ if system == "Linux":
+ report_progress("working", "Detected Linux. Running installation script...", 10, 100)
+ # Use the official install script
+ cmd = "curl -fsSL https://ollama.com/install.sh | sh"
+ process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+ stdout, stderr = process.communicate()
+
+ if process.returncode == 0:
+ report_progress("success", "Ollama installed successfully on Linux.", 100, 100)
+ return {"status": True, "message": "Ollama installed successfully."}
+ else:
+ msg = f"Installation failed: {stderr}"
+ report_progress("error", msg, 0, 0)
+ return {"status": False, "error": msg}
+
+ elif system == "Windows":
+ report_progress("working", "Detected Windows. Downloading OllamaSetup.exe...", 10, 100)
+ url = "https://ollama.com/download/OllamaSetup.exe"
+ filename = "OllamaSetup.exe"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download installer: {e}"}
+
+ report_progress("working", "Running installer...", 90, 100)
+ try:
+ subprocess.run([filename], check=True) # Runs the installer GUI
+ # We can't easily wait for the GUI installer to finish unless we block or it has silent flags.
+ # Ollama installer is usually simple.
+ report_progress("success", "Installer launched. Please complete the installation.", 100, 100)
+ return {"status": True, "message": "Installer launched."}
+ except Exception as e:
+ return {"status": False, "error": f"Failed to launch installer: {e}"}
+
+ elif system == "Darwin": # macOS
+ report_progress("working", "Detected macOS. Downloading Ollama...", 10, 100)
+ url = "https://ollama.com/download/Ollama-darwin.zip"
+ filename = "Ollama-darwin.zip"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download: {e}"}
+
+ report_progress("working", "Unzipping...", 80, 100)
+ with zipfile.ZipFile(filename, 'r') as zip_ref:
+ zip_ref.extractall("Ollama_Install")
+
+ report_progress("success", "Ollama downloaded and extracted to 'Ollama_Install'. Please move 'Ollama.app' to Applications.", 100, 100)
+ return {"status": True, "message": "Downloaded and extracted. Please install Ollama.app manually."}
+
+ else:
+ return {"status": False, "error": f"Unsupported OS: {system}"}
+
+ except Exception as e:
+ trace_exception(e)
+ return {"status": False, "error": str(e)}
+
+ def list_models(self) -> List[Dict[str, str]]:
  """
  Lists available models from the Ollama service using the ollama-python library.
  The returned list of dictionaries matches the format of the original template.
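Illustrative aside (not part of the package diff): possible usage of the new model-management helpers, assuming an already-constructed OllamaBinding instance named `binding` and a reachable Ollama server:

    def show_progress(progress: dict) -> None:
        # per the pull_model docstring, progress typically carries 'status', 'completed', 'total'
        status = progress.get("status", "")
        completed, total = progress.get("completed"), progress.get("total")
        if completed and total:
            print(f"{status}: {completed / total * 100:.1f}%")
        else:
            print(status)

    # Pull a model directly by name...
    result = binding.pull_model("tinyllama", progress_callback=show_progress)

    # ...or browse the built-in zoo and pull an entry by index
    if result["status"]:
        zoo = binding.get_zoo()
        print([entry["name"] for entry in zoo])
        binding.download_from_zoo(len(zoo) - 1, progress_callback=show_progress)  # last entry: TinyLlama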
@@ -621,10 +864,10 @@ class OllamaBinding(LollmsLLMBinding):
  })
  return model_info_list
  except ollama.ResponseError as e:
- ASCIIColors.error(f"Ollama API listModels ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
  return []
  except ollama.RequestError as e: # Covers connection errors, timeouts during request
- ASCIIColors.error(f"Ollama API listModels RequestError: {str(e)} from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models RequestError: {str(e)} from {self.host_address}")
  return []
  except Exception as ex:
  trace_exception(ex)
@@ -658,6 +901,9 @@ class OllamaBinding(LollmsLLMBinding):
  """
  if model_name is None:
  model_name = self.model_name
+ if not model_name:
+ ASCIIColors.warning("Model name not specified and no default model set.")
+ return None

  try:
  info = ollama.show(model_name)
@@ -692,6 +938,12 @@ class OllamaBinding(LollmsLLMBinding):
  'llama3.1': 131072, # Llama 3.1 extended context
  'llama3.2': 131072, # Llama 3.2 extended context
  'llama3.3': 131072, # Assuming similar to 3.1/3.2
+ 'gpt-oss:20b': 16000, # GPT-OSS extended
+ 'gpt-oss:120b': 128000, # GPT-OSS extended
+ 'codestral': 256000, # Codestral
+ 'mistralai-medium': 128000, # Mistral medium
+ 'mistralai-mini': 128000, # Mistral medium
+ 'ministral': 256000, # Mistral medium
  'mistral': 32768, # Mistral 7B v0.2+ default
  'mixtral': 32768, # Mixtral 8x7B default
  'mixtral8x22b': 65536, # Mixtral 8x22B default
@@ -704,6 +956,9 @@ class OllamaBinding(LollmsLLMBinding):
  'qwen': 8192, # Qwen default
  'qwen2': 32768, # Qwen2 default for 7B
  'qwen2.5': 131072, # Qwen2.5 with 128K
+ 'qwen3': 128000, # Qwen3 with 128k
+ 'qwen3-vl': 128000, # Qwen3-vl with 128k
+ 'qwen3-coder': 256000, # Qwen3 with 256k
  'codellama': 16384, # CodeLlama extended
  'codegemma': 8192, # CodeGemma default
  'deepseek-coder': 16384, # DeepSeek-Coder V1 default
@@ -724,6 +979,7 @@ class OllamaBinding(LollmsLLMBinding):
  'orca2': 4096, # Orca 2 default
  'dolphin': 32768, # Dolphin (often Mistral-based)
  'openhermes': 8192, # OpenHermes default
+ 'gemini-3': 1000000, # Gemini 3 is a beast with 1M tokens
  }

  # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
@@ -747,18 +1003,6 @@ class OllamaBinding(LollmsLLMBinding):
  Returns:
  list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
  Returns an empty list if the client is not initialized or if an error occurs.
-
- Example of a returned model dictionary:
- {
- "model_name": "gemma3:12b",
- "size": 13861175232,
- "vram_size": 10961479680,
- "parameters_size": "12.2B",
- "quantization_level": "Q4_K_M",
- "context_size": 32000,
- "parent_model": "",
- "expires_at": "2025-08-20T22:28:18.6708784+02:00"
- }
  """
  if not self.ollama_client:
  ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
@@ -773,10 +1017,22 @@ class OllamaBinding(LollmsLLMBinding):
  for model_data in models_list:
  details = model_data.get('details', {})

+ size = model_data.get("size", 0)
+ size_vram = model_data.get("size_vram", 0)
+
+ # Calculate spread
+ gpu_usage = 0
+ cpu_usage = 0
+ if size > 0:
+ gpu_usage = min(100, (size_vram / size) * 100)
+ cpu_usage = max(0, 100 - gpu_usage)
+
  flat_model_info = {
  "model_name": model_data.get("name"),
- "size": model_data.get("size"),
- "vram_size": model_data.get("size_vram"),
+ "size": size,
+ "vram_size": size_vram,
+ "gpu_usage_percent": round(gpu_usage, 2),
+ "cpu_usage_percent": round(cpu_usage, 2),
  "expires_at": model_data.get("expires_at"),
  "parameters_size": details.get("parameter_size"),
  "quantization_level": details.get("quantization_level"),
@@ -813,7 +1069,7 @@ if __name__ == '__main__':

  # --- List Models ---
  ASCIIColors.cyan("\n--- Listing Models ---")
- models = binding.listModels()
+ models = binding.list_models()
  if models:
  ASCIIColors.green(f"Found {len(models)} models. First 5:")
  for m in models[:5]:
@@ -844,7 +1100,7 @@ if __name__ == '__main__':
  ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
  prompt_text = "Why is the sky blue?"
  ASCIIColors.info(f"Prompt: {prompt_text}")
- generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
+ generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False, think=False)
  if isinstance(generated_text, str):
  ASCIIColors.green(f"Generated text: {generated_text}")
  else: