lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -34
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -14
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  10. lollms_client/llm_bindings/lollms/__init__.py +151 -32
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  12. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  13. lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
  14. lollms_client/llm_bindings/ollama/__init__.py +309 -93
  15. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  16. lollms_client/llm_bindings/openai/__init__.py +148 -29
  17. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  18. lollms_client/llm_bindings/openwebui/__init__.py +465 -0
  19. lollms_client/llm_bindings/perplexity/__init__.py +326 -0
  20. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  21. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  22. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  23. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  24. lollms_client/lollms_agentic.py +4 -2
  25. lollms_client/lollms_base_binding.py +61 -0
  26. lollms_client/lollms_core.py +516 -1890
  27. lollms_client/lollms_discussion.py +55 -18
  28. lollms_client/lollms_llm_binding.py +112 -261
  29. lollms_client/lollms_mcp_binding.py +34 -75
  30. lollms_client/lollms_personality.py +5 -2
  31. lollms_client/lollms_stt_binding.py +85 -52
  32. lollms_client/lollms_tti_binding.py +23 -37
  33. lollms_client/lollms_ttm_binding.py +24 -42
  34. lollms_client/lollms_tts_binding.py +28 -17
  35. lollms_client/lollms_ttv_binding.py +24 -42
  36. lollms_client/lollms_types.py +4 -2
  37. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  38. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  39. lollms_client/tti_bindings/diffusers/__init__.py +418 -810
  40. lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
  41. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  42. lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
  43. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  44. lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
  45. lollms_client/tti_bindings/openai/__init__.py +10 -11
  46. lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
  47. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  48. lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
  49. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  50. lollms_client/ttm_bindings/replicate/__init__.py +115 -0
  51. lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
  52. lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
  53. lollms_client/tts_bindings/bark/__init__.py +7 -10
  54. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  55. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  56. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  57. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  58. {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
  59. lollms_client-1.7.10.dist-info/RECORD +89 -0
  60. lollms_client/ttm_bindings/bark/__init__.py +0 -339
  61. lollms_client-1.4.1.dist-info/RECORD +0 -78
  62. {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
  63. {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
  64. {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/ollama/__init__.py

@@ -14,6 +14,11 @@ import pipmaster as pm
  from lollms_client.lollms_utilities import ImageTokenizer
  pm.ensure_packages(["ollama","pillow","tiktoken"])
  import re
+ import platform
+ import subprocess
+ import urllib.request
+ import zipfile
+ import os

  import ollama
  import tiktoken
@@ -57,7 +62,9 @@ def count_tokens_ollama(
  res = ollama_client.chat(
  model=model_name,
  messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
- stream=False,options={"num_predict":1}
+ stream=False,
+ think=False,
+ options={"num_predict":1}
  )

  return res.prompt_eval_count-5
@@ -108,24 +115,28 @@ class OllamaBinding(LollmsLLMBinding):
  raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e

  def generate_text(self,
- prompt: str,
- images: Optional[List[str]] = None,
- system_prompt: str = "",
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
- top_k: int = 40, # Ollama default is 40
- top_p: float = 0.9, # Ollama default is 0.9
- repeat_penalty: float = 1.1, # Ollama default is 1.1
- repeat_last_n: int = 64, # Ollama default is 64
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
- split:Optional[bool]=False, # put to true if the prompt is a discussion
- user_keyword:Optional[str]="!@>user:",
- ai_keyword:Optional[str]="!@>assistant:",
- ) -> Union[str, dict]:
+ prompt: str,
+ images: Optional[List[str]] = None,
+ system_prompt: str = "",
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+ top_k: int = 40, # Ollama default is 40
+ top_p: float = 0.9, # Ollama default is 0.9
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
+ repeat_last_n: int = 64, # Ollama default is 64
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+ ) -> Union[str, dict]:
  """
  Generate text using the active LLM binding, using instance defaults if parameters are not provided.

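Note: in 1.7.10 the generate_text signature gains think, reasoning_effort, reasoning_summary and **kwargs on top of the 1.4.1 parameters. A minimal calling sketch follows; the constructor keyword names (host_address, model_name) are assumptions based on the attributes referenced elsewhere in this diff, not a documented API:

    # Hypothetical usage sketch; constructor argument names are assumed, not confirmed by this diff.
    from lollms_client.llm_bindings.ollama import OllamaBinding

    binding = OllamaBinding(host_address="http://localhost:11434", model_name="qwen3")
    text = binding.generate_text(
        "Explain retrieval-augmented generation in two sentences.",
        n_predict=128,
        stream=False,
        think=True,              # request a reasoning trace from thinking-capable models
        reasoning_effort="low",  # only consulted for gpt-oss models (see the next hunk)
    )
    print(text)  # any thinking is returned wrapped in <think>...</think> before the answer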
@@ -168,6 +179,8 @@ class OllamaBinding(LollmsLLMBinding):
  if ctx_size is not None: options['num_ctx'] = ctx_size

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  if images: # Multimodal
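Note: the substitution added here swaps the boolean think flag for the reasoning_effort string whenever the model name contains "gpt-oss". A small standalone illustration (model names are only examples):

    def resolve_think(model_name: str, think: bool, reasoning_effort: str = "low"):
        # Mirrors the pre-processing step added above: gpt-oss models receive an effort
        # level ("low"/"medium"/"high") where other models receive a plain boolean.
        return think if "gpt-oss" not in model_name else reasoning_effort

    print(resolve_think("llama3.1:8b", think=True))                           # -> True
    print(resolve_think("gpt-oss:20b", think=True, reasoning_effort="high"))  # -> "high"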
@@ -176,6 +189,8 @@ class OllamaBinding(LollmsLLMBinding):
  for img_path in images:
  # Assuming img_path is a file path. ollama-python will read and encode it.
  # If images were base64 strings, they would need decoding to bytes first.
+ if img_path.startswith("data:image/png;base64,"):
+ img_path = img_path[len("data:image/png;base64,"):]
  processed_images.append(img_path)

  messages = [
@@ -192,24 +207,37 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.get('message', {}).get('content', '')
- if chunk_content: # Ensure there is content to process
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
  break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
  else: # Text-only
  messages = [
  {'role': 'system', 'content':system_prompt},
@@ -224,24 +252,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.message.content
- if chunk_content:
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.message.content
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
+
  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
  ASCIIColors.error(error_message)
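Note: the non-streaming branch now prepends the model's thinking wrapped in <think>...</think> to the returned text, and the streaming branch inserts matching markers around the thinking phase. A consumer-side sketch for separating the trace from the answer (this helper is not part of the library; it only illustrates the format of the returned string):

    import re

    def split_thinking(full_text: str):
        """Split a string of the form '<think>\n...\n</think>\nanswer' into (thinking, answer)."""
        match = re.match(r"<think>\n(.*?)\n</think>\n(.*)", full_text, re.DOTALL)
        if match:
            return match.group(1), match.group(2)
        return None, full_text  # no thinking block present

    thinking, answer = split_thinking("<think>\nThe user greets me.\n</think>\nHello!")
    print(answer)  # -> Hello!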
@@ -268,6 +310,9 @@ class OllamaBinding(LollmsLLMBinding):
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
  **kwargs
  ) -> Union[str, dict]:
  if not self.ollama_client:
@@ -296,15 +341,24 @@ class OllamaBinding(LollmsLLMBinding):
  for item in content:
  if item.get("type") == "text":
  text_parts.append(item.get("text", ""))
- elif item.get("type") == "image_url":
- base64_data = item.get("image_url", {}).get("base64")
- url = item.get("image_url", {}).get("url")
+ elif item.get("type") == "input_image" or item.get("type") == "image_url":
+ base64_data = item.get("image_url")
  if base64_data:
- # ⚠️ remove prefix "data:image/...;base64,"
- cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
- images.append(cleaned)
- elif url:
- images.append(url)
+ if isinstance(base64_data, str):
+ # ⚠️ remove prefix "data:image/...;base64,"
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
+ images.append(cleaned)
+ elif base64_data and isinstance(base64_data, dict) :
+ if "base64" in base64_data:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["base64"])
+ images.append(cleaned)
+ elif "url" in base64_data :
+ if "http" in base64_data["url"]:
+ images.append(base64_data["url"])
+ else:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["url"])
+ images.append(cleaned)
+

  return {
  "role": role,
@@ -335,6 +389,7 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=ollama_messages,
  stream=True,
+ think = think,
  options=options if options else None
  )
  for chunk_dict in response_stream:
@@ -346,13 +401,17 @@ class OllamaBinding(LollmsLLMBinding):
  break
  return full_response_text
  else:
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=ollama_messages,
  stream=False,
+ think=think if "gpt-oss" not in self.model_name else reasoning_effort,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -366,37 +425,28 @@ class OllamaBinding(LollmsLLMBinding):
  error_message = f"An unexpected error occurred: {str(ex)}"
  trace_exception(ex)
  return {"status": False, "error": error_message}
-
-
- except ollama.ResponseError as e:
- error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message, "status_code": e.status_code}
- except ollama.RequestError as e: # Covers connection errors, timeouts during request
- error_message = f"Ollama API RequestError: {str(e)}"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message}
- except Exception as ex:
- error_message = f"An unexpected error occurred: {str(ex)}"
- trace_exception(ex)
- return {"status": False, "error": error_message}


  def chat(self,
- discussion: LollmsDiscussion,
- branch_tip_id: Optional[str] = None,
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7,
- top_k: int = 40,
- top_p: float = 0.9,
- repeat_penalty: float = 1.1,
- repeat_last_n: int = 64,
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: Optional[int] = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
- ) -> Union[str, dict]:
+ discussion: LollmsDiscussion,
+ branch_tip_id: Optional[str] = None,
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: Optional[int] = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+
+ ) -> Union[str, dict]:
  """
  Conduct a chat session with the Ollama model using a LollmsDiscussion object.

@@ -441,6 +491,8 @@ class OllamaBinding(LollmsLLMBinding):
  options = {k: v for k, v in options.items() if v is not None}

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  # 3. Call the Ollama API
@@ -449,24 +501,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
+ in_thinking = False
  for chunk in response_stream:
- chunk_content = chunk.get('message', {}).get('content', '')
- if chunk_content:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
+
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -597,7 +663,144 @@ class OllamaBinding(LollmsLLMBinding):
  "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
  }

- def listModels(self) -> List[Dict[str, str]]:
+ def pull_model(self, model_name: str, progress_callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Pulls a model from the Ollama library.
+
+ Args:
+ model_name (str): The name of the model to pull.
+ progress_callback (Callable[[dict], None], optional): A callback function that receives progress updates.
+ The dict typically contains 'status', 'completed', 'total'.
+
+ Returns:
+ dict: Dictionary with status (bool) and message (str).
+ """
+ if not self.ollama_client:
+ msg = "Ollama client not initialized. Cannot pull model."
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+
+ try:
+ ASCIIColors.info(f"Pulling model {model_name}...")
+ # Stream the pull progress
+ for progress in self.ollama_client.pull(model_name, stream=True):
+ # Send raw progress to callback if provided
+ if progress_callback:
+ progress_callback(progress)
+
+ # Default console logging
+ status = progress.get('status', '')
+ completed = progress.get('completed')
+ total = progress.get('total')
+
+ if completed and total:
+ percent = (completed / total) * 100
+ print(f"\r{status}: {percent:.2f}%", end="", flush=True)
+ else:
+ print(f"\r{status}", end="", flush=True)
+
+ print() # Clear line
+ msg = f"Model {model_name} pulled successfully."
+ ASCIIColors.success(msg)
+ return {"status": True, "message": msg}
+
+ except ollama.ResponseError as e:
+ msg = f"Ollama API Pull Error: {e.error or 'Unknown error'} (status code: {e.status_code})"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except ollama.RequestError as e:
+ msg = f"Ollama API Request Error: {str(e)}"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except Exception as ex:
+ msg = f"An unexpected error occurred while pulling model: {str(ex)}"
+ ASCIIColors.error(msg)
+ trace_exception(ex)
+ return {"status": False, "message": msg}
+
+ def install_ollama(self, callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Installs Ollama based on the operating system.
+ """
+ system = platform.system()
+
+ def report_progress(status, message, completed=0, total=100):
+ if callback:
+ callback({"status": status, "message": message, "completed": completed, "total": total})
+ else:
+ print(f"{status}: {message}")
+
+ try:
+ if system == "Linux":
+ report_progress("working", "Detected Linux. Running installation script...", 10, 100)
+ # Use the official install script
+ cmd = "curl -fsSL https://ollama.com/install.sh | sh"
+ process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+ stdout, stderr = process.communicate()
+
+ if process.returncode == 0:
+ report_progress("success", "Ollama installed successfully on Linux.", 100, 100)
+ return {"status": True, "message": "Ollama installed successfully."}
+ else:
+ msg = f"Installation failed: {stderr}"
+ report_progress("error", msg, 0, 0)
+ return {"status": False, "error": msg}
+
+ elif system == "Windows":
+ report_progress("working", "Detected Windows. Downloading OllamaSetup.exe...", 10, 100)
+ url = "https://ollama.com/download/OllamaSetup.exe"
+ filename = "OllamaSetup.exe"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download installer: {e}"}
+
+ report_progress("working", "Running installer...", 90, 100)
+ try:
+ subprocess.run([filename], check=True) # Runs the installer GUI
+ # We can't easily wait for the GUI installer to finish unless we block or it has silent flags.
+ # Ollama installer is usually simple.
+ report_progress("success", "Installer launched. Please complete the installation.", 100, 100)
+ return {"status": True, "message": "Installer launched."}
+ except Exception as e:
+ return {"status": False, "error": f"Failed to launch installer: {e}"}
+
+ elif system == "Darwin": # macOS
+ report_progress("working", "Detected macOS. Downloading Ollama...", 10, 100)
+ url = "https://ollama.com/download/Ollama-darwin.zip"
+ filename = "Ollama-darwin.zip"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download: {e}"}
+
+ report_progress("working", "Unzipping...", 80, 100)
+ with zipfile.ZipFile(filename, 'r') as zip_ref:
+ zip_ref.extractall("Ollama_Install")
+
+ report_progress("success", "Ollama downloaded and extracted to 'Ollama_Install'. Please move 'Ollama.app' to Applications.", 100, 100)
+ return {"status": True, "message": "Downloaded and extracted. Please install Ollama.app manually."}
+
+ else:
+ return {"status": False, "error": f"Unsupported OS: {system}"}
+
+ except Exception as e:
+ trace_exception(e)
+ return {"status": False, "error": str(e)}
+
+ def list_models(self) -> List[Dict[str, str]]:
  """
  Lists available models from the Ollama service using the ollama-python library.
  The returned list of dictionaries matches the format of the original template.
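Note: 1.7.10 adds pull_model and install_ollama and renames listModels to list_models. A short usage sketch for pull_model together with list_models (the progress dict keys follow the docstring above; the constructor keyword names are assumed, as earlier):

    # Hypothetical usage sketch; constructor argument names are assumed, not confirmed by this diff.
    from lollms_client.llm_bindings.ollama import OllamaBinding

    binding = OllamaBinding(host_address="http://localhost:11434", model_name="qwen3")

    def on_progress(progress: dict):
        # progress typically carries 'status', 'completed' and 'total' (see the docstring above)
        if progress.get("completed") and progress.get("total"):
            print(f"{progress['status']}: {progress['completed'] / progress['total'] * 100:.1f}%")

    result = binding.pull_model("qwen3:8b", progress_callback=on_progress)
    if result["status"]:
        for model in binding.list_models()[:5]:
            print(model)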
@@ -623,10 +826,10 @@ class OllamaBinding(LollmsLLMBinding):
  })
  return model_info_list
  except ollama.ResponseError as e:
- ASCIIColors.error(f"Ollama API listModels ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
  return []
  except ollama.RequestError as e: # Covers connection errors, timeouts during request
- ASCIIColors.error(f"Ollama API listModels RequestError: {str(e)} from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models RequestError: {str(e)} from {self.host_address}")
  return []
  except Exception as ex:
  trace_exception(ex)
@@ -660,6 +863,9 @@ class OllamaBinding(LollmsLLMBinding):
  """
  if model_name is None:
  model_name = self.model_name
+ if not model_name:
+ ASCIIColors.warning("Model name not specified and no default model set.")
+ return None

  try:
  info = ollama.show(model_name)
@@ -694,6 +900,12 @@ class OllamaBinding(LollmsLLMBinding):
  'llama3.1': 131072, # Llama 3.1 extended context
  'llama3.2': 131072, # Llama 3.2 extended context
  'llama3.3': 131072, # Assuming similar to 3.1/3.2
+ 'gpt-oss:20b': 16000, # GPT-OSS extended
+ 'gpt-oss:120b': 128000, # GPT-OSS extended
+ 'codestral': 256000, # Codestral
+ 'mistralai-medium': 128000, # Mistral medium
+ 'mistralai-mini': 128000, # Mistral medium
+ 'ministral': 256000, # Mistral medium
  'mistral': 32768, # Mistral 7B v0.2+ default
  'mixtral': 32768, # Mixtral 8x7B default
  'mixtral8x22b': 65536, # Mixtral 8x22B default
@@ -706,6 +918,9 @@ class OllamaBinding(LollmsLLMBinding):
  'qwen': 8192, # Qwen default
  'qwen2': 32768, # Qwen2 default for 7B
  'qwen2.5': 131072, # Qwen2.5 with 128K
+ 'qwen3': 128000, # Qwen3 with 128k
+ 'qwen3-vl': 128000, # Qwen3-vl with 128k
+ 'qwen3-coder': 256000, # Qwen3 with 256k
  'codellama': 16384, # CodeLlama extended
  'codegemma': 8192, # CodeGemma default
  'deepseek-coder': 16384, # DeepSeek-Coder V1 default
@@ -726,6 +941,7 @@ class OllamaBinding(LollmsLLMBinding):
  'orca2': 4096, # Orca 2 default
  'dolphin': 32768, # Dolphin (often Mistral-based)
  'openhermes': 8192, # OpenHermes default
+ 'gemini-3': 1000000, # Gemini 3 is a beast with 1M tokens
  }

  # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
@@ -749,18 +965,6 @@ class OllamaBinding(LollmsLLMBinding):
  Returns:
  list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
  Returns an empty list if the client is not initialized or if an error occurs.
-
- Example of a returned model dictionary:
- {
- "model_name": "gemma3:12b",
- "size": 13861175232,
- "vram_size": 10961479680,
- "parameters_size": "12.2B",
- "quantization_level": "Q4_K_M",
- "context_size": 32000,
- "parent_model": "",
- "expires_at": "2025-08-20T22:28:18.6708784+02:00"
- }
  """
  if not self.ollama_client:
  ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
@@ -775,10 +979,22 @@ class OllamaBinding(LollmsLLMBinding):
  for model_data in models_list:
  details = model_data.get('details', {})

+ size = model_data.get("size", 0)
+ size_vram = model_data.get("size_vram", 0)
+
+ # Calculate spread
+ gpu_usage = 0
+ cpu_usage = 0
+ if size > 0:
+ gpu_usage = min(100, (size_vram / size) * 100)
+ cpu_usage = max(0, 100 - gpu_usage)
+
  flat_model_info = {
  "model_name": model_data.get("name"),
- "size": model_data.get("size"),
- "vram_size": model_data.get("size_vram"),
+ "size": size,
+ "vram_size": size_vram,
+ "gpu_usage_percent": round(gpu_usage, 2),
+ "cpu_usage_percent": round(cpu_usage, 2),
  "expires_at": model_data.get("expires_at"),
  "parameters_size": details.get("parameter_size"),
  "quantization_level": details.get("quantization_level"),
@@ -815,7 +1031,7 @@ if __name__ == '__main__':

  # --- List Models ---
  ASCIIColors.cyan("\n--- Listing Models ---")
- models = binding.listModels()
+ models = binding.list_models()
  if models:
  ASCIIColors.green(f"Found {len(models)} models. First 5:")
  for m in models[:5]:
@@ -846,7 +1062,7 @@ if __name__ == '__main__':
  ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
  prompt_text = "Why is the sky blue?"
  ASCIIColors.info(f"Prompt: {prompt_text}")
- generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
+ generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False, think=False)
  if isinstance(generated_text, str):
  ASCIIColors.green(f"Generated text: {generated_text}")
  else:
@@ -941,4 +1157,4 @@ if __name__ == '__main__':
  ASCIIColors.error(f"An error occurred during testing: {e}")
  trace_exception(e)

- ASCIIColors.yellow("\nOllamaBinding test finished.")
+ ASCIIColors.yellow("\nOllamaBinding test finished.")
lollms_client/llm_bindings/open_router/__init__.py

@@ -227,7 +227,7 @@ class OpenRouterBinding(LollmsLLMBinding):
  "supports_vision": "Depends on the specific model selected. This generic binding does not support vision.",
  }

- def listModels(self) -> List[Dict[str, str]]:
+ def list_models(self) -> List[Dict[str, str]]:
  """Lists available models from the OpenRouter service."""
  if not self.client:
  ASCIIColors.error("OpenRouter client not initialized. Cannot list models.")
@@ -274,7 +274,7 @@ if __name__ == '__main__':

  # --- List Models ---
  ASCIIColors.cyan("\n--- Listing Models ---")
- models = binding.listModels()
+ models = binding.list_models()
  if models:
  ASCIIColors.green(f"Successfully fetched {len(models)} models from OpenRouter.")
  ASCIIColors.info("Sample of available models:")