lollms-client 1.5.6__py3-none-any.whl → 1.7.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -35
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -15
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
  10. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  11. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  12. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  13. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  14. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  15. lollms_client/llm_bindings/ollama/__init__.py +345 -89
  16. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  17. lollms_client/llm_bindings/openai/__init__.py +81 -20
  18. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  19. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  20. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  21. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  22. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  23. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  24. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  25. lollms_client/lollms_agentic.py +4 -2
  26. lollms_client/lollms_base_binding.py +61 -0
  27. lollms_client/lollms_core.py +512 -1890
  28. lollms_client/lollms_discussion.py +65 -39
  29. lollms_client/lollms_llm_binding.py +126 -261
  30. lollms_client/lollms_mcp_binding.py +49 -77
  31. lollms_client/lollms_stt_binding.py +99 -52
  32. lollms_client/lollms_tti_binding.py +38 -38
  33. lollms_client/lollms_ttm_binding.py +38 -42
  34. lollms_client/lollms_tts_binding.py +43 -18
  35. lollms_client/lollms_ttv_binding.py +38 -42
  36. lollms_client/lollms_types.py +4 -2
  37. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  38. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  39. lollms_client/tti_bindings/diffusers/__init__.py +464 -803
  40. lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
  41. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  42. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  43. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  44. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  45. lollms_client/tti_bindings/openai/__init__.py +10 -11
  46. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  47. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  48. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  49. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  50. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  51. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  52. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  53. lollms_client/tts_bindings/bark/__init__.py +7 -10
  54. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  55. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  56. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  57. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  58. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
  59. lollms_client-1.7.13.dist-info/RECORD +90 -0
  60. lollms_client-1.5.6.dist-info/RECORD +0 -87
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
  63. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0
@@ -7,13 +7,18 @@ from lollms_client.lollms_types import MSG_TYPE
  # from lollms_client.lollms_utilities import encode_image
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
  from lollms_client.lollms_discussion import LollmsDiscussion
- from typing import Optional, Callable, List, Union, Dict
+ from typing import Optional, Callable, List, Union, Dict, Any

  from ascii_colors import ASCIIColors, trace_exception
  import pipmaster as pm
  from lollms_client.lollms_utilities import ImageTokenizer
  pm.ensure_packages(["ollama","pillow","tiktoken"])
  import re
+ import platform
+ import subprocess
+ import urllib.request
+ import zipfile
+ import os

  import ollama
  import tiktoken
@@ -57,7 +62,9 @@ def count_tokens_ollama(
  res = ollama_client.chat(
  model=model_name,
  messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
- stream=False,options={"num_predict":1}
+ stream=False,
+ think=False,
+ options={"num_predict":1}
  )

  return res.prompt_eval_count-5
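Illustrative aside (not part of the package diff): the helper above counts tokens by sending the text as a one-token probe generation and reading prompt_eval_count from the response; the -5 offset presumably discounts the chat-template tokens wrapped around the probe. A minimal standalone sketch of the same trick, assuming a reachable local Ollama server; the model name is an assumption:

    import ollama

    client = ollama.Client(host="http://localhost:11434")
    res = client.chat(
        model="llama3",  # illustrative model name
        messages=[{"role": "system", "content": ""},
                  {"role": "user", "content": "Count my tokens please"}],
        stream=False,
        think=False,                 # skip any thinking phase, as the helper now does
        options={"num_predict": 1},  # generate a single token; only prompt_eval_count is needed
    )
    print(res.prompt_eval_count - 5)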
@@ -108,24 +115,28 @@ class OllamaBinding(LollmsLLMBinding):
  raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e

  def generate_text(self,
- prompt: str,
- images: Optional[List[str]] = None,
- system_prompt: str = "",
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
- top_k: int = 40, # Ollama default is 40
- top_p: float = 0.9, # Ollama default is 0.9
- repeat_penalty: float = 1.1, # Ollama default is 1.1
- repeat_last_n: int = 64, # Ollama default is 64
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
- split:Optional[bool]=False, # put to true if the prompt is a discussion
- user_keyword:Optional[str]="!@>user:",
- ai_keyword:Optional[str]="!@>assistant:",
- ) -> Union[str, dict]:
+ prompt: str,
+ images: Optional[List[str]] = None,
+ system_prompt: str = "",
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+ top_k: int = 40, # Ollama default is 40
+ top_p: float = 0.9, # Ollama default is 0.9
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
+ repeat_last_n: int = 64, # Ollama default is 64
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+ ) -> Union[str, dict]:
  """
  Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -168,6 +179,8 @@ class OllamaBinding(LollmsLLMBinding):
  if ctx_size is not None: options['num_ctx'] = ctx_size

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  if images: # Multimodal
@@ -176,6 +189,8 @@ class OllamaBinding(LollmsLLMBinding):
  for img_path in images:
  # Assuming img_path is a file path. ollama-python will read and encode it.
  # If images were base64 strings, they would need decoding to bytes first.
+ if img_path.startswith("data:image/png;base64,"):
+ img_path = img_path[len("data:image/png;base64,"):]
  processed_images.append(img_path)

  messages = [
@@ -192,24 +207,37 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.get('message', {}).get('content', '')
- if chunk_content: # Ensure there is content to process
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
  break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
  else: # Text-only
  messages = [
  {'role': 'system', 'content':system_prompt},
@@ -224,24 +252,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
- for chunk_dict in response_stream:
- chunk_content = chunk_dict.message.content
- if chunk_content:
+ in_thinking = False
+ for chunk in response_stream:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.message.content
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text
+
  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
  ASCIIColors.error(error_message)
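Illustrative aside (not part of the package diff): the hunks above introduce a <think> tag convention for reasoning-capable models. A minimal sketch of the non-streaming behaviour plus hypothetical usage; the helper name, the constructor arguments and the model name are assumptions for illustration only:

    from typing import Optional

    def wrap_with_think(thinking: Optional[str], content: str, think: bool) -> str:
        """Prefix the answer with the model's thinking, fenced in <think> tags."""
        if think and thinking:
            return "<think>\n" + thinking + "\n</think>\n" + content
        return content

    # Hypothetical usage against a local Ollama server with a thinking-capable model:
    # binding = OllamaBinding(model_name="qwen3", host_address="http://localhost:11434")
    # text = binding.generate_text("Why is the sky blue?", stream=False, think=True)
    # -> "<think>\n...model reasoning...\n</think>\nThe sky appears blue because..."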
@@ -268,6 +310,9 @@ class OllamaBinding(LollmsLLMBinding):
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
  **kwargs
  ) -> Union[str, dict]:
  if not self.ollama_client:
@@ -296,12 +341,23 @@ class OllamaBinding(LollmsLLMBinding):
  for item in content:
  if item.get("type") == "text":
  text_parts.append(item.get("text", ""))
- elif item.get("type") == "input_image":
+ elif item.get("type") == "input_image" or item.get("type") == "image_url":
  base64_data = item.get("image_url")
  if base64_data:
- # ⚠️ remove prefix "data:image/...;base64,"
- cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
- images.append(cleaned)
+ if isinstance(base64_data, str):
+ # ⚠️ remove prefix "data:image/...;base64,"
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
+ images.append(cleaned)
+ elif base64_data and isinstance(base64_data, dict) :
+ if "base64" in base64_data:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["base64"])
+ images.append(cleaned)
+ elif "url" in base64_data :
+ if "http" in base64_data["url"]:
+ images.append(base64_data["url"])
+ else:
+ cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["url"])
+ images.append(cleaned)


  return {
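Illustrative aside (not part of the package diff): the updated parser above now accepts both string and dictionary image payloads. Hypothetical examples of the content-item shapes it handles; all values are made up for illustration:

    items = [
        {"type": "text", "text": "Describe this image"},
        {"type": "input_image", "image_url": "data:image/png;base64,iVBORw0KGgo..."},          # data-URL string, prefix stripped
        {"type": "image_url", "image_url": {"base64": "data:image/jpeg;base64,/9j/4AA..."}},    # dict carrying a base64 payload
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},             # dict carrying a remote URL, passed through
    ]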
@@ -333,6 +389,7 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=ollama_messages,
  stream=True,
+ think = think,
  options=options if options else None
  )
  for chunk_dict in response_stream:
@@ -344,13 +401,17 @@ class OllamaBinding(LollmsLLMBinding):
  break
  return full_response_text
  else:
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=ollama_messages,
  stream=False,
+ think=think if "gpt-oss" not in self.model_name else reasoning_effort,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -364,37 +425,28 @@ class OllamaBinding(LollmsLLMBinding):
  error_message = f"An unexpected error occurred: {str(ex)}"
  trace_exception(ex)
  return {"status": False, "error": error_message}
-
-
- except ollama.ResponseError as e:
- error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message, "status_code": e.status_code}
- except ollama.RequestError as e: # Covers connection errors, timeouts during request
- error_message = f"Ollama API RequestError: {str(e)}"
- ASCIIColors.error(error_message)
- return {"status": False, "error": error_message}
- except Exception as ex:
- error_message = f"An unexpected error occurred: {str(ex)}"
- trace_exception(ex)
- return {"status": False, "error": error_message}


  def chat(self,
- discussion: LollmsDiscussion,
- branch_tip_id: Optional[str] = None,
- n_predict: Optional[int] = None,
- stream: Optional[bool] = None,
- temperature: float = 0.7,
- top_k: int = 40,
- top_p: float = 0.9,
- repeat_penalty: float = 1.1,
- repeat_last_n: int = 64,
- seed: Optional[int] = None,
- n_threads: Optional[int] = None,
- ctx_size: Optional[int] = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
- ) -> Union[str, dict]:
+ discussion: LollmsDiscussion,
+ branch_tip_id: Optional[str] = None,
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: Optional[int] = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ think: Optional[bool] = False,
+ reasoning_effort: Optional[bool] = "low", # low, medium, high
+ reasoning_summary: Optional[bool] = "auto", # auto
+ **kwargs
+
+ ) -> Union[str, dict]:
  """
  Conduct a chat session with the Ollama model using a LollmsDiscussion object.

@@ -439,6 +491,8 @@ class OllamaBinding(LollmsLLMBinding):
  options = {k: v for k, v in options.items() if v is not None}

  full_response_text = ""
+ think = think if "gpt-oss" not in self.model_name else reasoning_effort
+ ASCIIColors.magenta(f"Generation with think: {think}")

  try:
  # 3. Call the Ollama API
@@ -447,24 +501,38 @@ class OllamaBinding(LollmsLLMBinding):
  model=self.model_name,
  messages=messages,
  stream=True,
+ think=think,
  options=options if options else None
  )
+ in_thinking = False
  for chunk in response_stream:
- chunk_content = chunk.get('message', {}).get('content', '')
- if chunk_content:
+ if chunk.message.thinking and not in_thinking:
+ full_response_text += "<think>\n"
+ in_thinking = True
+
+ if chunk.message.content:# Ensure there is content to process
+ chunk_content = chunk.message.content
+ if in_thinking:
+ full_response_text += "\n</think>\n"
+ in_thinking = False
  full_response_text += chunk_content
  if streaming_callback:
  if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
- break
+ break # Callback requested stop
+
  return full_response_text
  else: # Not streaming
- response_dict = self.ollama_client.chat(
+ response = self.ollama_client.chat(
  model=self.model_name,
  messages=messages,
  stream=False,
+ think=think,
  options=options if options else None
  )
- return response_dict.get('message', {}).get('content', '')
+ full_response_text = response.message.content
+ if think:
+ full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+ return full_response_text

  except ollama.ResponseError as e:
  error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -595,7 +663,182 @@ class OllamaBinding(LollmsLLMBinding):
  "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
  }

- def listModels(self) -> List[Dict[str, str]]:
+ def pull_model(self, model_name: str, progress_callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Pulls a model from the Ollama library.
+
+ Args:
+ model_name (str): The name of the model to pull.
+ progress_callback (Callable[[dict], None], optional): A callback function that receives progress updates.
+ The dict typically contains 'status', 'completed', 'total'.
+
+ Returns:
+ dict: Dictionary with status (bool) and message (str).
+ """
+ if not self.ollama_client:
+ msg = "Ollama client not initialized. Cannot pull model."
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+
+ try:
+ ASCIIColors.info(f"Pulling model {model_name}...")
+ # Stream the pull progress
+ for progress in self.ollama_client.pull(model_name, stream=True):
+ # Send raw progress to callback if provided
+ if progress_callback:
+ progress_callback(progress)
+
+ # Default console logging
+ status = progress.get('status', '')
+ completed = progress.get('completed')
+ total = progress.get('total')
+
+ if completed and total:
+ percent = (completed / total) * 100
+ print(f"\r{status}: {percent:.2f}%", end="", flush=True)
+ else:
+ print(f"\r{status}", end="", flush=True)
+
+ print() # Clear line
+ msg = f"Model {model_name} pulled successfully."
+ ASCIIColors.success(msg)
+ return {"status": True, "message": msg}
+
+ except ollama.ResponseError as e:
+ msg = f"Ollama API Pull Error: {e.error or 'Unknown error'} (status code: {e.status_code})"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except ollama.RequestError as e:
+ msg = f"Ollama API Request Error: {str(e)}"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ except Exception as ex:
+ msg = f"An unexpected error occurred while pulling model: {str(ex)}"
+ ASCIIColors.error(msg)
+ trace_exception(ex)
+ return {"status": False, "message": msg}
+
+ def get_zoo(self) -> List[Dict[str, Any]]:
+ """
+ Returns a list of models available for download.
+ each entry is a dict with:
+ name, description, size, type, link
+ """
+ return [
+ {"name": "Llama3 8B", "description": "Meta's Llama 3 8B model. Good for general purpose chat.", "size": "4.7GB", "type": "model", "link": "llama3"},
+ {"name": "Llama3 70B", "description": "Meta's Llama 3 70B model. High capability.", "size": "40GB", "type": "model", "link": "llama3:70b"},
+ {"name": "Phi-3 Mini", "description": "Microsoft's Phi-3 Mini 3.8B model. Lightweight and capable.", "size": "2.3GB", "type": "model", "link": "phi3"},
+ {"name": "Phi-3 Medium", "description": "Microsoft's Phi-3 Medium 14B model.", "size": "7.9GB", "type": "model", "link": "phi3:medium"},
+ {"name": "Mistral 7B", "description": "Mistral AI's 7B model v0.3.", "size": "4.1GB", "type": "model", "link": "mistral"},
+ {"name": "Mixtral 8x7B", "description": "Mistral AI's Mixture of Experts model.", "size": "26GB", "type": "model", "link": "mixtral"},
+ {"name": "Gemma 2 9B", "description": "Google's Gemma 2 9B model.", "size": "5.4GB", "type": "model", "link": "gemma2"},
+ {"name": "Gemma 2 27B", "description": "Google's Gemma 2 27B model.", "size": "16GB", "type": "model", "link": "gemma2:27b"},
+ {"name": "Qwen 2.5 7B", "description": "Alibaba Cloud's Qwen2.5 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5"},
+ {"name": "Qwen 2.5 Coder 7B", "description": "Alibaba Cloud's Qwen2.5 Coder 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5-coder"},
+ {"name": "CodeLlama 7B", "description": "Meta's CodeLlama 7B model.", "size": "3.8GB", "type": "model", "link": "codellama"},
+ {"name": "LLaVA 7B", "description": "Visual instruction tuning model (Vision).", "size": "4.5GB", "type": "model", "link": "llava"},
+ {"name": "Nomic Embed Text", "description": "A high-performing open embedding model.", "size": "274MB", "type": "embedding", "link": "nomic-embed-text"},
+ {"name": "DeepSeek Coder V2", "description": "DeepSeek Coder V2 model.", "size": "8.9GB", "type": "model", "link": "deepseek-coder-v2"},
+ {"name": "OpenHermes 2.5 Mistral", "description": "High quality finetune of Mistral 7B.", "size": "4.1GB", "type": "model", "link": "openhermes"},
+ {"name": "Dolphin Phi", "description": "Uncensored Dolphin fine-tune of Phi-2.", "size": "1.6GB", "type": "model", "link": "dolphin-phi"},
+ {"name": "TinyLlama", "description": "A compact 1.1B model.", "size": "637MB", "type": "model", "link": "tinyllama"},
+ ]
+
+ def download_from_zoo(self, index: int, progress_callback: Callable[[dict], None] = None) -> dict:
+ """
+ Downloads a model from the zoo using its index.
+ """
+ zoo = self.get_zoo()
+ if index < 0 or index >= len(zoo):
+ msg = "Index out of bounds"
+ ASCIIColors.error(msg)
+ return {"status": False, "message": msg}
+ item = zoo[index]
+ return self.pull_model(item["link"], progress_callback=progress_callback)
+
+ def install_ollama(self, callback: Callable[[dict], None] = None, **kwargs) -> dict:
+ """
+ Installs Ollama based on the operating system.
+ """
+ system = platform.system()
+
+ def report_progress(status, message, completed=0, total=100):
+ if callback:
+ callback({"status": status, "message": message, "completed": completed, "total": total})
+ else:
+ print(f"{status}: {message}")
+
+ try:
+ if system == "Linux":
+ report_progress("working", "Detected Linux. Running installation script...", 10, 100)
+ # Use the official install script
+ cmd = "curl -fsSL https://ollama.com/install.sh | sh"
+ process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+ stdout, stderr = process.communicate()
+
+ if process.returncode == 0:
+ report_progress("success", "Ollama installed successfully on Linux.", 100, 100)
+ return {"status": True, "message": "Ollama installed successfully."}
+ else:
+ msg = f"Installation failed: {stderr}"
+ report_progress("error", msg, 0, 0)
+ return {"status": False, "error": msg}
+
+ elif system == "Windows":
+ report_progress("working", "Detected Windows. Downloading OllamaSetup.exe...", 10, 100)
+ url = "https://ollama.com/download/OllamaSetup.exe"
+ filename = "OllamaSetup.exe"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download installer: {e}"}
+
+ report_progress("working", "Running installer...", 90, 100)
+ try:
+ subprocess.run([filename], check=True) # Runs the installer GUI
+ # We can't easily wait for the GUI installer to finish unless we block or it has silent flags.
+ # Ollama installer is usually simple.
+ report_progress("success", "Installer launched. Please complete the installation.", 100, 100)
+ return {"status": True, "message": "Installer launched."}
+ except Exception as e:
+ return {"status": False, "error": f"Failed to launch installer: {e}"}
+
+ elif system == "Darwin": # macOS
+ report_progress("working", "Detected macOS. Downloading Ollama...", 10, 100)
+ url = "https://ollama.com/download/Ollama-darwin.zip"
+ filename = "Ollama-darwin.zip"
+
+ # Download with progress
+ try:
+ def dl_callback(count, block_size, total_size):
+ percent = int(count * block_size * 100 / total_size)
+ report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+ urllib.request.urlretrieve(url, filename, dl_callback)
+ except Exception as e:
+ return {"status": False, "error": f"Failed to download: {e}"}
+
+ report_progress("working", "Unzipping...", 80, 100)
+ with zipfile.ZipFile(filename, 'r') as zip_ref:
+ zip_ref.extractall("Ollama_Install")
+
+ report_progress("success", "Ollama downloaded and extracted to 'Ollama_Install'. Please move 'Ollama.app' to Applications.", 100, 100)
+ return {"status": True, "message": "Downloaded and extracted. Please install Ollama.app manually."}
+
+ else:
+ return {"status": False, "error": f"Unsupported OS: {system}"}
+
+ except Exception as e:
+ trace_exception(e)
+ return {"status": False, "error": str(e)}
+
+ def list_models(self) -> List[Dict[str, str]]:
  """
  Lists available models from the Ollama service using the ollama-python library.
  The returned list of dictionaries matches the format of the original template.
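Illustrative aside (not part of the package diff): possible usage of the new model-management helpers, assuming an already-constructed OllamaBinding instance named `binding` and a reachable Ollama server:

    def show_progress(progress: dict) -> None:
        # per the pull_model docstring, progress typically carries 'status', 'completed', 'total'
        status = progress.get("status", "")
        completed, total = progress.get("completed"), progress.get("total")
        if completed and total:
            print(f"{status}: {completed / total * 100:.1f}%")
        else:
            print(status)

    # Pull a model directly by name...
    result = binding.pull_model("tinyllama", progress_callback=show_progress)

    # ...or browse the built-in zoo and pull an entry by index
    if result["status"]:
        zoo = binding.get_zoo()
        print([entry["name"] for entry in zoo])
        binding.download_from_zoo(len(zoo) - 1, progress_callback=show_progress)  # last entry: TinyLlama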
@@ -621,10 +864,10 @@ class OllamaBinding(LollmsLLMBinding):
  })
  return model_info_list
  except ollama.ResponseError as e:
- ASCIIColors.error(f"Ollama API listModels ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
  return []
  except ollama.RequestError as e: # Covers connection errors, timeouts during request
- ASCIIColors.error(f"Ollama API listModels RequestError: {str(e)} from {self.host_address}")
+ ASCIIColors.error(f"Ollama API list_models RequestError: {str(e)} from {self.host_address}")
  return []
  except Exception as ex:
  trace_exception(ex)
@@ -658,6 +901,9 @@ class OllamaBinding(LollmsLLMBinding):
  """
  if model_name is None:
  model_name = self.model_name
+ if not model_name:
+ ASCIIColors.warning("Model name not specified and no default model set.")
+ return None

  try:
  info = ollama.show(model_name)
@@ -692,6 +938,12 @@ class OllamaBinding(LollmsLLMBinding):
  'llama3.1': 131072, # Llama 3.1 extended context
  'llama3.2': 131072, # Llama 3.2 extended context
  'llama3.3': 131072, # Assuming similar to 3.1/3.2
+ 'gpt-oss:20b': 16000, # GPT-OSS extended
+ 'gpt-oss:120b': 128000, # GPT-OSS extended
+ 'codestral': 256000, # Codestral
+ 'mistralai-medium': 128000, # Mistral medium
+ 'mistralai-mini': 128000, # Mistral medium
+ 'ministral': 256000, # Mistral medium
  'mistral': 32768, # Mistral 7B v0.2+ default
  'mixtral': 32768, # Mixtral 8x7B default
  'mixtral8x22b': 65536, # Mixtral 8x22B default
@@ -704,6 +956,9 @@ class OllamaBinding(LollmsLLMBinding):
  'qwen': 8192, # Qwen default
  'qwen2': 32768, # Qwen2 default for 7B
  'qwen2.5': 131072, # Qwen2.5 with 128K
+ 'qwen3': 128000, # Qwen3 with 128k
+ 'qwen3-vl': 128000, # Qwen3-vl with 128k
+ 'qwen3-coder': 256000, # Qwen3 with 256k
  'codellama': 16384, # CodeLlama extended
  'codegemma': 8192, # CodeGemma default
  'deepseek-coder': 16384, # DeepSeek-Coder V1 default
@@ -724,6 +979,7 @@ class OllamaBinding(LollmsLLMBinding):
  'orca2': 4096, # Orca 2 default
  'dolphin': 32768, # Dolphin (often Mistral-based)
  'openhermes': 8192, # OpenHermes default
+ 'gemini-3': 1000000, # Gemini 3 is a beast with 1M tokens
  }

  # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
@@ -747,18 +1003,6 @@ class OllamaBinding(LollmsLLMBinding):
  Returns:
  list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
  Returns an empty list if the client is not initialized or if an error occurs.
-
- Example of a returned model dictionary:
- {
- "model_name": "gemma3:12b",
- "size": 13861175232,
- "vram_size": 10961479680,
- "parameters_size": "12.2B",
- "quantization_level": "Q4_K_M",
- "context_size": 32000,
- "parent_model": "",
- "expires_at": "2025-08-20T22:28:18.6708784+02:00"
- }
  """
  if not self.ollama_client:
  ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
@@ -773,10 +1017,22 @@ class OllamaBinding(LollmsLLMBinding):
  for model_data in models_list:
  details = model_data.get('details', {})

+ size = model_data.get("size", 0)
+ size_vram = model_data.get("size_vram", 0)
+
+ # Calculate spread
+ gpu_usage = 0
+ cpu_usage = 0
+ if size > 0:
+ gpu_usage = min(100, (size_vram / size) * 100)
+ cpu_usage = max(0, 100 - gpu_usage)
+
  flat_model_info = {
  "model_name": model_data.get("name"),
- "size": model_data.get("size"),
- "vram_size": model_data.get("size_vram"),
+ "size": size,
+ "vram_size": size_vram,
+ "gpu_usage_percent": round(gpu_usage, 2),
+ "cpu_usage_percent": round(cpu_usage, 2),
  "expires_at": model_data.get("expires_at"),
  "parameters_size": details.get("parameter_size"),
  "quantization_level": details.get("quantization_level"),
@@ -813,7 +1069,7 @@ if __name__ == '__main__':

  # --- List Models ---
  ASCIIColors.cyan("\n--- Listing Models ---")
- models = binding.listModels()
+ models = binding.list_models()
  if models:
  ASCIIColors.green(f"Found {len(models)} models. First 5:")
  for m in models[:5]:
@@ -844,7 +1100,7 @@ if __name__ == '__main__':
  ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
  prompt_text = "Why is the sky blue?"
  ASCIIColors.info(f"Prompt: {prompt_text}")
- generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
+ generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False, think=False)
  if isinstance(generated_text, str):
  ASCIIColors.green(f"Generated text: {generated_text}")
  else: