lollms-client 1.5.6-py3-none-any.whl → 1.7.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -35
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -15
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
  10. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  11. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  12. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  13. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  14. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  15. lollms_client/llm_bindings/ollama/__init__.py +345 -89
  16. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  17. lollms_client/llm_bindings/openai/__init__.py +81 -20
  18. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  19. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  20. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  21. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  22. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  23. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  24. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  25. lollms_client/lollms_agentic.py +4 -2
  26. lollms_client/lollms_base_binding.py +61 -0
  27. lollms_client/lollms_core.py +512 -1890
  28. lollms_client/lollms_discussion.py +65 -39
  29. lollms_client/lollms_llm_binding.py +126 -261
  30. lollms_client/lollms_mcp_binding.py +49 -77
  31. lollms_client/lollms_stt_binding.py +99 -52
  32. lollms_client/lollms_tti_binding.py +38 -38
  33. lollms_client/lollms_ttm_binding.py +38 -42
  34. lollms_client/lollms_tts_binding.py +43 -18
  35. lollms_client/lollms_ttv_binding.py +38 -42
  36. lollms_client/lollms_types.py +4 -2
  37. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  38. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  39. lollms_client/tti_bindings/diffusers/__init__.py +464 -803
  40. lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
  41. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  42. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  43. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  44. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  45. lollms_client/tti_bindings/openai/__init__.py +10 -11
  46. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  47. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  48. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  49. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  50. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  51. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  52. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  53. lollms_client/tts_bindings/bark/__init__.py +7 -10
  54. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  55. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  56. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  57. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  58. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
  59. lollms_client-1.7.13.dist-info/RECORD +90 -0
  60. lollms_client-1.5.6.dist-info/RECORD +0 -87
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
  63. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0
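Before relying on any of the modules listed above, it can help to confirm which of the two wheels is actually installed in the environment. A minimal, generic sketch (not part of the package itself), using only the standard library:

from importlib.metadata import version

installed = version("lollms-client")
print("lollms-client", installed)  # expected: 1.7.13 after upgrading from 1.5.6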
lollms_client/llm_bindings/openllm/__init__.py
@@ -1,550 +1,406 @@
- # bindings/openllm/binding.py
- import requests # May not be strictly needed if openllm client handles all
+ import requests
  import json
- from lollms_client.lollms_llm_binding import LollmsLLMBinding
- from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
- from lollms_client.lollms_utilities import encode_image # Keep for potential image handling
+ import base64
+ import os
+ import mimetypes
+ import math
  from typing import Optional, Callable, List, Union, Dict

- from ascii_colors import ASCIIColors, trace_exception
+ import httpx
+ import tiktoken
  import pipmaster as pm

- # Ensure openllm, pillow (for dummy image), and tiktoken are installed
- pm.ensure_packages(["openllm", "pillow", "tiktoken"])
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
+ from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+ from lollms_client.lollms_discussion import LollmsDiscussion
+ from lollms_client.lollms_utilities import encode_image
+ from ascii_colors import ASCIIColors, trace_exception

- import openllm
- import tiktoken # For fallback tokenization
+ # Ensure required packages are installed
+ pm.ensure_packages(["httpx", "tiktoken"])

  BindingName = "OpenLLMBinding"

- # Helper function to count tokens by making a minimal API call
- # This is more accurate for the specific model than a generic tokenizer
- def count_tokens_openllm(
-     text_to_tokenize: str,
-     openllm_client: openllm.client.HTTPClient,
-     timeout: int = 60,
- ) -> int:
+
+ def _read_file_as_base64(path):
+     with open(path, "rb") as f:
+         return base64.b64encode(f.read()).decode("utf-8")
+
+
+ def _extract_markdown_path(s):
+     s = s.strip()
+     if s.startswith("[") and s.endswith(")"):
+         lb, rb = s.find("["), s.find("]")
+         if lb != -1 and rb != -1 and rb > lb:
+             return s[lb + 1 : rb].strip()
+     return s
+
+
+ def _guess_mime_from_name(name, default="image/jpeg"):
+     mime, _ = mimetypes.guess_type(name)
+     return mime or default
+
+
+ def _to_data_url(b64_str, mime):
+     return f"data:{mime};base64,{b64_str}"
+
+
+ def normalize_image_input(img, default_mime="image/jpeg"):
      """
-     Counts the number of tokens in a given text for the connected OpenLLM model
-     by making a minimal request to the /v1/generate endpoint and extracting
-     the length of 'prompt_token_ids' from the response.
+     Returns an OpenAI API‑compatible content block for an image.
+     Accepts various input formats and converts them to a data URL.
      """
-     try:
-         # Make a generation request asking for 0 or 1 new token
-         # Some models might require at least 1 max_new_tokens
-         llm_config = openllm.LLMConfig(max_new_tokens=1).model_dump(flatten=True, omit_default=True)
-         response = openllm_client.generate(prompt=text_to_tokenize, llm_config=llm_config, timeout=timeout)
-
-         if response.prompt_token_ids is not None and len(response.prompt_token_ids) > 0:
-             # The prompt_token_ids from OpenLLM often include special tokens (e.g., BOS)
-             # depending on the model's tokenizer configuration.
-             # For consistency with typical "user text token count", we might need to adjust.
-             # However, for now, let's return the raw count from the model.
-             # A simple heuristic might be to subtract 1 for a BOS token if always present.
-             # This needs model-specific knowledge or further investigation.
-             # For llama3 with ollama, it was prompt_eval_count - 5 (system, user, content etc)
-             # For OpenLLM, it's harder to generalize the "overhead".
-             # Let's assume prompt_token_ids is the count of tokens for the user's text.
-             return len(response.prompt_token_ids)
-         else:
-             # Fallback if prompt_token_ids is not available or empty
-             ASCIIColors.warning("prompt_token_ids not found in OpenLLM response, using tiktoken for count_tokens.")
-             return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
-     except Exception as e:
-         ASCIIColors.warning(f"Failed to count tokens via OpenLLM API, using tiktoken fallback: {e}")
-         return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+     if isinstance(img, str):
+         # Handle path‑like strings or raw base64
+         s = _extract_markdown_path(img)
+         if os.path.exists(s):
+             b64 = _read_file_as_base64(s)
+             mime = _guess_mime_from_name(s, default_mime)
+             url = _to_data_url(b64, mime)
+         else: # Assume it's a raw base64 string
+             url = _to_data_url(s, default_mime)
+         return {"type": "image_url", "image_url": {"url": url}}
+
+     raise ValueError("Unsupported image input type for OpenLLM")


  class OpenLLMBinding(LollmsLLMBinding):
-     """OpenLLM-specific binding implementation using the openllm-python client."""
-
-     DEFAULT_HOST_ADDRESS = "http://localhost:3000" # Default OpenLLM server address
-
-     def __init__(self,
-                  **kwargs
-                  ):
-         """ Initialize the OpenLLM binding.
+     """OpenLLM-specific binding implementation"""
+
+     def __init__(self, **kwargs):
+         """
+         Initialize the OpenLLM binding.
+
          Args:
-             host_address (str): The address of the OpenLLM server (default: http://localhost:3000).
-             model_name (str): The name of the model to connect to. This is primarily for informational purposes.
-             service_key (Optional[str]): Optional service key for authentication, not used by openllm client.
-             verify_ssl_certificate (bool): Whether to verify SSL certificates (default: True).
-             timeout (int): Timeout for client requests in seconds (default: 120).
+             host_address (str): URL of the OpenLLM server (e.g. ``http://localhost:3000``).
+             model_name (str): Name of the model to use.
+             service_key (str): Authentication token for the service (optional).
+             verify_ssl_certificate (bool): Whether to verify SSL certificates.
          """
-         host_address = kwargs.get("host_address")
-         _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
          super().__init__(BindingName, **kwargs)
-         self.host_address = _host_address
-         self.model_name = kwargs.get("model_name") # Can be set by load_model or from config
-         self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
-         self.timeout = kwargs.get("timeout")
-
-         if openllm is None or openllm.client is None:
-             raise ImportError("OpenLLM library is not installed or client module not found. Please run 'pip install openllm'.")
-
-         try:
-             self.openllm_client = openllm.client.HTTPClient(
-                 address=self.host_address,
-                 timeout=self.timeout
+         self.host_address = kwargs.get("host_address")
+         self.model_name = kwargs.get("model_name")
+         self.service_key = kwargs.get("service_key", os.getenv("OPENLLM_API_KEY"))
+         self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
+
+         if not self.host_address:
+             raise ValueError("OpenLLM host address is required.")
+
+         # Build headers – only include Authorization if a key is actually provided
+         headers = {"Content-Type": "application/json"}
+         if self.service_key:
+             headers["Authorization"] = f"Bearer {self.service_key}"
+         else:
+             ASCIIColors.warning(
+                 "No service key provided for OpenLLM. Requests will be made without Authorization header."
              )
-             # Perform a quick health check or metadata fetch to confirm connection
-             if not self._verify_connection():
-                 raise ConnectionError(f"Failed to connect or verify OpenLLM server at {self.host_address}")
-
-             # Try to fetch model_name if not provided
-             if not self.model_name:
-                 metadata = self._get_model_metadata_from_server()
-                 if metadata and 'model_id' in metadata:
-                     self.model_name = metadata['model_id']
-                 else:
-                     ASCIIColors.warning("Could not automatically determine model name from OpenLLM server.")

-         except Exception as e:
-             ASCIIColors.error(f"Failed to initialize OpenLLM client: {e}")
-             self.openllm_client = None
-             raise ConnectionError(f"Could not connect or initialize OpenLLM client at {self.host_address}: {e}") from e
+         # Append /v1 to the base URL for OpenAI compatibility
+         base_url = f"{self.host_address.rstrip('/')}/v1"
+
+         self.client = httpx.Client(
+             base_url=base_url,
+             headers=headers,
+             verify=self.verify_ssl_certificate,
+             timeout=None,
+         )
+
+     # --------------------------------------------------------------------- #
+     # Helper methods
+     # --------------------------------------------------------------------- #
+     def _build_request_params(self, messages: list, **kwargs) -> dict:
+         """Construct the JSON payload expected by the OpenLLM /chat/completions endpoint."""
+         params = {
+             "model": kwargs.get("model", self.model_name),
+             "messages": messages,
+             "stream": kwargs.get("stream", True),
+         }

-     def _verify_connection(self) -> bool:
-         if not self.openllm_client:
-             return False
+         # Map Lollms parameters to OpenAI‑compatible fields
+         if "n_predict" in kwargs and kwargs["n_predict"] is not None:
+             params["max_tokens"] = kwargs["n_predict"]
+         if "temperature" in kwargs and kwargs["temperature"] is not None:
+             params["temperature"] = kwargs["temperature"]
+         if "top_p" in kwargs and kwargs["top_p"] is not None:
+             params["top_p"] = kwargs["top_p"]
+         if "top_k" in kwargs and kwargs["top_k"] is not None:
+             params["top_k"] = kwargs["top_k"]
+         if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
+             params["frequency_penalty"] = kwargs["repeat_penalty"]
+         if "seed" in kwargs and kwargs["seed"] is not None:
+             params["seed"] = kwargs["seed"]
+
+         return params
+
+     def _process_request(
+         self,
+         params: dict,
+         stream: Optional[bool],
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+     ) -> Union[str, dict]:
+         """Execute the request – handling both streaming and non‑streaming modes."""
+         output = ""
          try:
-             return self.openllm_client.health() # health() returns True if healthy, raises error otherwise
-         except Exception as e:
-             ASCIIColors.warning(f"OpenLLM server health check failed for {self.host_address}: {e}")
-             return False
+             if stream:
+                 with self.client.stream(
+                     "POST", "/chat/completions", json=params
+                 ) as response:
+                     if response.status_code != 200:
+                         err = response.read().decode("utf-8")
+                         raise Exception(
+                             f"API Error: {response.status_code} - {err}"
+                         )
+
+                     for line in response.iter_lines():
+                         if not line:
+                             continue
+                         if line.startswith("data:"):
+                             data_str = line[len("data:") :].strip()
+                             if data_str == "[DONE]":
+                                 break
+                             try:
+                                 chunk = json.loads(data_str)
+                                 if chunk.get("choices"):
+                                     delta = chunk["choices"][0].get("delta", {})
+                                     word = delta.get("content", "")
+                                     if word:
+                                         if streaming_callback:
+                                             if not streaming_callback(
+                                                 word, MSG_TYPE.MSG_TYPE_CHUNK
+                                             ):
+                                                 break
+                                         output += word
+                             except json.JSONDecodeError:
+                                 continue
+             else:
+                 response = self.client.post("/chat/completions", json=params)
+                 if response.status_code != 200:
+                     raise Exception(
+                         f"API Error: {response.status_code} - {response.text}"
+                     )
+                 data = response.json()
+                 output = data["choices"][0]["message"]["content"]
+                 if streaming_callback:
+                     streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)

-     def _get_model_metadata_from_server(self) -> Optional[Dict]:
-         if not self.openllm_client:
-             return None
-         try:
-             # metadata() returns a GenerationOutput object which contains model_name, backend etc.
-             meta_output = self.openllm_client.metadata()
-             # The actual LLMConfig and model details are in meta_output.configuration (a string JSON)
-             # and meta_output.model_name, meta_output.backend etc.
-             # For simplicity, let's try to parse configuration or use model_name
-             config_dict = {}
-             if meta_output.configuration:
-                 try:
-                     config_dict = json.loads(meta_output.configuration)
-                 except json.JSONDecodeError:
-                     ASCIIColors.warning("Failed to parse model configuration from OpenLLM metadata.")
-
-             return {
-                 "model_id": config_dict.get("model_id", meta_output.model_name), # model_id from config is better
-                 "model_name": meta_output.model_name, # As reported by client.metadata()
-                 "backend": meta_output.backend,
-                 "timeout": meta_output.timeout,
-                 "configuration": config_dict
-             }
          except Exception as e:
-             ASCIIColors.warning(f"Could not fetch metadata from OpenLLM server: {e}")
-             return None
-
-     def generate_text(self,
-                       prompt: str,
-                       images: Optional[List[str]] = None, # List of image file paths
-                       system_prompt: str = "",
-                       n_predict: Optional[int] = None,
-                       stream: bool = False,
-                       temperature: float = 0.7,
-                       top_k: int = 40,
-                       top_p: float = 0.9,
-                       repeat_penalty: float = 1.1,
-                       # repeat_last_n: int = 64, # OpenLLM's LLMConfig doesn't have direct repeat_last_n
-                       seed: Optional[int] = None,
-                       # n_threads: Optional[int] = None, # Server-side config for OpenLLM
-                       # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
-                       split:Optional[bool]=False, # put to true if the prompt is a discussion
-                       user_keyword:Optional[str]="!@>user:",
-                       ai_keyword:Optional[str]="!@>assistant:",
-                       ) -> Union[str, Dict[str, any]]:
-
-         if not self.openllm_client:
-             return {"status": False, "error": "OpenLLM client not initialized."}
-
-         # Construct LLMConfig
-         # Note: Not all Lollms params map directly to OpenLLM's LLMConfig.
-         # We map what's available.
-         config_params = {
-             "temperature": float(temperature),
-             "top_k": top_k,
-             "top_p": top_p,
-             "repetition_penalty": repeat_penalty,
-         }
-         if n_predict is not None: config_params['max_new_tokens'] = n_predict
-         if seed is not None: config_params['seed'] = seed # seed might not be supported by all backends/models
-
-         llm_config = openllm.LLMConfig(**config_params).model_dump(flatten=True, omit_default=True)
-
-         # Prepend system prompt if provided
-         full_prompt = prompt
-         if system_prompt and system_prompt.strip():
-             full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" # Common instruct format
-
-         # Handle images: This is highly model-dependent for OpenLLM.
-         # For LLaVA-like models, images are base64 encoded and put in the prompt.
-         # This is a simplified approach. A robust solution needs model-specific prompt templating.
+             trace_exception(e)
+             err_msg = f"An error occurred with the OpenLLM API: {e}"
+             if streaming_callback:
+                 streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+             return {"status": "error", "message": err_msg}
+
+         return output
+
+     # --------------------------------------------------------------------- #
+     # Public API required by LollmsLLMBinding
+     # --------------------------------------------------------------------- #
+     def generate_text(
+         self,
+         prompt: str,
+         images: Optional[List[str]] = None,
+         system_prompt: str = "",
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         """Generate text (or multimodal output) via OpenLLM."""
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+
+         user_content = [{"type": "text", "text": prompt}]
          if images:
-             ASCIIColors.warning("Image support in OpenLLMBinding is basic and assumes a LLaVA-like model "
-                                 "that accepts base64 image data in the prompt.")
-             image_parts = []
-             for img_path in images:
-                 try:
-                     # encode_image from lollms_utilities returns base64 string
-                     base64_image = encode_image(img_path)
-                     # Basic assumption: image can be prepended or appended.
-                     # For LLaVA, it's often "<image>\nUSER: What is this? ASSISTANT:"
-                     # or the raw base64 data might be directly in the prompt.
-                     # This is a placeholder for where more complex prompt construction would go.
-                     # For now, let's just put the base64 string.
-                     image_parts.append(f"[Image data: {base64_image}]") # Simplistic
-                 except Exception as e:
-                     ASCIIColors.error(f"Could not encode image {img_path}: {e}")
-
-             if image_parts:
-                 full_prompt = "\n".join(image_parts) + "\n" + full_prompt
-
-         full_response_text = ""
+             for img in images:
+                 user_content.append(normalize_image_input(img))
+
+         messages.append({"role": "user", "content": user_content})
+
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def generate_from_messages(
+         self,
+         messages: List[Dict],
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: Optional[float] = None,
+         top_k: Optional[int] = None,
+         top_p: Optional[float] = None,
+         repeat_penalty: Optional[float] = None,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         """Generate from a pre‑formatted list of OpenAI‑compatible messages."""
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def chat(
+         self,
+         discussion: LollmsDiscussion,
+         branch_tip_id: Optional[str] = None,
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         repeat_last_n: int = 64,
+         seed: Optional[int] = None,
+         n_threads: Optional[int] = None,
+         ctx_size: Optional[int] = None,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         think: Optional[bool] = False,
+         reasoning_effort: Optional[bool] = "low",
+         reasoning_summary: Optional[bool] = "auto",
+         **kwargs,
+     ) -> Union[str, dict]:
+         """
+         Conduct a chat session using a :class:`LollmsDiscussion` object.
+         The discussion is exported in an OpenAI‑compatible format and then
+         passed to :meth:`_process_request`.
+         """
+         messages = discussion.export("openai_chat", branch_tip_id)
+
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def list_models(self) -> List[Dict]:
+         """Return a list of models known to the OpenLLM server."""
+         models_info = []
          try:
-             if stream:
-                 response_stream = self.openllm_client.generate_stream(
-                     prompt=full_prompt,
-                     llm_config=llm_config,
-                     timeout=self.timeout
+             response = self.client.get("/models")
+
+             if response.status_code != 200:
+                 ASCIIColors.error(
+                     f"OpenLLM /v1/models returned status {response.status_code}. "
+                     f"Response body: {response.text}"
                 )
-                 for chunk in response_stream:
-                     # chunk is openllm.GenerationChunk
-                     chunk_content = chunk.text
-                     if chunk_content:
-                         full_response_text += chunk_content
-                         if streaming_callback:
-                             if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                 break # Callback requested stop
-                 return full_response_text
-             else: # Not streaming
-                 response_output = self.openllm_client.generate(
-                     prompt=full_prompt,
-                     llm_config=llm_config,
-                     timeout=self.timeout
+                 try:
+                     response.raise_for_status()
+                 except Exception as e:
+                     trace_exception(e)
+                     return models_info # Empty list due to error
+
+             models_data = response.json().get("data", [])
+             for model in models_data:
+                 models_info.append(
+                     {
+                         "model_name": model.get("id", "N/A"),
+                         "owned_by": model.get("owned_by", "N/A"),
+                         "created": model.get("created", "N/A"),
+                         "context_length": "unknown", # Not a standard field in OpenAI spec
+                     }
                 )
-                 # response_output is openllm.GenerationOutput
-                 # It can contain multiple responses if n > 1 (not used here)
-                 if response_output.responses:
-                     return response_output.responses[0].text
-                 else:
-                     return {"status": False, "error": "OpenLLM returned no response."}
-         except openllm.exceptions.OpenLLMException as e:
-             error_message = f"OpenLLM API Error: {str(e)}"
-             ASCIIColors.error(error_message)
-             # Attempt to get more details if it's an HTTPError from httpx
-             if hasattr(e, '__cause__') and isinstance(e.__cause__, requests.exceptions.HTTPError):
-                 error_message += f" - HTTP Status: {e.__cause__.response.status_code}, Response: {e.__cause__.response.text}"
-             elif hasattr(e, 'response') and hasattr(e.response, 'status_code'): # For httpx.HTTPStatusError
-                 error_message += f" - HTTP Status: {e.response.status_code}, Response: {e.response.text}"
-
-             return {"status": False, "error": error_message}
-         except Exception as ex:
-             error_message = f"An unexpected error occurred: {str(ex)}"
-             trace_exception(ex)
-             return {"status": False, "error": error_message}
-
-     def tokenize(self, text: str) -> list:
-         """Tokenize text using tiktoken as a fallback."""
-         # OpenLLM client doesn't provide a direct tokenization API.
-         # For accurate tokenization, it would depend on the specific model served.
-         # Using tiktoken as a general approximation.
-         try:
-             # Try to use a tokenizer related to the model if known, else default
-             if "llama" in self.model_name.lower(): # Crude check
-                 enc = tiktoken.encoding_for_model("text-davinci-003") # Llama tokenizers are different but this is a proxy
-             elif "gpt" in self.model_name.lower(): # e.g. gpt2 served by OpenLLM
-                 enc = tiktoken.get_encoding("gpt2")
-             else:
-                 enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo") # Fallback
-             return enc.encode(text)
-         except Exception:
-             # Further fallback
-             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
-
-     def detokenize(self, tokens: list) -> str:
-         """Detokenize tokens using tiktoken as a fallback."""
+         except Exception as e:
+             ASCIIColors.error(
+                 f"Failed to list models from OpenLLM: {e.__class__.__name__}: {e}"
+             )
+             trace_exception(e)
+         return models_info
+
+
+     def _get_encoding(self, model_name: str | None = None):
+         """Fallback to tiktoken for generic tokenisation."""
          try:
-             if "llama" in self.model_name.lower():
-                 enc = tiktoken.encoding_for_model("text-davinci-003")
-             elif "gpt" in self.model_name.lower():
-                 enc = tiktoken.get_encoding("gpt2")
-             else:
-                 enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo")
-             return enc.decode(tokens)
-         except Exception:
-             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
-
-     def count_tokens(self, text: str) -> int:
-         """Count tokens using the OpenLLM server if possible, else tiktoken."""
-         if not self.openllm_client:
-             ASCIIColors.warning("OpenLLM client not initialized. Using tiktoken for count_tokens.")
-             return len(self.tokenize(text)) # Fallback to tiktoken via self.tokenize
-
-         # Try the API call method for better accuracy for the specific model
-         # return count_tokens_openllm(text, self.openllm_client, self.timeout)
-         # The API call above can be slow. For faster, but less model-specific count:
-         return len(self.tokenize(text))
+             return tiktoken.encoding_for_model(model_name or self.model_name)
+         except KeyError:
+             return tiktoken.get_encoding("cl100k_base")
+
+     def tokenize(self, text: str) -> list[int]:
+         return self._get_encoding().encode(text)

+     def detokenize(self, tokens: list[int]) -> str:
+         return self._get_encoding().decode(tokens)

-     def embed(self, text: str, **kwargs) -> List[float]:
-         """Get embeddings for the input text using OpenLLM API."""
-         if not self.openllm_client:
-             raise Exception("OpenLLM client not initialized.")
+     def count_tokens(self, text: str) -> int:
+         return len(self.tokenize(text))

-         # model_to_use kwarg is less relevant here as client is tied to one model server.
-         # If that server is an embedding model, it will work.
-         # llm_config can be passed via kwargs if needed for embeddings.
-         llm_config_dict = kwargs.get("llm_config", {})
-         llm_config = openllm.LLMConfig(**llm_config_dict).model_dump(flatten=True, omit_default=True) if llm_config_dict else None
+     def embed(self, text: str | List[str], **kwargs) -> List:
+         """
+         Obtain embeddings via the OpenLLM ``/embeddings`` endpoint.
+         If a single string is supplied, a single embedding vector is returned;
+         otherwise a list of vectors is returned.
+         """
+         embedding_model = kwargs.get("model", self.model_name)
+         single_input = isinstance(text, str)
+         inputs = [text] if single_input else list(text)

         try:
-             # openllm_client.embeddings expects a list of prompts
-             response = self.openllm_client.embeddings(
-                 prompts=[text],
-                 llm_config=llm_config,
-                 timeout=self.timeout
-             )
-             # response is a list of embeddings (list of lists of floats)
-             if response and len(response) > 0:
-                 return response[0]
+             payload = {"model": embedding_model, "input": inputs}
+             response = self.client.post("/embeddings", json=payload)
+             response.raise_for_status()
+             data = response.json()
+
+             embeddings = [item["embedding"] for item in data.get("data", [])]
+
+             if single_input and embeddings:
+                 return embeddings[0]
             else:
-                 raise Exception("OpenLLM returned no embeddings.")
-         except openllm.exceptions.OpenLLMException as e:
-             error_message = f"OpenLLM API Embeddings Error: {str(e)}"
-             ASCIIColors.error(error_message)
-             raise Exception(error_message) from e
-         except Exception as ex:
-             trace_exception(ex)
-             raise Exception(f"Embedding failed: {str(ex)}") from ex
-
-     def get_model_info(self) -> dict:
-         """Return information about the current OpenLLM model setup."""
-         server_metadata = self._get_model_metadata_from_server()
-         model_id_from_server = "unknown"
-         if server_metadata and 'model_id' in server_metadata:
-             model_id_from_server = server_metadata['model_id']
-
-         # Try to determine vision support based on model name (very basic)
-         supports_vision = False
-         if self.model_name and any(vm_name in self.model_name.lower() for vm_name in ["llava", "bakllava", "vision"]):
-             supports_vision = True
+                 return embeddings
+
+         except Exception as e:
+             ASCIIColors.error(
+                 f"Failed to generate embeddings using model '{embedding_model}': {e}"
+             )
+             trace_exception(e)
+             return []

+     def get_model_info(self) -> dict:
+         """Return basic information about the current binding configuration."""
         return {
             "name": self.binding_name,
-             "version": openllm.__version__ if openllm else "unknown",
+             "version": pm.get_installed_version("openllm")
+             if pm.is_installed("openllm")
+             else "unknown",
             "host_address": self.host_address,
-             "model_name": self.model_name or model_id_from_server, # Use self.model_name if set, else from server
-             "supports_structured_output": False, # Generic OpenLLM text generation doesn't guarantee this
-             "supports_vision": supports_vision # Highly dependent on the specific model served
+             "model_name": self.model_name,
+             "supports_structured_output": False,
+             "supports_vision": True, # Assuming vision support based on original code
         }

-     def listModels(self) -> List[Dict[str, str]]:
-         """
-         Lists the model currently served by the connected OpenLLM instance.
-         OpenLLM client connects to one model server at a time.
-         """
-         if not self.openllm_client:
-             ASCIIColors.error("OpenLLM client not initialized. Cannot list models.")
-             return []
-
-         metadata = self._get_model_metadata_from_server()
-         if metadata:
-             return [{
-                 'model_name': metadata.get('model_id', metadata.get('model_name', 'Unknown Model')), # Prefer model_id
-                 'owned_by': metadata.get('backend', 'OpenLLM'), # Using backend as a proxy for owner/type
-                 # OpenLLM metadata doesn't typically include a creation/modification date for the model files themselves.
-                 'created_datetime': None
-             }]
-         return []
-
     def load_model(self, model_name: str) -> bool:
-         """
-         For OpenLLM, this primarily sets the model_name for reference, as the
-         model is already loaded by the server the client connects to.
-         Optionally, it could re-initialize the client if host_address also changes,
-         or verify the existing connection serves this model.
-         Args:
-             model_name (str): Name of the model (e.g., 'mistralai/Mistral-7B-Instruct-v0.1').
-                               This should match what the server at self.host_address is running.
-         Returns:
-             bool: True if model name is set and connection seems okay.
-         """
+         """Select a model for subsequent calls."""
         self.model_name = model_name
-         ASCIIColors.info(f"OpenLLM binding model_name set to: {model_name}.")
-         ASCIIColors.info(f"Ensure OpenLLM server at {self.host_address} is running this model.")
-
-         # Optionally, verify the connected server's model matches
-         server_meta = self._get_model_metadata_from_server()
-         if server_meta:
-             current_server_model_id = server_meta.get('model_id', server_meta.get('model_name'))
-             if current_server_model_id and model_name not in current_server_model_id : # Check if model_name is substring of actual ID
-                 ASCIIColors.warning(f"Warning: Requested model '{model_name}' may not match model '{current_server_model_id}' served at {self.host_address}.")
-             else:
-                 ASCIIColors.green(f"Connected OpenLLM server model appears to be '{current_server_model_id}'.")
-
-         return self._verify_connection()
-
-
- if __name__ == '__main__':
-     global full_streamed_text
-     ASCIIColors.yellow("Testing OpenLLMBinding...")
-
-     # --- Configuration ---
-     # Ensure an OpenLLM server is running. Example:
-     # `openllm start mistralai/Mistral-7B-Instruct-v0.1`
-     # or for embeddings: `openllm start baai/bge-small-en-v1.5`
-     # or for vision (if you have a LLaVA model compatible with OpenLLM):
-     # `openllm start llava-hf/llava-1.5-7b-hf` (You might need to convert/setup some vision models for OpenLLM)
-
-     openllm_host = "http://localhost:3000"
-     # This should match the model_id you started OpenLLM with
-     test_model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Example, change if your server runs a different model
-     # test_model_name = "facebook/opt-125m" # A smaller model for quicker tests if available
-
-     # For embedding test, you'd point to an OpenLLM server running an embedding model
-     # openllm_embedding_host = "http://localhost:3001" # If running embedding model on different port
-     # test_embedding_model_name = "baai/bge-small-en-v1.5"
-
-     # For vision, if you have a LLaVA model running with OpenLLM
-     # openllm_vision_host = "http://localhost:3002"
-     # test_vision_model_name = "llava-hf/llava-1.5-7b-hf" # Example
-
-     try:
-         ASCIIColors.cyan("\n--- Initializing Binding for Text Generation ---")
-         # Initialize with the host where your text generation model is running
-         binding = OpenLLMBinding(host_address=openllm_host, model_name=test_model_name)
-         ASCIIColors.green(f"Binding initialized successfully. Connected to model: {binding.model_name}")
-         ASCIIColors.info(f"Using OpenLLM client version: {openllm.__version__ if openllm else 'N/A'}")
-
-         ASCIIColors.cyan("\n--- Listing Model (should be the one connected) ---")
-         models = binding.listModels()
-         if models:
-             ASCIIColors.green(f"Connected model info:")
-             for m in models:
-                 print(m)
-         else:
-             ASCIIColors.warning("Failed to list model from server. Ensure OpenLLM server is running.")
-
-         ASCIIColors.cyan(f"\n--- Setting model to (for info): {test_model_name} ---")
-         binding.load_model(test_model_name) # This confirms the model name and checks connection
-
-         ASCIIColors.cyan("\n--- Counting Tokens (using tiktoken fallback or API) ---")
-         sample_text = "Hello, OpenLLM world! This is a test."
-         token_count = binding.count_tokens(sample_text)
-         ASCIIColors.green(f"Token count for '{sample_text}': {token_count} (may use tiktoken approximation)")
-
-         ASCIIColors.cyan("\n--- Tokenize/Detokenize (using tiktoken fallback) ---")
-         tokens = binding.tokenize(sample_text)
-         ASCIIColors.green(f"Tokens (tiktoken): {tokens[:10]}...")
-         detokenized_text = binding.detokenize(tokens)
-         ASCIIColors.green(f"Detokenized text (tiktoken): {detokenized_text}")
-
-         ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
-         prompt_text = "Why is the sky blue?"
-         system_prompt_text = "You are a helpful AI assistant providing concise answers."
-         ASCIIColors.info(f"System Prompt: {system_prompt_text}")
-         ASCIIColors.info(f"User Prompt: {prompt_text}")
-         generated_text = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=50, stream=False)
-         if isinstance(generated_text, str):
-             ASCIIColors.green(f"Generated text: {generated_text}")
-         else:
-             ASCIIColors.error(f"Generation failed: {generated_text}")
-
-         ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
-         full_streamed_text = ""
-         def stream_callback(chunk: str, msg_type: int):
-             global full_streamed_text
-             print(f"{ASCIIColors.GREEN}{chunk}{ASCIIColors.RESET}", end="", flush=True)
-             full_streamed_text += chunk
-             return True
-
-         ASCIIColors.info(f"Prompt: {prompt_text}")
-         result = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
-         print("\n--- End of Stream ---")
-         if isinstance(result, str):
-             ASCIIColors.green(f"Full streamed text: {result}")
-         else:
-             ASCIIColors.error(f"Streaming generation failed: {result}")
-
-         # --- Embeddings Test ---
-         # You need to run an OpenLLM server with an embedding model for this.
-         # Example: `openllm start baai/bge-small-en-v1.5 --port 3001`
-         # Then change openllm_host to "http://localhost:3001" for this section.
-         ASCIIColors.cyan("\n--- Embeddings Test ---")
-         ASCIIColors.magenta("INFO: This test requires an OpenLLM server running an EMBEDDING model (e.g., bge, E5).")
-         ASCIIColors.magenta(f" If your server at {openllm_host} is a text generation model, this might fail.")
-         embedding_text = "Lollms is a cool project using OpenLLM."
-         try:
-             # If your main binding is for text-gen, you might need a separate binding instance
-             # for an embedding model if it's on a different host/port.
-             # For this example, we'll try with the current binding.
-             # If it fails, it means the model at openllm_host doesn't support /v1/embeddings
-             embedding_vector = binding.embed(embedding_text)
-             ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
-             ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
-         except Exception as e:
-             ASCIIColors.warning(f"Could not get embedding with model '{binding.model_name}' at '{binding.host_address}': {e}")
-             ASCIIColors.warning("Ensure the OpenLLM server is running an embedding-capable model and supports the /v1/embeddings endpoint.")
+         ASCIIColors.info(f"OpenLLM model set to: {model_name}")
+         return True

-         # --- Vision Model Test ---
-         ASCIIColors.cyan("\n--- Vision Model Test (Conceptual) ---")
-         ASCIIColors.magenta("INFO: This test requires an OpenLLM server running a VISION model (e.g., LLaVA).")
-         ASCIIColors.magenta(f" And the model needs to accept images as base64 in prompt. This is a basic test.")
-
-         dummy_image_path = "dummy_test_image_openllm.png"
-         try:
-             from PIL import Image, ImageDraw
-             img = Image.new('RGB', (200, 50), color = ('blue'))
-             d = ImageDraw.Draw(img)
-             d.text((10,10), "OpenLLM Test", fill=('white'))
-             img.save(dummy_image_path)
-             ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
-
-             # Assuming your 'binding' is connected to a vision model server.
-             # If not, you'd initialize a new binding pointing to your vision model server.
-             # e.g., vision_binding = OpenLLMBinding(host_address=openllm_vision_host, model_name=test_vision_model_name)
-
-             # Check if current model_name hints at vision
-             if "llava" not in binding.model_name.lower() and "vision" not in binding.model_name.lower() :
-                 ASCIIColors.warning(f"Current model '{binding.model_name}' might not be a vision model. Vision test may not be meaningful.")
+     def ps(self):
+         """Placeholder – OpenLLM does not expose a process‑list endpoint."""
+         return []

-             vision_prompt = "What is written in the image and what color is the background?"
-             ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
-
-             vision_response = binding.generate_text(
-                 prompt=vision_prompt,
-                 images=[dummy_image_path], # The binding will attempt to base64 encode this
-                 n_predict=50,
-                 stream=False
-             )
-             if isinstance(vision_response, str):
-                 ASCIIColors.green(f"Vision model response: {vision_response}")
-             else:
-                 ASCIIColors.error(f"Vision generation failed: {vision_response}")
-         except ImportError:
-             ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
-         except Exception as e:
-             ASCIIColors.error(f"Error during vision test: {e}")
-             trace_exception(e)
-         finally:
-             import os
-             if os.path.exists(dummy_image_path):
-                 os.remove(dummy_image_path)
-
-     except ConnectionRefusedError:
-         ASCIIColors.error(f"Connection to OpenLLM server at {openllm_host} refused. Is OpenLLM server running?")
-         ASCIIColors.error("Example: `openllm start mistralai/Mistral-7B-Instruct-v0.1`")
-     except openllm.exceptions.OpenLLMException as e:
-         ASCIIColors.error(f"OpenLLM specific error: {e}")
-         trace_exception(e)
-     except Exception as e:
-         ASCIIColors.error(f"An error occurred during testing: {e}")
-         trace_exception(e)
-
-     ASCIIColors.yellow("\nOpenLLMBinding test finished.")
+
+ # Ensure the class is treated as concrete (no remaining abstract methods)
+ OpenLLMBinding.__abstractmethods__ = set()
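The rewritten binding above no longer goes through the openllm-python client; it talks to the server's OpenAI-compatible /v1 endpoints (/chat/completions, /models, /embeddings) over httpx and falls back to tiktoken for token counting. A minimal usage sketch based on the new code, assuming a reachable OpenLLM (or other OpenAI-compatible) server; the host address, model name, and prompts are illustrative placeholders, not values taken from the package:

from lollms_client.llm_bindings.openllm import OpenLLMBinding
from lollms_client.lollms_types import MSG_TYPE

# Assumed server and model; the binding appends /v1 to the host address itself.
binding = OpenLLMBinding(
    host_address="http://localhost:3000",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
)

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
    # Print streamed tokens as they arrive; returning False stops the stream.
    print(chunk, end="", flush=True)
    return True

result = binding.generate_text(
    "Why is the sky blue?",
    system_prompt="You are a concise assistant.",
    n_predict=128,
    stream=True,
    streaming_callback=on_chunk,
)

# Tokenization now goes through tiktoken (cl100k_base when the model is unknown).
print("\ntokens:", binding.count_tokens("Hello, OpenLLM world!"))

Note that on failure the new _process_request returns a {"status": "error", "message": ...} dict instead of raising, so callers should check the return type before treating the result as a string.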