lollms-client 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lollms_client/__init__.py CHANGED
@@ -4,5 +4,5 @@ from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_personality import LollmsPersonality
 from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper
-from lollms_client.lollms_tts import LollmsTTS
+from lollms_client.lollms_tts_binding import LollmsTTS
 from lollms_client.lollms_functions import FunctionCalling_Library
@@ -54,6 +54,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the LOLLMS service, with optional image support.
@@ -54,6 +54,7 @@ class OllamaBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the Ollama service, with optional image support.
@@ -111,8 +112,10 @@ class OllamaBinding(LollmsLLMBinding):
                 }],
                 "stream": stream,
                 "temperature": float(temperature),
-                "max_tokens": n_predict
+                "max_tokens": n_predict,
             }
+            if ctx_size is not None:
+                data["num_ctx"] = ctx_size
             url = f'{host_address}/api/chat'
         else:
             # Text-only generation using /api/generate endpoint
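
The added num_ctx field is only set when a caller passes ctx_size. Below is a minimal sketch of the resulting /api/chat payload; the field names mirror the hunk above, while the model, prompt, and sizes are illustrative and the messages structure is abbreviated:

# Illustrative payload mirroring the hunk above; model, prompt, and sizes are made up.
data = {
    "model": "llava",
    "messages": [{"role": "user", "content": "Describe this image"}],
    "stream": False,
    "temperature": 0.1,
    "max_tokens": 512,
}
ctx_size = 8192
if ctx_size is not None:
    data["num_ctx"] = ctx_size  # context window size forwarded to Ollama
# requests.post(f"{host_address}/api/chat", json=data, verify=verify_ssl_certificate)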
@@ -265,6 +268,7 @@ class OllamaBinding(LollmsLLMBinding):
         }
         response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
         try:
+            ASCIIColors.debug("Listing ollama models")
             data = response.json()
             model_info = []

@@ -62,6 +62,7 @@ class OpenAIBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
         """
         Generate text based on the provided prompt and parameters.
@@ -11,12 +11,12 @@ from ascii_colors import ASCIIColors
 import pipmaster as pm
 if not pm.is_installed("torch"):
     ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)

 import torch
 if not torch.cuda.is_available():
     ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
     import torch

 if not pm.is_installed("transformers"):
@@ -26,6 +26,7 @@ BindingName = "TransformersBinding"

 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig
 from packaging import version
+import transformers

 class TransformersBinding(LollmsLLMBinding):
     """Transformers-specific binding implementation"""
@@ -35,7 +36,8 @@ class TransformersBinding(LollmsLLMBinding):
                  model_name: str = "",
                  service_key: str = None,
                  verify_ssl_certificate: bool = True,
-                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+                 prompt_template: Optional[str] = None):
         """
         Initialize the Transformers binding.

@@ -45,6 +47,7 @@ class TransformersBinding(LollmsLLMBinding):
             service_key (str): Authentication key for the service. Defaults to None.
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
             default_completion_format (ELF_COMPLETION_FORMAT): Default format for completions.
+            prompt_template (Optional[str]): Custom prompt template. If None, inferred from model.
         """
         super().__init__(
             host_address=host_address,
@@ -76,6 +79,9 @@ class TransformersBinding(LollmsLLMBinding):

         self.generation_config = GenerationConfig.from_pretrained(str(model_name))

+        # Infer or set prompt template
+        self.prompt_template = prompt_template if prompt_template else self._infer_prompt_template(model_name)
+
         # Display device information
         device = next(self.model.parameters()).device
         device_type = "CPU" if device.type == "cpu" else "GPU"
@@ -86,26 +92,47 @@ class TransformersBinding(LollmsLLMBinding):
             [ASCIIColors.color_green, ASCIIColors.color_blue if device_type == "GPU" else ASCIIColors.color_red]
         )

+    def _infer_prompt_template(self, model_name: str) -> str:
+        """
+        Infer the prompt template based on the model name.
+
+        Args:
+            model_name (str): Name of the model.
+
+        Returns:
+            str: The inferred prompt template format string.
+        """
+        model_name = model_name.lower()
+        if "llama-2" in model_name or "llama" in model_name:
+            return "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
+        elif "gpt" in model_name:
+            return "{system_prompt}\n{user_prompt}"  # Simple concatenation for GPT-style models
+        else:
+            # Default to a basic chat format
+            ASCIIColors.yellow(f"Warning: No specific template found for {model_name}. Using default chat format.")
+            return "[INST] {system_prompt}\n{user_prompt} [/INST]"

     def generate_text(self,
-                      prompt: str,
-                      images: Optional[List[str]] = None,
-                      n_predict: Optional[int] = None,
-                      stream: bool = False,
-                      temperature: float = 0.1,
-                      top_k: int = 50,
-                      top_p: float = 0.95,
-                      repeat_penalty: float = 0.8,
-                      repeat_last_n: int = 40,
-                      seed: Optional[int] = None,
-                      n_threads: int = 8,
-                      streaming_callback: Optional[Callable[[str, str], None]] = None,
-                      return_legacy_cache: bool = False) -> Union[str, dict]:
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.1,
+                      top_k: int = 50,
+                      top_p: float = 0.95,
+                      repeat_penalty: float = 0.8,
+                      repeat_last_n: int = 40,
+                      seed: Optional[int] = None,
+                      n_threads: int = 8,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, str], None]] = None,
+                      return_legacy_cache: bool = False,
+                      system_prompt: str = "You are a helpful assistant.") -> Union[str, dict]:
         """
         Generate text using the Transformers model, with optional image support.

         Args:
-            prompt (str): The input prompt for text generation.
+            prompt (str): The input prompt for text generation (user prompt).
             images (Optional[List[str]]): List of image file paths for multimodal generation.
             n_predict (Optional[int]): Maximum number of tokens to generate.
             stream (bool): Whether to stream the output. Defaults to False.
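
Taken together with the constructor change above, the new keyword arguments can be exercised as in this usage sketch; it is not taken from the package docs, the model name and prompts are placeholders, and the remaining constructor arguments are assumed to keep their defaults:

# Hypothetical call showing the parameters added in 0.12.0; values are illustrative.
binding = TransformersBinding(model_name="meta-llama/Llama-2-7b-chat-hf")
result = binding.generate_text(
    prompt="Summarize the plot of Hamlet in two sentences.",
    system_prompt="You are a concise literary assistant.",  # new parameter
    ctx_size=4096,                                          # new parameter
    n_predict=128,
)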
@@ -118,6 +145,7 @@ class TransformersBinding(LollmsLLMBinding):
             n_threads (int): Number of threads to use. Defaults to 8.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
             return_legacy_cache (bool): Whether to use legacy cache format (pre-v4.47). Defaults to False.
+            system_prompt (str): System prompt to set model behavior. Defaults to "You are a helpful assistant."

         Returns:
             Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
@@ -130,6 +158,12 @@ class TransformersBinding(LollmsLLMBinding):
         if seed is not None:
             torch.manual_seed(seed)

+        # Apply the prompt template
+        formatted_prompt = self.prompt_template.format(
+            system_prompt=system_prompt,
+            user_prompt=prompt
+        )
+
         # Prepare generation config
         self.generation_config.max_new_tokens = n_predict if n_predict else 2048
         self.generation_config.temperature = temperature
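
Applied to the Llama-style template returned by _infer_prompt_template, this formatting step produces, for example (prompt values illustrative):

# Example rendering of the Llama template from the hunks above.
template = "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
formatted_prompt = template.format(
    system_prompt="You are a helpful assistant.",
    user_prompt="What is the capital of France?",
)
# "[INST] <<SYS>> You are a helpful assistant. <</SYS>> What is the capital of France? [/INST]"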
@@ -139,14 +173,14 @@ class TransformersBinding(LollmsLLMBinding):
         self.generation_config.pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id

         # Tokenize input with attention mask
-        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
+        inputs = self.tokenizer(formatted_prompt, return_tensors="pt", padding=True)
         input_ids = inputs.input_ids.to(self.model.device)
         attention_mask = inputs.attention_mask.to(self.model.device)

         # Handle image input if provided (basic implementation)
         if images and len(images) > 0:
             ASCIIColors.yellow("Warning: Image processing not fully implemented in this binding")
-            prompt += "\n[Image content not processed]"
+            formatted_prompt += "\n[Image content not processed]"

         # Check transformers version for cache handling
         use_legacy_cache = return_legacy_cache or version.parse(transformers.__version__) < version.parse("4.47.0")
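
The legacy-cache gate above depends only on the installed transformers version; a small standalone sketch of the comparison it performs:

from packaging import version

# Mirrors the gate in the hunk above: legacy tuple output for transformers < 4.47.0.
for installed in ("4.46.3", "4.47.0"):
    print(installed, "->", version.parse(installed) < version.parse("4.47.0"))
# 4.46.3 -> True   (use legacy cache format)
# 4.47.0 -> False  (use the output.sequences attribute)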
@@ -169,10 +203,8 @@ class TransformersBinding(LollmsLLMBinding):
         ):
             # Handle different output formats based on version/cache setting
             if use_legacy_cache:
-                # Legacy format: tuple of (sequences, scores, ...)
                 sequences = output[0]
             else:
-                # New format: Cache instance
                 sequences = output.sequences

             # Decode the new tokens
@@ -212,70 +244,34 @@ class TransformersBinding(LollmsLLMBinding):
             error_msg = f"Error generating text: {str(e)}"
             ASCIIColors.red(error_msg)
             return {"status": "error", "error": error_msg}
-
-

     def tokenize(self, text: str) -> list:
-        """
-        Tokenize the input text into a list of characters.
-
-        Args:
-            text (str): The text to tokenize.
-
-        Returns:
-            list: List of individual characters.
-        """
+        """Tokenize the input text into a list of characters."""
         return list(text)

     def detokenize(self, tokens: list) -> str:
-        """
-        Convert a list of tokens back to text.
-
-        Args:
-            tokens (list): List of tokens (characters) to detokenize.
-
-        Returns:
-            str: Detokenized text.
-        """
+        """Convert a list of tokens back to text."""
         return "".join(tokens)
+
     def embed(self, text: str, **kwargs) -> list:
-        """
-        Get embeddings for the input text using Ollama API
-
-        Args:
-            text (str or List[str]): Input text to embed
-            **kwargs: Additional arguments like model, truncate, options, keep_alive
-
-        Returns:
-            dict: Response containing embeddings
-        """
-        pass
+        """Get embeddings for the input text (placeholder)."""
+        pass
+
     def get_model_info(self) -> dict:
-        """
-        Return information about the current Ollama model.
-
-        Returns:
-            dict: Dictionary containing model name, version, and host address.
-        """
+        """Return information about the current model."""
         return {
-            "name": "ollama",
-            "version": "2.0",
+            "name": "transformers",
+            "version": transformers.__version__,
             "host_address": self.host_address,
             "model_name": self.model_name
         }
+
     def listModels(self):
-        """ Lists available models """
+        """Lists available models (placeholder)."""
         pass
+
     def load_model(self, model_name: str) -> bool:
-        """
-        Load a specific model into the Ollama binding.
-
-        Args:
-            model_name (str): Name of the model to load.
-
-        Returns:
-            bool: True if model loaded successfully.
-        """
+        """Load a specific model into the binding."""
         self.model = model_name
         self.model_name = model_name
-        return True
+        return True