lollms-client 0.16.0__tar.gz → 0.17.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client has been flagged as potentially problematic; see the package page for details.
- {lollms_client-0.16.0 → lollms_client-0.17.1}/PKG-INFO +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_with_image_test.py +8 -8
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen.py +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/__init__.py +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/lollms/__init__.py +31 -24
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/ollama/__init__.py +47 -27
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openai/__init__.py +62 -35
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openllm/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/transformers/__init__.py +3 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/vllm/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_core.py +22 -9
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_llm_binding.py +78 -22
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_utilities.py +5 -3
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/PKG-INFO +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/LICENSE +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/README.md +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/article_summary/article_summary.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyse.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/function_call/functions_call_with images.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_and_speak/generate_and_speak.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_game_sfx/generate_game_fx.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_with_aristotle.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/tesks_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/test_local_models/local_chat.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_audio.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_image.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_and_image_2_audio.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen_system_prompt.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_functions.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tasks.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/bark/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/SOURCES.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/pyproject.toml +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/setup.cfg +0 -0
{lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_with_image_test.py
RENAMED
@@ -10,14 +10,14 @@ from ascii_colors import ASCIIColors, trace_exception
 # MODEL_NAME = None # Server will use its default or last loaded model

 # Option 2: Ollama binding
-
-
-
-
-# Option
-BINDING_NAME = "llamacpp"
-MODELS_PATH = r"E:\drumber" # Change to your own models folder
-MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
+BINDING_NAME = "ollama"
+HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
+MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
+
+# Option 3: llamacpp binding
+# BINDING_NAME = "llamacpp"
+# MODELS_PATH = r"E:\drumber" # Change to your own models folder
+# MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
 # You can also add a clip_model_path parameter to your lc_params
 img = "E:\\drumber\\1711741182996.jpg"
 # Option 3: OpenAI binding (requires OPENAI_API_KEY environment variable or service_key)
{lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen.py
RENAMED
@@ -15,7 +15,7 @@ lc = LollmsClient("llamacpp", models_path=r"E:\drumber", model_name="llava-v1.6-
 def cb(chunk, type):
     print(chunk,end="",flush=True)

-response = lc.generate_text(prompt="
+response = lc.generate_text(prompt="!@>user: Hi there\n!@>assistant: Hi there, how can I help you?!@>user: what is 1+1?\n!@>assistant: ", stream=False, temperature=0.5, streaming_callback=cb, split=True)
 print()
 print(response)
 print()
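With split=True, the flat LoLLMs-formatted prompt above is parsed into an OpenAI-style message list before generation; the parsing is done by the split_discussion helper added to lollms_llm_binding.py further down in this diff. A sketch of what this example prompt becomes, assuming the default keywords:

# Roughly what split_discussion() yields for the example prompt;
# the trailing empty "!@>assistant: " turn is dropped by the helper.
[{"role": "user", "content": "Hi there"},
 {"role": "assistant", "content": "Hi there, how can I help you?"},
 {"role": "user", "content": "what is 1+1?"}]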
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/__init__.py
RENAMED
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_functions import FunctionCalling_Library

-__version__ = "0.16.0"
+__version__ = "0.17.1"

 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/llamacpp/__init__.py
RENAMED
@@ -475,7 +475,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                                     temperature: float = 0.7, top_k: int = 40, top_p: float = 0.9,
                                     repeat_penalty: float = 1.1, repeat_last_n: Optional[int] = 64,
                                     seed: Optional[int] = None, stream: bool = False, use_chat_format: bool = True,
-                                    images: Optional[List[str]] = None,
+                                    images: Optional[List[str]] = None,
+                                    split:Optional[bool]=False, # put to true if the prompt is a discussion
+                                    user_keyword:Optional[str]="!@>user:",
+                                    ai_keyword:Optional[str]="!@>assistant:",
+
+                                    **extra_params) -> Dict:
         payload_params = {
             "temperature": self.server_args.get("temperature", 0.7), "top_k": self.server_args.get("top_k", 40),
             "top_p": self.server_args.get("top_p", 0.9), "repeat_penalty": self.server_args.get("repeat_penalty", 1.1),
@@ -495,6 +500,10 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         messages = []
         if system_prompt and system_prompt.strip(): messages.append({"role": "system", "content": system_prompt})
         user_content: Union[str, List[Dict[str, Any]]] = prompt
+        if split:
+            messages += self.split_discussion(user_content,user_keyword=user_keyword, ai_keyword=ai_keyword)
+        else:
+            messages.append({"role": "user", "content": user_content})
         if images and self.clip_model_path: # Use the binding's current clip_model_path
             image_parts = []
             for img_path in images:
@@ -503,8 +512,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                     image_type = Path(img_path).suffix[1:].lower() or "png"; image_type = "jpeg" if image_type == "jpg" else image_type
                     image_parts.append({"type": "image_url", "image_url": {"url": f"data:image/{image_type};base64,{encoded_string}"}})
                 except Exception as ex: trace_exception(ex)
-
-            messages.append({"role": "user", "content": user_content})
+            messages[-1]["content"] =[{"type": "text", "text": messages[-1]["content"]}] + image_parts # type: ignore
         final_payload = {"messages": messages, "stream": stream, **payload_params}
         if 'n_predict' in final_payload: final_payload['max_tokens'] = final_payload.pop('n_predict')
         return final_payload
@@ -521,16 +529,57 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         if image_data_list: final_payload["image_data"] = image_data_list
         return final_payload

-
-
-
-
-
-
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      **generation_kwargs
+                      ) -> Union[str, dict]:
+        """
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
+
+        Args:
+            prompt (str): The input prompt for text generation.
+            images (Optional[List[str]]): List of image file paths for multimodal generation.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
+
+        Returns:
+            Union[str, dict]: Generated text or error dictionary if failed.
+        """
         if not self.server_process or not self.server_process.is_healthy:
             return {"status": False, "error": "Llama.cpp server is not running or not healthy."}

-        _use_chat_format =
+        _use_chat_format = True
         payload = self._prepare_generation_payload(
             prompt=prompt, system_prompt=system_prompt, n_predict=n_predict,
             temperature=temperature if temperature is not None else self.server_args.get("temperature",0.7),
@@ -539,7 +588,8 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             repeat_penalty=repeat_penalty if repeat_penalty is not None else self.server_args.get("repeat_penalty",1.1),
             repeat_last_n=repeat_last_n if repeat_last_n is not None else self.server_args.get("repeat_last_n",64),
             seed=seed if seed is not None else self.server_args.get("seed", -1), stream=stream,
-            use_chat_format=_use_chat_format, images=images,
+            use_chat_format=_use_chat_format, images=images,
+            split= split, user_keyword=user_keyword, ai_keyword=ai_keyword, **generation_kwargs
         )
         endpoint = "/v1/chat/completions" if _use_chat_format else "/completion"
         request_url = self._get_request_url(endpoint)
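Since _use_chat_format is now hard-coded to True, every generation from this binding goes to /v1/chat/completions with an OpenAI-style payload, and _prepare_generation_payload renames n_predict to max_tokens as shown above. A sketch of the payload a split prompt produces (field values hypothetical):

payload = {
    "messages": [
        {"role": "user", "content": "Hi there"},
        {"role": "assistant", "content": "Hi there, how can I help you?"},
    ],
    "stream": False,        # set per call
    "temperature": 0.7,     # falls back to server_args defaults
    "max_tokens": 4096,     # renamed from n_predict
}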
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/lollms/__init__.py
RENAMED
@@ -46,43 +46,50 @@ class LollmsLLMBinding(LollmsLLMBinding):
         self.personality = personality
         self.model = None

-    def generate_text(self,
+    def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
-                      temperature: float =
-                      top_k: int =
-                      top_p: float =
-                      repeat_penalty: float =
-                      repeat_last_n: int =
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
                       seed: Optional[int] = None,
-                      n_threads: int =
+                      n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-
-
-
-
-
-
-
-
-
-
-            streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
-                - First parameter (str): The chunk of text received
-                - Second parameter (str): The message type (
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str, dict]: Generated text
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         # Determine endpoint based on presence of images
         endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/ollama/__init__.py
RENAMED
@@ -109,47 +109,53 @@ class OllamaBinding(LollmsLLMBinding):
         self.ollama_client = None # Ensure it's None if initialization fails
         # Optionally re-raise or handle so the binding is clearly unusable
         raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
-
-    def generate_text(self,
+
+    def generate_text(self,
                       prompt: str,
-                      images: Optional[List[str]] = None,
+                      images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
+                      stream: Optional[bool] = None,
                       temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                       top_k: int = 40, # Ollama default is 40
                       top_p: float = 0.9, # Ollama default is 0.9
                       repeat_penalty: float = 1.1, # Ollama default is 1.1
                       repeat_last_n: int = 64, # Ollama default is 64
                       seed: Optional[int] = None,
-                      n_threads: Optional[int] = None,
-                      ctx_size:
-                      streaming_callback: Optional[Callable[[str,
-
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (Optional[int]): Number of threads to use
-            ctx_size (
-            streaming_callback (Optional[Callable[[str,
-                - First parameter (str): The chunk of text received
-                - Second parameter (
-
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str,
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
+
         if not self.ollama_client:
             return {"status": False, "error": "Ollama client not initialized."}
@@ -175,8 +181,15 @@ class OllamaBinding(LollmsLLMBinding):
                 # If images were base64 strings, they would need decoding to bytes first.
                 processed_images.append(img_path)

-            messages = [
-
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+                if processed_images:
+                    messages[-1]["images"]=processed_images
+            else:
+                messages.append({'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None})
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
@@ -201,7 +214,14 @@ class OllamaBinding(LollmsLLMBinding):
             )
             return response_dict.get('message', {}).get('content', '')
         else: # Text-only
-            messages = [
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            else:
+                messages.append({'role': 'user', 'content': prompt})
+
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
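On the Ollama path, enabling split replaces the single user turn with the parsed discussion, and any processed images are attached to the last parsed message rather than to a fresh turn. A sketch of the resulting structure (contents hypothetical):

messages = [
    {'role': 'system', 'content': 'You are helpful.'},
    {'role': 'user', 'content': 'What is in this picture?'},
]
messages[-1]["images"] = ["/path/to/picture.jpg"]  # images ride on the final turn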
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openai/__init__.py
RENAMED
@@ -55,42 +55,50 @@ class OpenAIBinding(LollmsLLMBinding):
         self.completion_format = ELF_COMPLETION_FORMAT.Chat

-    def generate_text(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      repeat_last_n: int = 64,
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                 - First parameter (str): The chunk of text received.
                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            str: Generated text or error dictionary if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         count = 0
         output = ""
@@ -101,16 +109,17 @@ class OpenAIBinding(LollmsLLMBinding):
             {
                 "role": "system",
                 "content": system_prompt,
-            }
-
-
-
+            }
+        ]
+        if split:
+            messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            if images:
+                messages[-1]["content"] = [
                 {
                     "type": "text",
-                    "text":
+                    "text": messages[-1]["content"]
                 }
-            ]
+            ]+[
                 {
                     "type": "image_url",
                     "image_url": {
@@ -119,8 +128,26 @@ class OpenAIBinding(LollmsLLMBinding):
                 }
                 for image_path in images
             ]
-
-
+        else:
+            messages.append({
+                'role': 'user',
+                'content': [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ] + [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                        }
+                    }
+                    for image_path in images
+                ]
+            })
+
         else:
             messages = [{"role": "user", "content": prompt}]
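In the OpenAI-compatible binding, images are delivered by rewrapping the last message's string content into a typed content list: one text part plus one base64 image_url part per image, encoded with the encode_image helper used above. A sketch of the final user message shape (values hypothetical):

{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this."},
        {"type": "image_url",
         "image_url": {"url": "data:image/jpeg;base64,..."}},  # one entry per image
    ],
}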
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openllm/__init__.py
RENAMED
@@ -154,7 +154,10 @@ class OpenLLMBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       # n_threads: Optional[int] = None, # Server-side config for OpenLLM
                       # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:

         if not self.openllm_client:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py
RENAMED
@@ -216,6 +216,9 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
                       use_chat_format: bool = True,
                       grammar: Optional[Union[str, LlamaGrammar]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       **generation_kwargs
                       ) -> Union[str, Dict[str, any]]:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/tensor_rt/__init__.py
RENAMED
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/transformers/__init__.py
RENAMED
@@ -312,6 +312,9 @@ class HuggingFaceHubBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       stop_words: Optional[List[str]] = None, # Added custom stop_words
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       use_chat_format_override: Optional[bool] = None,
                       **generation_kwargs
                       ) -> Union[str, Dict[str, Any]]:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/vllm/__init__.py
RENAMED
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_core.py
RENAMED
@@ -11,7 +11,7 @@ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingM
 from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager

-import
+import re
 from enum import Enum
 import base64
 import requests
@@ -61,11 +61,12 @@ class LollmsClient():
                  ctx_size: Optional[int] = 8192,
                  n_predict: Optional[int] = 4096,
                  stream: bool = False,
-                 temperature: float = 0.
-                 top_k: int =
-                 top_p: float = 0.
-                 repeat_penalty: float =
-                 repeat_last_n: int =
+                 temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                 top_k: int = 40, # Ollama default is 40
+                 top_p: float = 0.9, # Ollama default is 0.9
+                 repeat_penalty: float = 1.1, # Ollama default is 1.1
+                 repeat_last_n: int = 64, # Ollama default is 64
+
                  seed: Optional[int] = None,
                  n_threads: int = 8,
                  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
@@ -362,7 +363,11 @@ class LollmsClient():
                       seed: Optional[int] = None,
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
@@ -380,6 +385,9 @@ class LollmsClient():
             n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
             ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
             Union[str, dict]: Generated text or error dictionary if failed.
@@ -399,7 +407,10 @@ class LollmsClient():
                 seed=seed if seed is not None else self.default_seed,
                 n_threads=n_threads if n_threads is not None else self.default_n_threads,
                 ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                split= split,
+                user_keyword=user_keyword,
+                ai_keyword=ai_keyword
             )
         raise RuntimeError("LLM binding not initialized.")
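End to end, LollmsClient.generate_text simply forwards the three new parameters to the active binding. A minimal usage sketch (binding and model names are placeholder assumptions, mirroring the examples earlier in this diff):

from lollms_client import LollmsClient

lc = LollmsClient("ollama", model_name="llava:latest")  # placeholder backend/model
response = lc.generate_text(
    prompt="!@>user: Hi there\n!@>assistant: ",
    split=True,                   # parse the prompt into chat turns
    user_keyword="!@>user:",      # defaults, shown for clarity
    ai_keyword="!@>assistant:",
)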
@@ -981,7 +992,6 @@ Do not split the code in multiple tags.
         Ranks answers for a question from best to worst using LLM JSON generation.
         (Implementation requires self.generate_code which uses self.generate_text)
         """
-        # ... (Implementation as provided before, relies on self.generate_code) ...
         if not callback:
             callback = self.sink
@@ -1567,6 +1577,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
         return final_output

+
 def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
     """
     Chunks text based on token count.
@@ -1646,3 +1657,5 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators
             break

     return chunks
+
+
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_llm_binding.py
RENAMED
@@ -2,13 +2,14 @@
 from abc import ABC, abstractmethod
 import importlib
 from pathlib import Path
-from typing import Optional, Callable, List
+from typing import Optional, Callable, List, Union
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
 import importlib
 from pathlib import Path
 from typing import Optional
 from ascii_colors import trace_exception
-
+from lollms_client.lollms_types import MSG_TYPE
+import re
 class LollmsLLMBinding(ABC):
     """Abstract base class for all LOLLMS LLM bindings"""
@@ -25,41 +26,50 @@ class LollmsLLMBinding(ABC):
         self.model_name = None #Must be set by the instance

     @abstractmethod
-    def generate_text(self,
+    def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
-                      temperature: float =
-                      top_k: int =
-                      top_p: float =
-                      repeat_penalty: float =
-                      repeat_last_n: int =
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
                       seed: Optional[int] = None,
-                      n_threads: int =
-
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                 - First parameter (str): The chunk of text received.
                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            str: Generated text or error dictionary if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         pass
@@ -146,6 +156,52 @@ class LollmsLLMBinding(ABC):
         pass

+    def split_discussion(self, lollms_prompt_string: str, system_keyword="!@>system:", user_keyword="!@>user:", ai_keyword="!@>assistant:") -> list:
+        """
+        Splits a LoLLMs prompt into a list of OpenAI-style messages.
+        If the very first chunk has no prefix, it's assigned to "system".
+        """
+        # Regex to split on any of the three prefixes (lookahead)
+        pattern = r"(?={}|{}|{})".format(
+            re.escape(system_keyword),
+            re.escape(user_keyword),
+            re.escape(ai_keyword)
+        )
+        parts = re.split(pattern, lollms_prompt_string)
+        messages = []
+
+        for part in parts:
+            part = part.strip()
+            if not part:
+                continue
+
+            # Determine role and strip prefix if present
+            if part.startswith(system_keyword):
+                role = "system"
+                content = part[len(system_keyword):].strip()
+            elif part.startswith(user_keyword):
+                role = "user"
+                content = part[len(user_keyword):].strip()
+            elif part.startswith(ai_keyword):
+                role = "assistant"
+                content = part[len(ai_keyword):].strip()
+            else:
+                # No prefix: if it's the first valid chunk, treat as system
+                if not messages:
+                    role = "system"
+                    content = part
+                else:
+                    # otherwise skip unrecognized segments
+                    continue
+
+            messages.append({"role": role, "content": content})
+        if messages[-1]["content"]=="":
+            del messages[-1]
+        return messages
+
+
 class LollmsLLMBindingManager:
     """Manages binding discovery and instantiation"""
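A quick sanity check of the new helper on an instantiated binding (a sketch; input and output values are illustrative):

msgs = binding.split_discussion(
    "!@>system: Be concise.\n!@>user: Hello\n!@>assistant: Hi!\n!@>user: 2+2?\n!@>assistant: "
)
# -> [{'role': 'system', 'content': 'Be concise.'},
#     {'role': 'user', 'content': 'Hello'},
#     {'role': 'assistant', 'content': 'Hi!'},
#     {'role': 'user', 'content': '2+2?'}]
# The trailing "!@>assistant: " becomes an empty assistant message that the final check deletes.

Note that the final check indexes messages[-1] unconditionally, so a prompt with no recognizable content would raise an IndexError on the empty list; callers are expected to pass at least one turn.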
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_utilities.py
RENAMED
@@ -1,10 +1,12 @@
 import urllib
 import numpy
 from pathlib import Path
-
+import pipmaster as pm
 from PIL import Image
 import io
 import base64
+import re
+import numpy as np
 class PromptReshaper:
     def __init__(self, template:str):
         self.template = template
@@ -122,8 +124,8 @@ def remove_text_from_string(string: str, text_to_find:str):


 def process_ai_output(output, images, output_folder):
-    if not
-
+    if not pm.is_installed("opencv-python"):
+        pm.install("opencv-python")
     import cv2
     images = [cv2.imread(str(img)) for img in images]
     # Find all bounding box entries in the output
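process_ai_output now installs OpenCV on first use through pipmaster instead of assuming it is present. The same guard pattern in isolation (a sketch using the calls shown above):

import pipmaster as pm

# Only touch pip when the package is actually missing, then import lazily.
if not pm.is_installed("opencv-python"):
    pm.install("opencv-python")
import cv2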
All remaining files are unchanged between 0.16.0 and 0.17.1 (the entries listed above with +0 -0).