lollms-client 0.13.1__tar.gz → 0.14.0__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of lollms-client might be problematic.
- {lollms_client-0.13.1 → lollms_client-0.14.0}/PKG-INFO +1 -1
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/simple_text_gen_test.py +1 -1
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/text_gen.py +1 -1
- lollms_client-0.14.0/examples/text_gen_system_prompt.py +28 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/__init__.py +1 -1
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/lollms/__init__.py +2 -1
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/ollama/__init__.py +23 -43
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/openai/__init__.py +6 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/tensor_rt/__init__.py +3 -2
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/transformers/__init__.py +2 -2
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/vllm/__init__.py +3 -2
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_core.py +2 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_llm_binding.py +1 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/PKG-INFO +1 -1
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/SOURCES.txt +1 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/LICENSE +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/README.md +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/article_summary/article_summary.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/deep_analyze/deep_analyse.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/function_call/functions_call_with images.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/personality_test/chat_test.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/personality_test/chat_with_aristotle.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/personality_test/tesks_test.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/simple_text_gen_with_image_test.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/test_local_models/local_chat.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/text_2_audio.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/text_2_image.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/examples/text_and_image_2_audio.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_functions.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_tasks.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_utilities.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/pyproject.toml +0 -0
- {lollms_client-0.13.1 → lollms_client-0.14.0}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
 from lollms_client import LollmsClient
 
 # Initialize the LollmsClient instance
-lc = LollmsClient("
+lc = LollmsClient("lollms")
 # Generate Text
 # response = lc.generate_text(prompt="Once upon a time", stream=False, temperature=0.5)
 # print(response)
lollms_client-0.14.0/examples/text_gen_system_prompt.py
@@ -0,0 +1,28 @@
+from lollms_client import LollmsClient
+
+# Initialize the LollmsClient instance
+lc = LollmsClient("ollama",model_name="mistral-nemo:latest")
+# Generate Text
+# response = lc.generate_text(prompt="Once upon a time", stream=False, temperature=0.5)
+# print(response)
+
+# # Generate Completion
+# response = lc.generate_completion(prompt="What is the capital of France", stream=False, temperature=0.5)
+# print(response)
+
+def cb(chunk, type):
+    print(chunk,end="",flush=True)
+
+response = lc.generate_text(prompt="One plus one equals ", system_prompt="You are a playful dude who never really answers questions correctly. always answer with quirky style.", stream=False, temperature=0.5, streaming_callback=cb)
+print()
+print(response)
+print()
+
+
+# List Mounted Personalities
+response = lc.listMountedPersonalities()
+print(response)
+
+# List Models
+response = lc.listModels()
+print(response)
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/__init__.py
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_functions import FunctionCalling_Library
 
-__version__ = "0.
+__version__ = "0.14.0"
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/lollms/__init__.py
@@ -49,6 +49,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: bool = False,
                       temperature: float = 0.1,
@@ -106,7 +107,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
 
         # Prepare request data
         data = {
-            "prompt": prompt,
+            "prompt":"!@>system: "+system_prompt+"\n"+"!@>user: "+prompt if system_prompt else prompt,
             "model_name": self.model_name,
            "personality": self.personality,
            "n_predict": n_predict,
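For reference, the lollms binding folds the system prompt into the request string using the "!@>system:"/"!@>user:" header convention visible in the hunk above. A minimal standalone sketch of that composition (compose_prompt is a hypothetical helper written for illustration, not a function of lollms_client):

# Sketch of how the request prompt is composed when a system prompt is supplied,
# mirroring the "!@>system:" / "!@>user:" convention from the hunk above.
def compose_prompt(prompt: str, system_prompt: str = "") -> str:
    if system_prompt:
        return "!@>system: " + system_prompt + "\n" + "!@>user: " + prompt
    return prompt

print(compose_prompt("One plus one equals ", "You are a playful dude."))
# !@>system: You are a playful dude.
# !@>user: One plus one equals 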
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/ollama/__init__.py
@@ -10,10 +10,11 @@ from typing import Optional, Callable, List, Union, Dict
 
 from ascii_colors import ASCIIColors, trace_exception
 import pipmaster as pm
-pm.ensure_packages(["ollama","pillow"])
+pm.ensure_packages(["ollama","pillow","tiktoken"])
 
 
 import ollama
+import tiktoken
 BindingName = "OllamaBinding"
 
 
@@ -112,9 +113,9 @@ class OllamaBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None, # List of image file paths
+                      system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: bool = False,
-                      system_prompt = '',
                       temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                       top_k: int = 40, # Ollama default is 40
                       top_p: float = 0.9, # Ollama default is 0.9
@@ -200,15 +201,16 @@ class OllamaBinding(LollmsLLMBinding):
                 )
                 return response_dict.get('message', {}).get('content', '')
             else: # Text-only
+                messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt}]
                 if stream:
-                    response_stream = self.ollama_client.
+                    response_stream = self.ollama_client.chat(
                         model=self.model_name,
-
+                        messages=messages,
                         stream=True,
                         options=options if options else None
                     )
                     for chunk_dict in response_stream:
-                        chunk_content = chunk_dict.
+                        chunk_content = chunk_dict.message.content
                         if chunk_content:
                             full_response_text += chunk_content
                             if streaming_callback:
@@ -216,13 +218,13 @@ class OllamaBinding(LollmsLLMBinding):
                                     break
                     return full_response_text
                 else: # Not streaming
-                    response_dict = self.ollama_client.
+                    response_dict = self.ollama_client.chat(
                         model=self.model_name,
-
+                        messages=messages,
                         stream=False,
                         options=options if options else None
                     )
-                    return response_dict.
+                    return response_dict.message.content
         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
             ASCIIColors.error(error_message)
@@ -236,53 +238,31 @@ class OllamaBinding(LollmsLLMBinding):
             trace_exception(ex)
             return {"status": False, "error": error_message}
 
-    def tokenize(self, text: str) ->
+    def tokenize(self, text: str) -> list:
         """
-        Tokenize the input text
-        and best done by the server. This method provides a basic character-level tokenization
-        as a fallback or placeholder, or one could attempt to call /api/tokenize if desired.
-        The `count_tokens` method is more accurate for Ollama.
+        Tokenize the input text into a list of characters.
 
         Args:
             text (str): The text to tokenize.
 
         Returns:
-            list: List of
+            list: List of individual characters.
         """
-
-
-
-
-        api_url = f"{self.host_address.rstrip('/')}/api/tokenize"
-        payload = {"model": self.model_name, "prompt": text}
-        try:
-            response = requests.post(api_url, json=payload, timeout=10, verify=self.verify_ssl_certificate, headers=self.ollama_client_headers)
-            response.raise_for_status()
-            return response.json().get("tokens", [])
-        except Exception as e:
-            ASCIIColors.warning(f"Failed to tokenize text with Ollama server, falling back to char tokens: {e}")
-            return list(text)
-
-    def detokenize(self, tokens: List[Union[int,str]]) -> str:
+        ## Since ollama has no endpoints to tokenize the text, we use tiktoken to have a rough estimate
+        return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
+
+    def detokenize(self, tokens: list) -> str:
         """
-        Convert a list of tokens back to text.
-        If tokens are IDs, this is non-trivial without the model's tokenizer.
+        Convert a list of tokens back to text.
 
         Args:
-            tokens (list): List of tokens to detokenize.
+            tokens (list): List of tokens (characters) to detokenize.
 
         Returns:
             str: Detokenized text.
         """
-
-
-        if isinstance(tokens[0], str): # Assuming character tokens
-            return "".join(tokens)
-        else:
-            # Detokenizing IDs from Ollama is not straightforward client-side without specific tokenizer.
-            # This is a placeholder. For Ollama, detokenization usually happens server-side.
-            ASCIIColors.warning("Detokenizing integer tokens is not accurately supported by this Ollama client binding. Returning joined string of token IDs.")
-            return "".join(map(str, tokens))
+        ## Since ollama has no endpoints to tokenize the text, we use tiktoken to have a rough estimate
+        return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
 
     def count_tokens(self, text: str) -> int:
         """
@@ -297,8 +277,8 @@ class OllamaBinding(LollmsLLMBinding):
         if not self.model_name:
             ASCIIColors.warning("Cannot count tokens, model_name is not set.")
             return -1
-        return count_tokens_ollama(text, self.model_name, self.ollama_client)
-
+        #return count_tokens_ollama(text, self.model_name, self.ollama_client)
+        return len(self.tokenize(text))
     def embed(self, text: str, **kwargs) -> List[float]:
         """
         Get embeddings for the input text using Ollama API.
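The Ollama binding above drops the /api/tokenize call and approximates token counts with tiktoken instead. A minimal standalone sketch of that approach (the gpt-3.5-turbo encoding is only a rough proxy for whatever tokenizer the Ollama-served model actually uses; estimate_tokens and roundtrip are illustrative helpers, not part of lollms_client):

import tiktoken

# The binding uses the gpt-3.5-turbo encoding as a generic approximation.
_enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

def estimate_tokens(text: str) -> int:
    # encode() returns a list of integer token ids; its length is the estimate.
    return len(_enc.encode(text))

def roundtrip(text: str) -> str:
    # decode(encode(text)) reproduces the original text.
    return _enc.decode(_enc.encode(text))

print(estimate_tokens("One plus one equals "))  # prints a small integer, the rough token count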
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/openai/__init__.py
@@ -58,6 +58,7 @@ class OpenAIBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: bool = False,
                       temperature: float = 0.1,
@@ -98,6 +99,11 @@ class OpenAIBinding(LollmsLLMBinding):
         if images:
             messages = [
                 {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+
+                {
                     "role": "user",
                     "content": [
                         {
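The OpenAI binding now prepends a system message when images are supplied. A sketch of the resulting chat-completions payload shape: the system entry comes from the hunk above, while the user-content entries follow the standard OpenAI vision format and are assumed, since the hunk is truncated after the opening brace of the user message.

# Shape of the messages list when a system prompt and an image are both supplied.
system_prompt = "You are a helpful assistant."
prompt = "Describe this image."
image_b64 = "..."  # base64-encoded image data (placeholder)

messages = [
    {"role": "system", "content": system_prompt},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ],
    },
]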
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/tensor_rt/__init__.py RENAMED
@@ -331,6 +331,7 @@ class VLLMBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = 1024,
                       stream: bool = False, # vLLM's generate is blocking, stream is pseudo
                       temperature: float = 0.7,
@@ -381,7 +382,7 @@ class VLLMBinding(LollmsLLMBinding):
                 # If providing multi_modal_data, usually prompt_token_ids are also needed.
                 # This can get complex as it depends on how the model expects images to be interleaved.
                 # For a simple case where image comes first:
-                encoded_prompt_ids = self.tokenizer.encode(prompt)
+                encoded_prompt_ids = self.tokenizer.encode(system_prompt+"\n"+prompt if system_prompt else prompt)
                 gen_kwargs["prompt_token_ids"] = [encoded_prompt_ids] # List of lists
                 gen_kwargs["multi_modal_data"] = [{"image": mm_data_content}] # List of dicts
                 gen_kwargs["prompts"] = None # Don't use prompts if prompt_token_ids is used
@@ -389,7 +390,7 @@ class VLLMBinding(LollmsLLMBinding):
             except Exception as e_mm:
                 return {"status": False, "error": f"Multimodal prep error: {e_mm}"}
         else:
-            gen_kwargs["prompts"] = [prompt]
+            gen_kwargs["prompts"] = [system_prompt+"\n"+prompt if system_prompt else prompt]
 
         try:
             outputs = self.llm_engine.generate(**gen_kwargs, sampling_params=sampling_params)
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/transformers/__init__.py RENAMED
@@ -112,6 +112,7 @@ class TransformersBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: bool = False,
                       temperature: float = 0.1,
@@ -123,8 +124,7 @@ class TransformersBinding(LollmsLLMBinding):
                       n_threads: int = 8,
                       ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None,
-                      return_legacy_cache: bool = False,
-                      system_prompt: str = "You are a helpful assistant.") -> Union[str, dict]:
+                      return_legacy_cache: bool = False) -> Union[str, dict]:
         """
         Generate text using the Transformers model, with optional image support.
 
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/llm_bindings/vllm/__init__.py
@@ -331,6 +331,7 @@ class VLLMBinding(LollmsLLMBinding):
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = 1024,
                       stream: bool = False, # vLLM's generate is blocking, stream is pseudo
                       temperature: float = 0.7,
@@ -381,7 +382,7 @@ class VLLMBinding(LollmsLLMBinding):
                 # If providing multi_modal_data, usually prompt_token_ids are also needed.
                 # This can get complex as it depends on how the model expects images to be interleaved.
                 # For a simple case where image comes first:
-                encoded_prompt_ids = self.tokenizer.encode(prompt)
+                encoded_prompt_ids = self.tokenizer.encode(system_prompt+"\n"+prompt if system_prompt else prompt)
                 gen_kwargs["prompt_token_ids"] = [encoded_prompt_ids] # List of lists
                 gen_kwargs["multi_modal_data"] = [{"image": mm_data_content}] # List of dicts
                 gen_kwargs["prompts"] = None # Don't use prompts if prompt_token_ids is used
@@ -389,7 +390,7 @@ class VLLMBinding(LollmsLLMBinding):
             except Exception as e_mm:
                 return {"status": False, "error": f"Multimodal prep error: {e_mm}"}
         else:
-            gen_kwargs["prompts"] = [prompt]
+            gen_kwargs["prompts"] = [system_prompt+"\n"+prompt if system_prompt else prompt]
 
         try:
             outputs = self.llm_engine.generate(**gen_kwargs, sampling_params=sampling_params)
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client/lollms_core.py
@@ -329,6 +329,7 @@ class LollmsClient():
     def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
+                      system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: Optional[bool] = None,
                       temperature: Optional[float] = None,
@@ -365,6 +366,7 @@ class LollmsClient():
         return self.binding.generate_text(
             prompt=prompt,
             images=images,
+            system_prompt=system_prompt,
             n_predict=n_predict if n_predict is not None else self.default_n_predict,
             stream=stream if stream is not None else self.default_stream,
             temperature=temperature if temperature is not None else self.default_temperature,
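With the lollms_core.py change above, system_prompt becomes a first-class argument of LollmsClient.generate_text and is forwarded to whichever binding is active. A minimal usage sketch adapted from the new example file (the binding and model names are just examples; use whatever you have installed locally):

from lollms_client import LollmsClient

# Any binding works; "ollama" with a locally pulled model is used here as an example.
lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

def cb(chunk, msg_type):
    # Print streamed chunks as they arrive.
    print(chunk, end="", flush=True)

response = lc.generate_text(
    prompt="One plus one equals ",
    system_prompt="You are a playful dude who never really answers questions correctly.",
    stream=False,
    temperature=0.5,
    streaming_callback=cb,
)
print(response)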
{lollms_client-0.13.1 → lollms_client-0.14.0}/lollms_client.egg-info/SOURCES.txt
@@ -7,6 +7,7 @@ examples/text_2_audio.py
 examples/text_2_image.py
 examples/text_and_image_2_audio.py
 examples/text_gen.py
+examples/text_gen_system_prompt.py
 examples/article_summary/article_summary.py
 examples/deep_analyze/deep_analyse.py
 examples/deep_analyze/deep_analyze_multiple_files.py