lollms-client 0.20.2__py3-none-any.whl → 0.20.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- examples/gradio_chat_app.py +228 -0
- examples/internet_search_with_rag.py +1 -2
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +104 -0
- lollms_client/llm_bindings/lollms/__init__.py +102 -1
- lollms_client/llm_bindings/ollama/__init__.py +99 -0
- lollms_client/llm_bindings/openai/__init__.py +109 -0
- lollms_client/lollms_core.py +72 -5
- lollms_client/lollms_discussion.py +473 -33
- lollms_client/lollms_llm_binding.py +43 -0
- lollms_client/lollms_types.py +3 -0
- {lollms_client-0.20.2.dist-info → lollms_client-0.20.4.dist-info}/METADATA +1 -1
- {lollms_client-0.20.2.dist-info → lollms_client-0.20.4.dist-info}/RECORD +16 -15
- {lollms_client-0.20.2.dist-info → lollms_client-0.20.4.dist-info}/WHEEL +0 -0
- {lollms_client-0.20.2.dist-info → lollms_client-0.20.4.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.20.2.dist-info → lollms_client-0.20.4.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/openai/__init__.py
CHANGED

@@ -5,6 +5,7 @@ from lollms_client.lollms_llm_binding import LollmsLLMBinding
 from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_utilities import encode_image
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from lollms_client.lollms_discussion import LollmsDiscussion
 from typing import Optional, Callable, List, Union
 from ascii_colors import ASCIIColors, trace_exception
 from typing import List, Dict

@@ -207,6 +208,114 @@ class OpenAIBinding(LollmsLLMBinding):
 
         return output
 
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with the OpenAI model using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Frequency penalty for repeated tokens.
+            seed (Optional[int]): Random seed for generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        # 1. Export the discussion to the OpenAI chat format
+        # This handles system prompts, user/assistant roles, and multi-modal content automatically.
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        # Build the request parameters
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": n_predict,
+            "n": 1,
+            "temperature": temperature,
+            "top_p": top_p,
+            "frequency_penalty": repeat_penalty,
+            "stream": stream
+        }
+        # Add seed if available, as it's supported by newer OpenAI models
+        if seed is not None:
+            params["seed"] = seed
+
+        # Remove None values, as the API expects them to be absent
+        params = {k: v for k, v in params.items() if v is not None}
+
+        output = ""
+        # 2. Call the API
+        try:
+            # Check if we should use the chat completions or legacy completions endpoint
+            if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+                completion = self.client.chat.completions.create(**params)
+
+                if stream:
+                    for chunk in completion:
+                        # The streaming response for chat has a different structure
+                        delta = chunk.choices[0].delta
+                        if delta.content:
+                            word = delta.content
+                            if streaming_callback is not None:
+                                if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
+                            output += word
+                else:
+                    output = completion.choices[0].message.content
+
+            else: # Fallback to legacy completion format (not recommended for chat)
+                # We need to format the messages list into a single string prompt
+                legacy_prompt = discussion.export("openai_completion", branch_tip_id)
+                legacy_params = {
+                    "model": self.model_name,
+                    "prompt": legacy_prompt,
+                    "max_tokens": n_predict,
+                    "n": 1,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "frequency_penalty": repeat_penalty,
+                    "stream": stream
+                }
+                completion = self.client.completions.create(**legacy_params)
+
+                if stream:
+                    for chunk in completion:
+                        word = chunk.choices[0].text
+                        if streaming_callback is not None:
+                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                        output += word
+                else:
+                    output = completion.choices[0].text
+
+        except Exception as e:
+            # Handle API errors gracefully
+            error_message = f"An error occurred with the OpenAI API: {e}"
+            if streaming_callback:
+                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": error_message}
+
+        return output
+
     def tokenize(self, text: str) -> list:
         """
         Tokenize the input text into a list of characters.
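For orientation, here is a minimal usage sketch of the new binding-level chat() entry point. It is illustrative only and not part of the diff: the construction of the binding and of the LollmsDiscussion object is not shown in these hunks, so `binding` and `discussion` are assumed to already exist.

# Hypothetical sketch: drive the new chat() with a streaming callback.
# `binding` (an OpenAIBinding instance) and `discussion` (a LollmsDiscussion)
# are assumed to exist; their construction is not covered by this diff.
from lollms_client.lollms_types import MSG_TYPE

def on_chunk(token, msg_type):
    # Returning False would stop the stream, mirroring the `break` in the binding's loop.
    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(token, end="", flush=True)
    return True

reply = binding.chat(discussion, stream=True, streaming_callback=on_chunk)
if isinstance(reply, dict):
    # On API failure chat() returns {"status": "error", "message": ...}
    print(reply["message"])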
lollms_client/lollms_core.py
CHANGED

@@ -12,6 +12,7 @@ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingM
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
+from lollms_client.lollms_discussion import LollmsDiscussion
 import json, re
 from enum import Enum
 import base64

@@ -386,6 +387,7 @@ class LollmsClient():
                      split:Optional[bool]=False, # put to true if the prompt is a discussion
                      user_keyword:Optional[str]="!@>user:",
                      ai_keyword:Optional[str]="!@>assistant:",
+                     **kwargs
                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -434,6 +436,64 @@
         raise RuntimeError("LLM binding not initialized.")
 
 
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: Optional[float] = None,
+             top_k: Optional[int] = None,
+             top_p: Optional[float] = None,
+             repeat_penalty: Optional[float] = None,
+             repeat_last_n: Optional[int] = None,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        High-level method to perform a chat generation using a LollmsDiscussion object.
+
+        This is the recommended method for conversational interactions. It uses the
+        discussion object to correctly format the context for the model, including
+        system prompts, roles, and multi-modal content.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object to use for context.
+            branch_tip_id (Optional[str]): The ID of the message to use as the end of the conversation branch. If None, the active branch is used.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (Optional[int]): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: Generated text or an error dictionary if failed.
+        """
+        if self.binding:
+            return self.binding.chat(
+                discussion=discussion,
+                branch_tip_id=branch_tip_id,
+                n_predict=n_predict if n_predict is not None else self.default_n_predict,
+                stream=stream if stream is not None else self.default_stream,
+                temperature=temperature if temperature is not None else self.default_temperature,
+                top_k=top_k if top_k is not None else self.default_top_k,
+                top_p=top_p if top_p is not None else self.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
+                seed=seed if seed is not None else self.default_seed,
+                n_threads=n_threads if n_threads is not None else self.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+            )
+        raise RuntimeError("LLM binding not initialized.")
+
     def embed(self, text, **kwargs):
         """
         Generate embeddings for the input text using the active LLM binding.
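A quick sketch of the intended call pattern for the new high-level LollmsClient.chat(). Again illustrative: the LollmsClient constructor arguments and the LollmsDiscussion creation API are not shown in this diff and are assumed.

# Hypothetical sketch: high-level chat, relying on the client's instance defaults.
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient()             # constructor arguments omitted; not covered by this hunk
response = lc.chat(discussion)  # `discussion` is an existing LollmsDiscussion (creation not shown)
# Every parameter left as None falls back to the corresponding default_* attribute,
# and the call is forwarded to the active binding's chat() (e.g. the OpenAI one above).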
@@ -666,7 +726,7 @@ Respond with a JSON object containing ONE of the following structures:
 """ # No {self.ai_full_header} here, generate_code will get raw JSON
 
         if streaming_callback:
-            streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"
+            streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
 
         # Use generate_code to get structured JSON output from LLM
         # Note: generate_code itself uses generate_text. We are asking for JSON here.

@@ -679,7 +739,7 @@ Respond with a JSON object containing ONE of the following structures:
             # streaming_callback=None, # Decisions are usually not streamed chunk by chunk
         )
         if streaming_callback:
-            streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"
+            streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
 
 
         if not raw_llm_decision_json:

@@ -733,10 +793,11 @@ Respond with a JSON object containing ONE of the following structures:
                 current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
                 break # Or ask LLM to try again without this faulty decision in history
 
-            tool_call_info = {"
+            tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
             turn_history.append(tool_call_info)
             if streaming_callback:
                 streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
+                streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
 
             # Interactive execution if enabled
             if interactive_tool_execution:

@@ -760,15 +821,17 @@ Respond with a JSON object containing ONE of the following structures:
 
 
             if streaming_callback:
-                streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"
+                streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
 
             tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
 
             tool_call_info["result"] = tool_result # Add result to this call's info
             tool_calls_made_this_turn.append(tool_call_info) # Log the completed call
+            if streaming_callback:
+                streaming_callback(f"", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
 
             if streaming_callback:
-                streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"
+                streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
 
             # Add tool execution result to conversation for the LLM
             # The format of this message can influence how the LLM uses the tool output.
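The hunks above consistently pass a fourth argument (turn_history) to streaming_callback and now emit dedicated tool events. Below is a hedged sketch of a callback that consumes these richer events; the MSG_TYPE members and metadata shapes are read off the call sites visible in this diff, everything else is assumed.

# Hypothetical consumer of the richer callback protocol used by the tool-execution hunks.
from lollms_client.lollms_types import MSG_TYPE

def agent_callback(message, msg_type, metadata=None, turn_history=None):
    if msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
        print(f"[step start] {metadata.get('id')}: {message}")
    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
        print(f"[step end]   {metadata.get('id')}")
    elif msg_type == MSG_TYPE.MSG_TYPE_TOOL_CALL:
        # metadata is tool_call_info: {"id": "tool_call_request", "name": ..., "params": ...}
        print(f"[tool call]  {metadata.get('name')}({metadata.get('params')})")
    elif msg_type == MSG_TYPE.MSG_TYPE_TOOL_OUTPUT:
        # per the call site above, the third argument is the raw tool_result here
        print(f"[tool out]   {metadata}")
    else:
        print(message, end="", flush=True)
    return True  # keep the turn going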
@@ -972,12 +1035,14 @@ Respond with a JSON object containing ONE of the following structures:
             hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
             previous_queries.append(current_query_for_rag)
             new_unique = 0
+            documents = []
             for chunk in retrieved:
                 doc = chunk.get("file_path", "Unknown")
                 content = str(chunk.get("chunk_text", ""))
                 sim = float(chunk.get("similarity_percent", 0.0))
                 detail = {"document": doc, "similarity": sim, "content": content,
                           "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
+                documents.append(doc)
                 hop_details["retrieved_chunks_details"].append(detail)
                 key = f"{doc}::{content[:100]}"
                 if key not in all_unique_retrieved_chunks_map:

@@ -987,6 +1052,8 @@ Respond with a JSON object containing ONE of the following structures:
             if hop_count > 0 and new_unique == 0:
                 hop_details["status"] = "No *new* unique chunks retrieved"
             rag_hops_details_list.append(hop_details)
+            if streaming_callback:
+                streaming_callback(f"Retreived {len(retrieved)} data chunks from {set(documents)}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retreival {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
 
             if streaming_callback:
                 streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
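The per-hop notification added above reports how many chunks were retrieved and from which documents, keyed by an "id"/"hop" metadata pair. A minimal, hedged sketch of how a consumer might surface that progress (field names taken from the call above; everything else assumed):

# Hypothetical handling of the new per-hop RAG progress event.
from lollms_client.lollms_types import MSG_TYPE

def rag_progress_callback(message, msg_type, metadata=None, turn_history=None):
    if msg_type == MSG_TYPE.MSG_TYPE_STEP and metadata and "hop" in metadata:
        print(f"hop {metadata['hop']}: {message}")  # e.g. "Retreived 5 data chunks from {...}"
    return True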