lollms-client 1.5.6__py3-none-any.whl → 1.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -34
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -14
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  10. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  12. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  13. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  14. lollms_client/llm_bindings/ollama/__init__.py +307 -89
  15. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  16. lollms_client/llm_bindings/openai/__init__.py +81 -20
  17. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  18. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  19. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  20. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  21. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  22. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  23. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  24. lollms_client/lollms_agentic.py +4 -2
  25. lollms_client/lollms_base_binding.py +61 -0
  26. lollms_client/lollms_core.py +512 -1890
  27. lollms_client/lollms_discussion.py +25 -11
  28. lollms_client/lollms_llm_binding.py +112 -261
  29. lollms_client/lollms_mcp_binding.py +34 -75
  30. lollms_client/lollms_stt_binding.py +85 -52
  31. lollms_client/lollms_tti_binding.py +23 -37
  32. lollms_client/lollms_ttm_binding.py +24 -42
  33. lollms_client/lollms_tts_binding.py +28 -17
  34. lollms_client/lollms_ttv_binding.py +24 -42
  35. lollms_client/lollms_types.py +4 -2
  36. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  37. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  38. lollms_client/tti_bindings/diffusers/__init__.py +418 -810
  39. lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
  40. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  41. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  42. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  43. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  44. lollms_client/tti_bindings/openai/__init__.py +10 -11
  45. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  46. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  47. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  48. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  49. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  50. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  51. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  52. lollms_client/tts_bindings/bark/__init__.py +7 -10
  53. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  54. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  55. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  56. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  57. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/METADATA +113 -5
  58. lollms_client-1.7.10.dist-info/RECORD +89 -0
  59. lollms_client-1.5.6.dist-info/RECORD +0 -87
  60. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "1.5.6" # Updated version
+__version__ = "1.7.10" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/llm_bindings/azure_openai/__init__.py CHANGED
@@ -238,7 +238,7 @@ class AzureOpenAIBinding(LollmsLLMBinding):
             "supports_vision": True, # Assume modern deployments support vision
         }
 
-    def listModels(self) -> List[Dict[str, str]]:
+    def list_models(self) -> List[Dict[str, str]]:
        """
        List Models is not supported via the Azure OpenAI API.
        Deployments are managed in the Azure Portal. This method returns an empty list.
@@ -280,7 +280,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models ---")
-    models = binding.listModels()
+    models = binding.list_models()
     if not models:
         ASCIIColors.green("Correctly returned an empty list for models, as expected for Azure.")
 
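Note that the `listModels` → `list_models` rename is applied across every binding in this release, so it is a breaking change for downstream callers. A minimal sketch of the updated call site (the `binding` construction is assumed):

```python
# Hypothetical call site after upgrading to 1.7.10: the camelCase
# listModels() is gone; use the snake_case list_models() instead.
models = binding.list_models()  # returns [] on Azure, where deployments are portal-managed
for m in models:
    print(m.get("model_name"), "-", m.get("display_name", ""))
```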
lollms_client/llm_bindings/claude/__init__.py CHANGED
@@ -1,3 +1,4 @@
+# bindings/claude/__init__.py
 import base64
 import os
 from io import BytesIO
@@ -14,7 +15,6 @@ from ascii_colors import ASCIIColors, trace_exception
 import pipmaster as pm
 
 # Ensure the required packages are installed
-# Added 'requests' for dynamic model listing
 pm.ensure_packages(["anthropic", "pillow", "tiktoken", "requests"])
 
 import anthropic
@@ -28,8 +28,9 @@ ANTHROPIC_API_BASE_URL = "https://api.anthropic.com/v1"
 
 # A hardcoded list to be used as a fallback if the API call fails
 _FALLBACK_MODELS = [
-    {'model_name': 'claude-3-opus-20240229', 'display_name': 'Claude 3 Opus', 'description': 'Most powerful model for highly complex tasks.', 'owned_by': 'Anthropic'},
+    {'model_name': 'claude-3-7-sonnet-20250219', 'display_name': 'Claude 3.7 Sonnet', 'description': 'Most intelligent model with extended thinking capabilities.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-5-sonnet-20240620', 'display_name': 'Claude 3.5 Sonnet', 'description': 'Our most intelligent model, a new industry standard.', 'owned_by': 'Anthropic'},
+    {'model_name': 'claude-3-opus-20240229', 'display_name': 'Claude 3 Opus', 'description': 'Most powerful model for highly complex tasks.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-sonnet-20240229', 'display_name': 'Claude 3 Sonnet', 'description': 'Ideal balance of intelligence and speed for enterprise workloads.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-haiku-20240307', 'display_name': 'Claude 3 Haiku', 'description': 'Fastest and most compact model for near-instant responsiveness.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-2.1', 'display_name': 'Claude 2.1', 'description': 'Legacy model with a 200K token context window.', 'owned_by': 'Anthropic'},
@@ -124,6 +125,9 @@ class ClaudeBinding(LollmsLLMBinding):
                       split:Optional[bool]=False, # Not used in this direct method
                       user_keyword:Optional[str]="!@>user:", # Not used
                       ai_keyword:Optional[str]="!@>assistant:", # Not used
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[str] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = False, # auto
                       ) -> Union[str, dict]:
        """
        Generate text using the Claude model.
@@ -131,8 +135,34 @@ class ClaudeBinding(LollmsLLMBinding):
         if not self.client:
             return {"status": False, "error": "Anthropic client not initialized."}
 
+        # Handling Thinking / Reasoning
+        thinking_config = None
+        if think:
+            # Map reasoning_effort to budget_tokens
+            budget = 1024 # default/low
+            if reasoning_effort == "medium":
+                budget = 8192
+            elif reasoning_effort == "high":
+                budget = 16000
+
+            # Constraint: max_tokens (n_predict) must be > budget_tokens.
+            # If the default n_predict (2048) is too low for reasoning, boost it.
+            required_min_tokens = budget + 2048 # buffer for the visible output
+            if n_predict is None or n_predict < required_min_tokens:
+                n_predict = required_min_tokens
+                ASCIIColors.info(f"Adjusting n_predict to {n_predict} to accommodate thinking budget of {budget}")
+
+            thinking_config = {"type": "enabled", "budget_tokens": budget}
+            # Note: some Anthropic documentation requires temperature to be unset (or 1.0)
+            # when extended thinking is enabled, but Claude 3.7 accepts it, so we pass it
+            # through and let the API validate.
+
         api_params = self._construct_parameters(temperature, top_p, top_k, n_predict)
+        if thinking_config:
+            api_params["thinking"] = thinking_config
+            # max_tokens is already set in api_params by _construct_parameters via n_predict
+
         message_content = []
         if prompt and prompt.strip():
             message_content.append({"type": "text", "text": prompt})
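In isolation, the new effort-to-budget logic distills to the sketch below (the names `EFFORT_BUDGETS` and `resolve_thinking` are illustrative, not package API; the values come from the diff). Anthropic requires `max_tokens > budget_tokens`, so `n_predict` is raised to `budget + 2048` when needed; e.g. `reasoning_effort="high"` forces `n_predict >= 18048`.

```python
from typing import Optional, Tuple

# Budget values as hardcoded in the binding above.
EFFORT_BUDGETS = {"low": 1024, "medium": 8192, "high": 16000}

def resolve_thinking(reasoning_effort: str, n_predict: Optional[int]) -> Tuple[dict, int]:
    budget = EFFORT_BUDGETS.get(reasoning_effort, 1024)  # unknown effort falls back to low
    required_min_tokens = budget + 2048  # buffer for the visible answer
    if n_predict is None or n_predict < required_min_tokens:
        n_predict = required_min_tokens
    return {"type": "enabled", "budget_tokens": budget}, n_predict
```

Since the same block now appears verbatim in both `generate_text` and `chat`, a helper along these lines would also remove the duplication.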
@@ -140,7 +170,6 @@ class ClaudeBinding(LollmsLLMBinding):
         if images:
             for image_data in images:
                 try:
-                    # ... (image processing code is unchanged)
                     if is_image_path(image_data):
                         with open(image_data, "rb") as image_file:
                             b64_data = base64.b64encode(image_file.read()).decode('utf-8')
@@ -166,8 +195,6 @@ class ClaudeBinding(LollmsLLMBinding):
         messages = [{"role": "user", "content": message_content}]
         full_response_text = ""
 
-        # ---- CHANGE START ----
-        # Conditionally build the request arguments to avoid sending an empty `system` parameter.
         request_args = {
             "model": self.model_name,
             "messages": messages,
@@ -175,22 +202,49 @@ class ClaudeBinding(LollmsLLMBinding):
         }
         if system_prompt and system_prompt.strip():
             request_args["system"] = system_prompt
-        # ---- CHANGE END ----
 
         try:
             if stream:
+                # Use raw stream iteration to catch thinking events
                 with self.client.messages.stream(**request_args) as stream_response:
-                    for chunk in stream_response.text_stream:
-                        full_response_text += chunk
-                        if streaming_callback:
-                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                    in_thinking_block = False
+                    for event in stream_response:
+                        if event.type == "content_block_start" and event.content_block.type == "thinking":
+                            full_response_text += "<think>\n"
+                            if streaming_callback:
+                                streaming_callback("<think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = True
+                        elif event.type == "content_block_delta" and event.delta.type == "thinking_delta":
+                            chunk = event.delta.thinking
+                            full_response_text += chunk
+                            if streaming_callback:
+                                streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                        elif event.type == "content_block_stop" and in_thinking_block:
+                            full_response_text += "\n</think>\n"
+                            if streaming_callback:
+                                streaming_callback("\n</think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = False
+                        elif event.type == "content_block_delta" and event.delta.type == "text_delta":
+                            chunk = event.delta.text
+                            full_response_text += chunk
+                            if streaming_callback:
+                                if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
                 return full_response_text
             else:
                 response = self.client.messages.create(**request_args)
                 if response.stop_reason == "error":
                     return {"status": False, "error": f"API returned an error: {response.stop_reason}"}
-                return response.content[0].text
+
+                # Reconstruct the full text, including the thinking block
+                output_parts = []
+                for block in response.content:
+                    if block.type == "thinking":
+                        output_parts.append(f"<think>\n{block.thinking}\n</think>\n")
+                    elif block.type == "text":
+                        output_parts.append(block.text)
+
+                return "".join(output_parts)
 
         except Exception as ex:
             error_message = f"An unexpected error occurred with Claude API: {str(ex)}"
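Outside the binding, the same event handling can be reproduced directly against the Anthropic SDK. A self-contained sketch (assumes `ANTHROPIC_API_KEY` is set in the environment and an SDK version recent enough to support extended thinking; the model and budget values are illustrative):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
with client.messages.stream(
    model="claude-3-7-sonnet-20250219",
    max_tokens=4096,  # must exceed the thinking budget
    thinking={"type": "enabled", "budget_tokens": 1024},
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
) as stream:
    for event in stream:
        if event.type == "content_block_delta":
            if event.delta.type == "thinking_delta":
                print(event.delta.thinking, end="")  # reasoning tokens
            elif event.delta.type == "text_delta":
                print(event.delta.text, end="")      # visible answer
```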
@@ -210,7 +264,10 @@ class ClaudeBinding(LollmsLLMBinding):
              seed: Optional[int] = None, # Not supported
              n_threads: Optional[int] = None, # Not supported
              ctx_size: Optional[int] = None, # Not supported
-             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[str] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = False, # auto
              ) -> Union[str, dict]:
        """
        Conduct a chat session with the Claude model using a LollmsDiscussion object.
@@ -222,7 +279,6 @@ class ClaudeBinding(LollmsLLMBinding):
         messages = discussion.get_messages(branch_tip_id)
 
         history = []
-        # ... (history building code is unchanged)
         for msg in messages:
             role = 'user' if msg.sender_type == "user" else 'assistant'
             content_parts = []
@@ -252,11 +308,28 @@ class ClaudeBinding(LollmsLLMBinding):
         if not history:
             return {"status": "error", "message": "Cannot start chat with an empty discussion."}
 
+        # Handling Thinking / Reasoning
+        thinking_config = None
+        if think:
+            budget = 1024
+            if reasoning_effort == "medium":
+                budget = 8192
+            elif reasoning_effort == "high":
+                budget = 16000
+
+            required_min_tokens = budget + 2048
+            if n_predict is None or n_predict < required_min_tokens:
+                n_predict = required_min_tokens
+                ASCIIColors.info(f"Adjusting n_predict to {n_predict} for thinking budget {budget}")
+
+            thinking_config = {"type": "enabled", "budget_tokens": budget}
+
         api_params = self._construct_parameters(temperature, top_p, top_k, n_predict)
+        if thinking_config:
+            api_params["thinking"] = thinking_config
+
         full_response_text = ""
 
-        # ---- CHANGE START ----
-        # Conditionally build the request arguments to avoid sending an empty `system` parameter.
         request_args = {
             "model": self.model_name,
             "messages": history,
@@ -264,29 +337,49 @@ class ClaudeBinding(LollmsLLMBinding):
         }
         if system_prompt and system_prompt.strip():
             request_args["system"] = system_prompt
-        # ---- CHANGE END ----
 
         try:
             if stream:
                 with self.client.messages.stream(**request_args) as stream_response:
-                    for chunk in stream_response.text_stream:
-                        full_response_text += chunk
-                        if streaming_callback:
-                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                    in_thinking_block = False
+                    for event in stream_response:
+                        if event.type == "content_block_start" and event.content_block.type == "thinking":
+                            full_response_text += "<think>\n"
+                            if streaming_callback: streaming_callback("<think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = True
+                        elif event.type == "content_block_delta" and event.delta.type == "thinking_delta":
+                            chunk = event.delta.thinking
+                            full_response_text += chunk
+                            if streaming_callback: streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                        elif event.type == "content_block_stop" and in_thinking_block:
+                            full_response_text += "\n</think>\n"
+                            if streaming_callback: streaming_callback("\n</think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = False
+                        elif event.type == "content_block_delta" and event.delta.type == "text_delta":
+                            chunk = event.delta.text
+                            full_response_text += chunk
+                            if streaming_callback:
+                                if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
                 return full_response_text
             else:
                 response = self.client.messages.create(**request_args)
                 if response.stop_reason == "error":
                     return {"status": "error", "message": f"API returned an error: {response.stop_reason}"}
-                return response.content[0].text
+
+                output_parts = []
+                for block in response.content:
+                    if block.type == "thinking":
+                        output_parts.append(f"<think>\n{block.thinking}\n</think>\n")
+                    elif block.type == "text":
+                        output_parts.append(block.text)
+                return "".join(output_parts)
 
         except Exception as ex:
             error_message = f"An unexpected error occurred with Claude API: {str(ex)}"
             trace_exception(ex)
             return {"status": "error", "message": error_message}
 
-    # ... (Rest of the file is unchanged) ...
     def tokenize(self, text: str) -> list:
        """
        Tokenize the input text.
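Since both `generate_text` and `chat` now inline the reasoning into the returned string as `<think>...</think>` blocks, callers that only want the final answer must strip the tags themselves. A hedged consumer-side sketch (the regex split is illustrative, not package API):

```python
import re

def split_thinking(response: str) -> tuple:
    """Separate <think> blocks, as emitted by this binding, from the answer."""
    thoughts = "\n".join(re.findall(r"<think>\n?(.*?)\n?</think>", response, re.DOTALL))
    answer = re.sub(r"<think>.*?</think>\n?", "", response, flags=re.DOTALL).strip()
    return thoughts, answer

reasoning, answer = split_thinking("<think>\nsteps...\n</think>\nFinal answer.")
# reasoning == "steps...", answer == "Final answer."
```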
@@ -329,7 +422,7 @@ class ClaudeBinding(LollmsLLMBinding):
                 model=self.model_name,
                 messages=[{"role": "user", "content": text}]
             )
-            return response.token_count # Updated to correct response attribute
+            return response.input_tokens # CountTokensResponse exposes 'input_tokens', not 'token_count'
         except Exception as e:
             trace_exception(e)
             ASCIIColors.error(f"Failed to count tokens with Claude API: {e}")
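The fix matches the SDK's token-counting endpoint: recent `anthropic` versions return a response object whose field is `input_tokens`. A minimal sketch (model name is illustrative; assumes `ANTHROPIC_API_KEY` is set):

```python
import anthropic

client = anthropic.Anthropic()
resp = client.messages.count_tokens(
    model="claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(resp.input_tokens)  # integer prompt-token count; there is no 'token_count' attribute
```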
@@ -354,7 +447,7 @@ class ClaudeBinding(LollmsLLMBinding):
             "supports_vision": "claude-3" in self.model_name,
         }
 
-    def listModels(self) -> List[Dict[str, str]]:
+    def list_models(self) -> List[Dict[str, str]]:
        """
        Lists available models from the Anthropic API.
        Caches the result to avoid repeated API calls.
@@ -368,8 +461,6 @@ class ClaudeBinding(LollmsLLMBinding):
             self._cached_models = _FALLBACK_MODELS
             return self._cached_models
 
-        # This part is complex and likely correct, leaving as is.
-        # It's good practice.
         headers = {
             "x-api-key": self.service_key,
             "anthropic-version": "2023-06-01",
@@ -437,8 +528,8 @@ if __name__ == '__main__':
     ASCIIColors.yellow("--- Testing ClaudeBinding ---")
 
     # --- Configuration ---
-    test_model_name = "claude-3-haiku-20240307" # Use Haiku for speed in testing
-    test_vision_model_name = "claude-3-sonnet-20240229"
+    test_model_name = "claude-3-7-sonnet-20250219" # Sonnet 3.7 supports extended thinking
+    test_vision_model_name = "claude-3-5-sonnet-20240620"
 
     full_streamed_text = ""
 
@@ -451,7 +542,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models (dynamic) ---")
-    models = binding.listModels()
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models.")
         for m in models:
@@ -472,7 +563,7 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
     prompt_text = "Explain the importance of bees in one paragraph."
     ASCIIColors.info(f"Prompt: {prompt_text}")
-    generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt=" ")
+    generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt=" ", think=True)
     if isinstance(generated_text, str):
         ASCIIColors.green(f"Generated text:\n{generated_text}")
     else:
@@ -488,7 +579,7 @@ if __name__ == '__main__':
             return True
 
     ASCIIColors.info(f"Prompt: {prompt_text}")
-    result = binding.generate_text(prompt_text, n_predict=150, stream=True, streaming_callback=stream_callback)
+    result = binding.generate_text(prompt_text, n_predict=150, stream=True, streaming_callback=stream_callback, think=True)
     full_streamed_text = "".join(captured_chunks)
     print("\n--- End of Stream ---")
     ASCIIColors.green(f"Full streamed text (for verification): {result}")
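The test relies on a `stream_callback` whose body is only partly visible in this diff. A hedged reconstruction of a callback compatible with the binding's contract, where returning `False` aborts the stream loop (the `MSG_TYPE` import path is assumed from the package layout):

```python
from lollms_client.lollms_types import MSG_TYPE  # assumed import path

captured_chunks = []

def stream_callback(chunk: str, msg_type: MSG_TYPE) -> bool:
    print(chunk, end="", flush=True)  # echo each chunk as it arrives
    captured_chunks.append(chunk)
    return True  # keep streaming; returning False stops the stream early
```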