lollms-client 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/groq/__init__.py +2 -3
- lollms_client/llm_bindings/llamacpp/__init__.py +192 -384
- lollms_client/llm_bindings/mistral/__init__.py +57 -26
- lollms_client/llm_bindings/open_router/__init__.py +43 -10
- lollms_client/lollms_discussion.py +6 -1
- lollms_client/lollms_llm_binding.py +403 -2
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/METADATA +1 -1
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/RECORD +12 -12
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/mistral/__init__.py
@@ -11,11 +11,11 @@ import pipmaster as pm
 # Ensure the required packages are installed
 pm.ensure_packages(["mistralai", "pillow", "tiktoken"])

-from mistralai
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral
 from PIL import Image, ImageDraw
 import tiktoken

+
 BindingName = "MistralBinding"

 class MistralBinding(LollmsLLMBinding):
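The import change tracks the mistralai 1.x SDK, which exposes a single Mistral client at the package root and drops the ChatMessage model class in favor of plain dicts. A minimal sketch of the 1.x surface this release migrates to (model name and prompt are illustrative):

    import os
    from mistralai import Mistral  # mistralai >= 1.0 exposes the client at the package root

    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
    # Messages are plain dicts now; ChatMessage from mistralai.models.chat_completion is gone.
    response = client.chat.complete(
        model="mistral-large-latest",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.choices[0].message.content)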
@@ -28,7 +28,7 @@ class MistralBinding(LollmsLLMBinding):

     def __init__(self,
                  model_name: str = "mistral-large-latest",
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -40,13 +40,13 @@ class MistralBinding(LollmsLLMBinding):
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.mistral_api_key =
+        self.mistral_api_key = service_key or os.getenv("MISTRAL_API_KEY")

         if not self.mistral_api_key:
             raise ValueError("Mistral API key is required. Set it via 'mistral_api_key' or MISTRAL_API_KEY env var.")

         try:
-            self.client =
+            self.client = Mistral(api_key=self.mistral_api_key)
         except Exception as e:
             ASCIIColors.error(f"Failed to configure Mistral client: {e}")
             self.client = None
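With the new service_key parameter, key resolution is the explicit argument first, then the MISTRAL_API_KEY environment variable. A hypothetical instantiation sketch (the import path follows the file list above):

    from lollms_client.llm_bindings.mistral import MistralBinding

    # Explicit key takes precedence; otherwise MISTRAL_API_KEY is read from the environment.
    binding = MistralBinding(model_name="mistral-large-latest", service_key="sk-...")
    env_binding = MistralBinding(model_name="mistral-large-latest")  # falls back to env var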
@@ -64,35 +64,67 @@ class MistralBinding(LollmsLLMBinding):
         if n_predict is not None: params['max_tokens'] = n_predict
         if seed is not None: params['random_seed'] = seed # Mistral uses 'random_seed'
         return params
+

-    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[
-        """Prepares the message list for the
+    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[Dict[str, any]]:
+        """Prepares the message list for the API from a LollmsDiscussion."""
         history = []
         if discussion.system_prompt:
-
-            # A lone system message is not ideal. We will prepend it to the first user message.
-            # However, for API consistency, we will treat it as a separate message if it exists.
-            # The official client will likely handle this.
-            history.append(ChatMessage(role="system", content=discussion.system_prompt))
+            history.append({"role": "system", "content": discussion.system_prompt})

         for msg in discussion.get_messages(branch_tip_id):
             role = 'user' if msg.sender_type == "user" else 'assistant'
-            # Note:
+            # Note: Vision support depends on the specific model being called via OpenRouter.
+            # We will not implement it in this generic binding to avoid complexity,
+            # as different models might expect different formats.
             if msg.content:
-                history.append(
+                history.append({'role': role, 'content': msg.content})
         return history

-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
-        Generate text using
+        Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt

-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)

     def chat(self,
              discussion: LollmsDiscussion,
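The rewritten generate_text is now a thin wrapper that packs the prompt into a throwaway LollmsDiscussion and forwards every sampling parameter to chat. (The docstring and vision comments mention OpenRouter although this hunk sits in the Mistral binding; they appear copied from the sibling binding.) A hedged usage sketch, reusing the binding from the previous example:

    text = binding.generate_text(
        prompt="Summarize the lollms project in one sentence.",
        system_prompt="Be concise.",
        n_predict=128,
        stream=False,
    )
    print(text)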
@@ -117,7 +149,7 @@ class MistralBinding(LollmsLLMBinding):

         try:
             if stream:
-                response = self.client.
+                response = self.client.chat.stream(
                     model=self.model_name,
                     messages=messages,
                     **api_params
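client.chat.stream(...) is the 1.x streaming entry point; it yields events whose deltas live under event.data.choices[0].delta.content (field layout as of mistralai 1.x; verify against the installed version). A small consumption sketch:

    stream = client.chat.stream(
        model="mistral-large-latest",
        messages=[{"role": "user", "content": "Hello"}],
    )
    for event in stream:
        delta = event.data.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)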
@@ -131,7 +163,7 @@ class MistralBinding(LollmsLLMBinding):
                         break
                 return full_response_text
             else:
-                response = self.client.chat(
+                response = self.client.chat.complete(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -201,7 +233,7 @@ class MistralBinding(LollmsLLMBinding):
             return []
         try:
             ASCIIColors.debug("Listing Mistral models...")
-            models = self.client.
+            models = self.client.models.list()
             model_info_list = []
             for m in models.data:
                 model_info_list.append({
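client.models.list() returns a container whose .data attribute the loop above iterates, each entry carrying the model id. A short sketch:

    models = client.models.list()
    for m in models.data:
        print(m.id)  # e.g. 'mistral-large-latest'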
@@ -264,7 +296,6 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
     full_streamed_text = ""
     def stream_callback(chunk: str, msg_type: int):
-        nonlocal full_streamed_text
         ASCIIColors.green(chunk, end="", flush=True)
         full_streamed_text += chunk
         return True
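Removing nonlocal here is necessary because stream_callback is defined at module scope, where nonlocal is a SyntaxError; note that the remaining full_streamed_text += chunk still rebinds a name and would need a global declaration (or a mutable container) to behave as intended. A sketch of the container approach, assuming the same callback contract:

    chunks = []  # a mutable container sidesteps global/nonlocal entirely

    def stream_callback(chunk: str, msg_type: int):
        print(chunk, end="", flush=True)
        chunks.append(chunk)  # method call, no rebinding, so no declaration needed
        return True

    # after streaming finishes:
    full_streamed_text = "".join(chunks)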
lollms_client/llm_bindings/open_router/__init__.py
@@ -29,7 +29,7 @@ class OpenRouterBinding(LollmsLLMBinding):

     def __init__(self,
                  model_name: str = "google/gemini-flash-1.5", # A good, fast default
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -37,11 +37,11 @@ class OpenRouterBinding(LollmsLLMBinding):

         Args:
             model_name (str): The name of the model to use from OpenRouter (e.g., 'anthropic/claude-3-haiku-20240307').
-
+            service_key (str): The API key for the OpenRouter service.
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.api_key =
+        self.api_key = service_key or os.getenv("OPENROUTER_API_KEY")

         if not self.api_key:
             raise ValueError("OpenRouter API key is required. Set it via 'open_router_api_key' or OPENROUTER_API_KEY env var.")
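Note that the error message still references 'open_router_api_key' even though the parameter is now service_key. OpenRouter itself exposes an OpenAI-compatible chat completions endpoint authenticated with a Bearer key, which is what the binding ultimately targets. A hypothetical direct call, for orientation:

    import os
    import requests

    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
        json={
            "model": "google/gemini-flash-1.5",
            "messages": [{"role": "user", "content": "Hello"}],
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])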
@@ -84,17 +84,50 @@ class OpenRouterBinding(LollmsLLMBinding):
             history.append({'role': role, 'content': msg.content})
         return history

-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt

-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)

     def chat(self,
              discussion: LollmsDiscussion,
lollms_client/lollms_discussion.py
@@ -1835,4 +1835,9 @@ class LollmsDiscussion:

         del self.images[index]
         del self.active_images[index]
-        self.touch()
+        self.touch()
+
+    @property
+    def system_prompt(self) -> str:
+        """Returns the system prompt for this discussion."""
+        return self._system_prompt
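This hunk adds only a getter. Since the new generate_text wrappers assign temp_discussion.system_prompt = system_prompt, a matching setter has to exist elsewhere in LollmsDiscussion; with a getter-only property that assignment raises AttributeError. A sketch of the setter such code assumes (hypothetical, not part of this diff):

    @system_prompt.setter
    def system_prompt(self, value: str) -> None:  # hypothetical; not shown in this diff
        self._system_prompt = value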
lollms_client/lollms_llm_binding.py
@@ -154,8 +154,409 @@ class LollmsLLMBinding(ABC):
         """
         pass

-    def get_ctx_size(self, model_name:str
-
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves context size for a model from a hardcoded list.
+
+        This method checks if the model name contains a known base model identifier
+        (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
+        as a failsafe when the context size cannot be retrieved directly from the
+        Ollama API.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
+        # should appear, as they will be checked first due to the sorting logic below.
+        known_contexts = {
+            'agentica-org/deepcoder-14b-preview': 8192,
+            'agentica-org/deepcoder-14b-preview:free': 8192,
+            'ai21/jamba-large-1.7': 256000,
+            'ai21/jamba-mini-1.7': 256000,
+            'aion-labs/aion-1.0': 8192,
+            'aion-labs/aion-1.0-mini': 8192,
+            'aion-labs/aion-rp-llama-3.1-8b': 131072,
+            'alfredpros/codellama-7b-instruct-solidity': 16384,
+            'alpindale/goliath-120b': 4096,
+            'amazon/nova-lite-v1': 32768,
+            'amazon/nova-micro-v1': 32768,
+            'amazon/nova-pro-v1': 32768,
+            'anthracite-org/magnum-v2-72b': 131072,
+            'anthracite-org/magnum-v4-72b': 131072,
+            'anthropic/claude-3-haiku': 200000,
+            'anthropic/claude-3-haiku:beta': 200000,
+            'anthropic/claude-3-opus': 200000,
+            'anthropic/claude-3-opus:beta': 200000,
+            'anthropic/claude-3.5-haiku': 200000,
+            'anthropic/claude-3.5-haiku-20241022': 200000,
+            'anthropic/claude-3.5-haiku:beta': 200000,
+            'anthropic/claude-3.5-sonnet': 200000,
+            'anthropic/claude-3.5-sonnet-20240620': 200000,
+            'anthropic/claude-3.5-sonnet-20240620:beta': 200000,
+            'anthropic/claude-3.5-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet': 200000,
+            'anthropic/claude-3.7-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet:thinking': 200000,
+            'anthropic/claude-opus-4': 200000,
+            'anthropic/claude-opus-4.1': 200000,
+            'anthropic/claude-sonnet-4': 200000,
+            'arcee-ai/coder-large': 32768,
+            'arcee-ai/maestro-reasoning': 32768,
+            'arcee-ai/spotlight': 32768,
+            'arcee-ai/virtuoso-large': 32768,
+            'arliai/qwq-32b-arliai-rpr-v1': 8192,
+            'arliai/qwq-32b-arliai-rpr-v1:free': 8192,
+            'baidu/ernie-4.5-300b-a47b': 128000,
+            'bytedance/ui-tars-1.5-7b': 8192,
+            'cognitivecomputations/dolphin-mistral-24b-venice-edition:free': 32768,
+            'cognitivecomputations/dolphin-mixtral-8x22b': 65536,
+            'cognitivecomputations/dolphin3.0-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-mistral-24b:free': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b:free': 32768,
+            'cohere/command': 8192,
+            'cohere/command-a': 8192,
+            'cohere/command-r': 128000,
+            'cohere/command-r-03-2024': 128000,
+            'cohere/command-r-08-2024': 128000,
+            'cohere/command-r-plus': 128000,
+            'cohere/command-r-plus-04-2024': 128000,
+            'cohere/command-r-plus-08-2024': 128000,
+            'cohere/command-r7b-12-2024': 128000,
+            'deepseek/deepseek-chat': 32768,
+            'deepseek/deepseek-chat-v3-0324': 32768,
+            'deepseek/deepseek-chat-v3-0324:free': 32768,
+            'deepseek/deepseek-prover-v2': 131072,
+            'deepseek/deepseek-r1': 32768,
+            'deepseek/deepseek-r1-0528': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b:free': 32768,
+            'deepseek/deepseek-r1-0528:free': 32768,
+            'deepseek/deepseek-r1-distill-llama-70b': 131072,
+            'deepseek/deepseek-r1-distill-llama-70b:free': 131072,
+            'deepseek/deepseek-r1-distill-llama-8b': 131072,
+            'deepseek/deepseek-r1-distill-qwen-1.5b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b:free': 32768,
+            'deepseek/deepseek-r1-distill-qwen-32b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-7b': 32768,
+            'deepseek/deepseek-r1:free': 32768,
+            'deepseek/deepseek-v3-base': 32768,
+            'eleutherai/llemma_7b': 8192,
+            'featherless/qwerky-72b:free': 8192,
+            'google/gemini-2.0-flash-001': 1000000,
+            'google/gemini-2.0-flash-exp:free': 1000000,
+            'google/gemini-2.0-flash-lite-001': 1000000,
+            'google/gemini-2.5-flash': 1000000,
+            'google/gemini-2.5-flash-lite': 1000000,
+            'google/gemini-2.5-flash-lite-preview-06-17': 1000000,
+            'google/gemini-2.5-pro': 2000000,
+            'google/gemini-2.5-pro-exp-03-25': 2000000,
+            'google/gemini-2.5-pro-preview': 2000000,
+            'google/gemini-2.5-pro-preview-05-06': 2000000,
+            'google/gemini-flash-1.5': 1000000,
+            'google/gemini-flash-1.5-8b': 1000000,
+            'google/gemini-pro-1.5': 2000000,
+            'google/gemma-2-27b-it': 8192,
+            'google/gemma-2-9b-it': 8192,
+            'google/gemma-2-9b-it:free': 8192,
+            'google/gemma-3-12b-it': 131072,
+            'google/gemma-3-12b-it:free': 131072,
+            'google/gemma-3-27b-it': 131072,
+            'google/gemma-3-27b-it:free': 131072,
+            'google/gemma-3-4b-it': 131072,
+            'google/gemma-3-4b-it:free': 131072,
+            'google/gemma-3n-e2b-it:free': 131072,
+            'google/gemma-3n-e4b-it': 131072,
+            'google/gemma-3n-e4b-it:free': 131072,
+            'gryphe/mythomax-l2-13b': 4096,
+            'inception/mercury': 32768,
+            'inception/mercury-coder': 32768,
+            'infermatic/mn-inferor-12b': 8192,
+            'inflection/inflection-3-pi': 128000,
+            'inflection/inflection-3-productivity': 128000,
+            'liquid/lfm-3b': 8192,
+            'liquid/lfm-40b': 8192,
+            'liquid/lfm-7b': 8192,
+            'mancer/weaver': 8192,
+            'meta-llama/llama-3-70b-instruct': 8192,
+            'meta-llama/llama-3-8b-instruct': 8192,
+            'meta-llama/llama-3.1-405b': 131072,
+            'meta-llama/llama-3.1-405b-instruct': 131072,
+            'meta-llama/llama-3.1-405b-instruct:free': 131072,
+            'meta-llama/llama-3.1-70b-instruct': 131072,
+            'meta-llama/llama-3.1-8b-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct:free': 131072,
+            'meta-llama/llama-3.2-1b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct:free': 131072,
+            'meta-llama/llama-3.2-90b-vision-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct:free': 131072,
+            'meta-llama/llama-4-maverick': 131072,
+            'meta-llama/llama-4-scout': 131072,
+            'meta-llama/llama-guard-2-8b': 8192,
+            'meta-llama/llama-guard-3-8b': 131072,
+            'meta-llama/llama-guard-4-12b': 131072,
+            'microsoft/mai-ds-r1': 32768,
+            'microsoft/mai-ds-r1:free': 32768,
+            'microsoft/phi-3-medium-128k-instruct': 131072,
+            'microsoft/phi-3-mini-128k-instruct': 131072,
+            'microsoft/phi-3.5-mini-128k-instruct': 131072,
+            'microsoft/phi-4': 131072,
+            'microsoft/phi-4-multimodal-instruct': 131072,
+            'microsoft/phi-4-reasoning-plus': 131072,
+            'microsoft/wizardlm-2-8x22b': 65536,
+            'minimax/minimax-01': 200000,
+            'minimax/minimax-m1': 200000,
+            'mistralai/codestral-2501': 32768,
+            'mistralai/codestral-2508': 32768,
+            'mistralai/devstral-medium': 32768,
+            'mistralai/devstral-small': 32768,
+            'mistralai/devstral-small-2505': 32768,
+            'mistralai/devstral-small-2505:free': 32768,
+            'mistralai/magistral-medium-2506': 32768,
+            'mistralai/magistral-medium-2506:thinking': 32768,
+            'mistralai/magistral-small-2506': 32768,
+            'mistralai/ministral-3b': 32768,
+            'mistralai/ministral-8b': 32768,
+            'mistralai/mistral-7b-instruct': 32768,
+            'mistralai/mistral-7b-instruct-v0.1': 8192,
+            'mistralai/mistral-7b-instruct-v0.2': 32768,
+            'mistralai/mistral-7b-instruct-v0.3': 32768,
+            'mistralai/mistral-7b-instruct:free': 32768,
+            'mistralai/mistral-large': 32768,
+            'mistralai/mistral-large-2407': 128000,
+            'mistralai/mistral-large-2411': 128000,
+            'mistralai/mistral-medium-3': 32768,
+            'mistralai/mistral-nemo': 128000,
+            'mistralai/mistral-nemo:free': 128000,
+            'mistralai/mistral-saba': 32768,
+            'mistralai/mistral-small': 32768,
+            'mistralai/mistral-small-24b-instruct-2501': 32768,
+            'mistralai/mistral-small-24b-instruct-2501:free': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct:free': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct:free': 32768,
+            'mistralai/mistral-tiny': 32768,
+            'mistralai/mixtral-8x22b-instruct': 65536,
+            'mistralai/mixtral-8x7b-instruct': 32768,
+            'mistralai/pixtral-12b': 128000,
+            'mistralai/pixtral-large-2411': 128000,
+            'moonshotai/kimi-dev-72b:free': 200000,
+            'moonshotai/kimi-k2': 200000,
+            'moonshotai/kimi-k2:free': 200000,
+            'moonshotai/kimi-vl-a3b-thinking': 200000,
+            'moonshotai/kimi-vl-a3b-thinking:free': 200000,
+            'morph/morph-v3-fast': 8192,
+            'morph/morph-v3-large': 8192,
+            'neversleep/llama-3-lumimaid-70b': 8192,
+            'neversleep/llama-3.1-lumimaid-8b': 131072,
+            'neversleep/noromaid-20b': 32768,
+            'nousresearch/deephermes-3-llama-3-8b-preview:free': 8192,
+            'nousresearch/deephermes-3-mistral-24b-preview': 32768,
+            'nousresearch/hermes-2-pro-llama-3-8b': 8192,
+            'nousresearch/hermes-3-llama-3.1-405b': 131072,
+            'nousresearch/hermes-3-llama-3.1-70b': 131072,
+            'nousresearch/nous-hermes-2-mixtral-8x7b-dpo': 32768,
+            'nvidia/llama-3.1-nemotron-70b-instruct': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1:free': 131072,
+            'nvidia/llama-3.3-nemotron-super-49b-v1': 131072,
+            'openai/chatgpt-4o-latest': 128000,
+            'openai/codex-mini': 2048,
+            'openai/gpt-3.5-turbo': 4096,
+            'openai/gpt-3.5-turbo-0613': 4096,
+            'openai/gpt-3.5-turbo-16k': 16384,
+            'openai/gpt-3.5-turbo-instruct': 4096,
+            'openai/gpt-4': 8192,
+            'openai/gpt-4-0314': 8192,
+            'openai/gpt-4-1106-preview': 128000,
+            'openai/gpt-4-turbo': 128000,
+            'openai/gpt-4-turbo-preview': 128000,
+            'openai/gpt-4.1': 128000,
+            'openai/gpt-4.1-mini': 128000,
+            'openai/gpt-4.1-nano': 128000,
+            'openai/gpt-4o': 128000,
+            'openai/gpt-4o-2024-05-13': 128000,
+            'openai/gpt-4o-2024-08-06': 128000,
+            'openai/gpt-4o-2024-11-20': 128000,
+            'openai/gpt-4o-mini': 128000,
+            'openai/gpt-4o-mini-2024-07-18': 128000,
+            'openai/gpt-4o-mini-search-preview': 128000,
+            'openai/gpt-4o-search-preview': 128000,
+            'openai/gpt-4o:extended': 128000,
+            'openai/gpt-5': 200000,
+            'openai/gpt-5-chat': 200000,
+            'openai/gpt-5-mini': 200000,
+            'openai/gpt-5-nano': 200000,
+            'openai/gpt-oss-120b': 128000,
+            'openai/gpt-oss-20b': 128000,
+            'openai/gpt-oss-20b:free': 128000,
+            'openai/o1': 128000,
+            'openai/o1-mini': 128000,
+            'openai/o1-mini-2024-09-12': 128000,
+            'openai/o1-pro': 128000,
+            'openai/o3': 200000,
+            'openai/o3-mini': 200000,
+            'openai/o3-mini-high': 200000,
+            'openai/o3-pro': 200000,
+            'openai/o4-mini': 128000,
+            'openai/o4-mini-high': 128000,
+            'opengvlab/internvl3-14b': 8192,
+            'openrouter/auto': 8192,
+            'perplexity/r1-1776': 32768,
+            'perplexity/sonar': 32768,
+            'perplexity/sonar-deep-research': 32768,
+            'perplexity/sonar-pro': 32768,
+            'perplexity/sonar-reasoning': 32768,
+            'perplexity/sonar-reasoning-pro': 32768,
+            'pygmalionai/mythalion-13b': 4096,
+            'qwen/qwen-2-72b-instruct': 32768,
+            'qwen/qwen-2.5-72b-instruct': 131072,
+            'qwen/qwen-2.5-72b-instruct:free': 131072,
+            'qwen/qwen-2.5-7b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct:free': 131072,
+            'qwen/qwen-2.5-vl-7b-instruct': 131072,
+            'qwen/qwen-max': 32768,
+            'qwen/qwen-plus': 32768,
+            'qwen/qwen-turbo': 8192,
+            'qwen/qwen-vl-max': 32768,
+            'qwen/qwen-vl-plus': 32768,
+            'qwen/qwen2.5-vl-32b-instruct': 131072,
+            'qwen/qwen2.5-vl-32b-instruct:free': 131072,
+            'qwen/qwen2.5-vl-72b-instruct': 131072,
+            'qwen/qwen2.5-vl-72b-instruct:free': 131072,
+            'qwen/qwen3-14b': 32768,
+            'qwen/qwen3-14b:free': 32768,
+            'qwen/qwen3-235b-a22b': 32768,
+            'qwen/qwen3-235b-a22b-2507': 32768,
+            'qwen/qwen3-235b-a22b-thinking-2507': 32768,
+            'qwen/qwen3-235b-a22b:free': 32768,
+            'qwen/qwen3-30b-a3b': 32768,
+            'qwen/qwen3-30b-a3b-instruct-2507': 32768,
+            'qwen/qwen3-30b-a3b:free': 32768,
+            'qwen/qwen3-32b': 32768,
+            'qwen/qwen3-4b:free': 32768,
+            'qwen/qwen3-8b': 32768,
+            'qwen/qwen3-8b:free': 32768,
+            'qwen/qwen3-coder': 32768,
+            'qwen/qwen3-coder:free': 32768,
+            'qwen/qwq-32b': 32768,
+            'qwen/qwq-32b-preview': 32768,
+            'qwen/qwq-32b:free': 32768,
+            'raifle/sorcererlm-8x22b': 65536,
+            'rekaai/reka-flash-3:free': 128000,
+            'sao10k/l3-euryale-70b': 8192,
+            'sao10k/l3-lunaris-8b': 8192,
+            'sao10k/l3.1-euryale-70b': 131072,
+            'sao10k/l3.3-euryale-70b': 131072,
+            'sarvamai/sarvam-m:free': 8192,
+            'scb10x/llama3.1-typhoon2-70b-instruct': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b:free': 131072,
+            'sophosympatheia/midnight-rose-70b': 4096,
+            'switchpoint/router': 8192,
+            'tencent/hunyuan-a13b-instruct': 8192,
+            'tencent/hunyuan-a13b-instruct:free': 8192,
+            'thedrummer/anubis-70b-v1.1': 8192,
+            'thedrummer/anubis-pro-105b-v1': 8192,
+            'thedrummer/rocinante-12b': 8192,
+            'thedrummer/skyfall-36b-v2': 8192,
+            'thedrummer/unslopnemo-12b': 128000,
+            'thedrummer/valkyrie-49b-v1': 8192,
+            'thudm/glm-4-32b': 2000000,
+            'thudm/glm-4.1v-9b-thinking': 2000000,
+            'thudm/glm-z1-32b:free': 2000000,
+            'tngtech/deepseek-r1t-chimera': 32768,
+            'tngtech/deepseek-r1t-chimera:free': 32768,
+            'tngtech/deepseek-r1t2-chimera:free': 32768,
+            'undi95/remm-slerp-l2-13b': 4096,
+            'x-ai/grok-2-1212': 128000,
+            'x-ai/grok-2-vision-1212': 128000,
+            'x-ai/grok-3': 128000,
+            'x-ai/grok-3-beta': 128000,
+            'x-ai/grok-3-mini': 128000,
+            'x-ai/grok-3-mini-beta': 128000,
+            'x-ai/grok-4': 128000,
+            'x-ai/grok-vision-beta': 128000,
+            'z-ai/glm-4-32b': 2000000,
+            'z-ai/glm-4.5': 2000000,
+            'z-ai/glm-4.5-air': 2000000,
+            'z-ai/glm-4.5-air:free': 2000000,
+            'llama3.1': 131072,        # Llama 3.1 extended context
+            'llama3.2': 131072,        # Llama 3.2 extended context
+            'llama3.3': 131072,        # Assuming similar to 3.1/3.2
+            'llama3': 8192,            # Llama 3 default
+            'llama2': 4096,            # Llama 2 default
+            'mixtral8x22b': 65536,     # Mixtral 8x22B default
+            'mixtral': 32768,          # Mixtral 8x7B default
+            'mistral': 32768,          # Mistral 7B v0.2+ default
+            'gemma3': 131072,          # Gemma 3 with 128K context
+            'gemma2': 8192,            # Gemma 2 default
+            'gemma': 8192,             # Gemma default
+            'phi3': 131072,            # Phi-3 variants often use 128K (mini/medium extended)
+            'phi2': 2048,              # Phi-2 default
+            'phi': 2048,               # Phi default (older)
+            'qwen2.5': 131072,         # Qwen2.5 with 128K
+            'qwen2': 32768,            # Qwen2 default for 7B
+            'qwen': 8192,              # Qwen default
+            'codellama': 16384,        # CodeLlama extended
+            'codegemma': 8192,         # CodeGemma default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-coder': 16384,   # DeepSeek-Coder V1 default
+            'deepseek-v2': 131072,     # DeepSeek-V2 with 128K
+            'deepseek-llm': 4096,      # DeepSeek-LLM default
+            'yi1.5': 32768,            # Yi-1.5 with 32K
+            'yi': 4096,                # Yi base default
+            'command-r': 131072,       # Command-R with 128K
+            'wizardlm2': 32768,        # WizardLM2 (Mistral-based)
+            'wizardlm': 16384,         # WizardLM default
+            'zephyr': 65536,           # Zephyr beta (Mistral-based extended)
+            'vicuna': 2048,            # Vicuna default (up to 16K in some variants)
+            'falcon': 2048,            # Falcon default
+            'starcoder': 8192,         # StarCoder default
+            'stablelm': 4096,          # StableLM default
+            'orca2': 4096,             # Orca 2 default
+            'orca': 4096,              # Orca default
+            'dolphin': 32768,          # Dolphin (often Mistral-based)
+            'openhermes': 8192,        # OpenHermes default
+            'gpt-oss': 128000,         # GPT-OSS with 128K context
+            'gpt-3.5-turbo': 4096,     # GPT-3.5 Turbo default
+            'gpt-4': 8192,             # GPT-4 default
+            'grok-2': 128000,
+            'grok-2-1212': 128000,
+            'grok-2-vision-1212': 128000,
+            'grok-3': 128000,
+            'grok-3-fast': 128000,
+            'grok-3-beta': 128000,
+            'grok-3-mini': 128000,
+            'grok-3-mini-beta': 128000,
+            'grok-3-mini-fast': 128000,
+            'grok-4-0709': 128000,
+            'grok-4': 128000,
+            'grok-vision-beta': 128000,
+        }
+
+        normalized_model_name = model_name.lower().strip()
+
+        # Sort keys by length in descending order. This ensures that a more specific
+        # name like 'llama3.1' is checked before a less specific name like 'llama3'.
+        sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
+
+        for base_name in sorted_base_models:
+            if base_name in normalized_model_name:
+                context_size = known_contexts[base_name]
+                ASCIIColors.warning(
+                    f"Using hardcoded context size for model '{model_name}' "
+                    f"based on base name '{base_name}': {context_size}"
+                )
+                return context_size
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}' in the hardcoded list.")
         return None

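The lookup added above sorts keys longest-first so that a specific base name ('llama3.1') matches before its prefix ('llama3') when scanning for substrings. (The docstring's reference to the Ollama API looks copied from the Ollama binding; the method lives on the abstract base class.) A standalone sketch of the same logic with illustrative entries:

    from typing import Optional

    known_contexts = {"llama3.1": 131072, "llama3": 8192}

    def ctx_size(model_name: str) -> Optional[int]:
        name = model_name.lower().strip()
        # longest keys first, so 'llama3.1' wins over its prefix 'llama3'
        for base in sorted(known_contexts, key=len, reverse=True):
            if base in name:
                return known_contexts[base]
        return None

    assert ctx_size("Llama3.1-8B-Instruct-Q4") == 131072
    assert ctx_size("llama3:8b") == 8192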