lollms-client 0.32.0-py3-none-any.whl → 0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -11,11 +11,11 @@ import pipmaster as pm
 # Ensure the required packages are installed
 pm.ensure_packages(["mistralai", "pillow", "tiktoken"])
 
-from mistralai.client import MistralClient
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral
 from PIL import Image, ImageDraw
 import tiktoken
 
+
 BindingName = "MistralBinding"
 
 class MistralBinding(LollmsLLMBinding):
@@ -28,7 +28,7 @@ class MistralBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "mistral-large-latest",
-                 mistral_api_key: str = None,
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -40,13 +40,13 @@ class MistralBinding(LollmsLLMBinding):
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.mistral_api_key = mistral_api_key or os.getenv("MISTRAL_API_KEY")
+        self.mistral_api_key = service_key or os.getenv("MISTRAL_API_KEY")
 
         if not self.mistral_api_key:
             raise ValueError("Mistral API key is required. Set it via 'mistral_api_key' or MISTRAL_API_KEY env var.")
 
         try:
-            self.client = MistralClient(api_key=self.mistral_api_key)
+            self.client = Mistral(api_key=self.mistral_api_key)
         except Exception as e:
             ASCIIColors.error(f"Failed to configure Mistral client: {e}")
             self.client = None
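
The constructor change above tracks the migration from the deprecated MistralClient to the unified Mistral client of the mistralai 1.x SDK. For orientation, here is a minimal standalone sketch of the pattern the binding now relies on; the response attribute access is an assumption about the 1.x SDK and should be checked against the installed version.

import os
from mistralai import Mistral

# The binding falls back to MISTRAL_API_KEY when no service_key is passed.
client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

# Non-streaming call; mirrors the client.chat.complete(...) usage in this diff.
response = client.chat.complete(
    model="mistral-large-latest",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)  # assumed response shape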
@@ -64,35 +64,67 @@ class MistralBinding(LollmsLLMBinding):
         if n_predict is not None: params['max_tokens'] = n_predict
         if seed is not None: params['random_seed'] = seed # Mistral uses 'random_seed'
         return params
+
 
-    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[ChatMessage]:
-        """Prepares the message list for the Mistral API from a LollmsDiscussion."""
+    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[Dict[str, any]]:
+        """Prepares the message list for the API from a LollmsDiscussion."""
         history = []
         if discussion.system_prompt:
-            # Mistral prefers the system prompt as the first message with a user/assistant turn.
-            # A lone system message is not ideal. We will prepend it to the first user message.
-            # However, for API consistency, we will treat it as a separate message if it exists.
-            # The official client will likely handle this.
-            history.append(ChatMessage(role="system", content=discussion.system_prompt))
+            history.append({"role": "system", "content": discussion.system_prompt})
 
         for msg in discussion.get_messages(branch_tip_id):
             role = 'user' if msg.sender_type == "user" else 'assistant'
-            # Note: Mistral API currently does not support image inputs via the chat endpoint.
+            # Note: Vision support depends on the specific model being called via OpenRouter.
+            # We will not implement it in this generic binding to avoid complexity,
+            # as different models might expect different formats.
             if msg.content:
-                history.append(ChatMessage(role=role, content=msg.content))
+                history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self, prompt: str, **kwargs) -> Union[str, dict]:
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
-        Generate text using Mistral. This is a wrapper around the chat method.
+        Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion.from_messages([
-            LollmsMessage.new_message(sender_type="user", content=prompt)
-        ])
-        if kwargs.get("system_prompt"):
-            temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion, **kwargs)
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
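
With the expanded signature, generate_text is now a thin wrapper that builds a one-message LollmsDiscussion and forwards every sampling parameter to chat. A hedged usage sketch follows; the import path and the callback's msg_type handling are assumptions, while the service_key argument and parameter names come from the signature in this diff.

from lollms_client.llm_bindings.mistral import MistralBinding  # import path is an assumption

def on_chunk(chunk: str, msg_type) -> bool:
    # Print tokens as they stream in; returning True tells the binding to continue.
    print(chunk, end="", flush=True)
    return True

binding = MistralBinding(service_key="YOUR_KEY", model_name="mistral-large-latest")
text = binding.generate_text(
    prompt="Explain what a context window is in two sentences.",
    system_prompt="You are a concise assistant.",
    n_predict=128,
    stream=True,
    streaming_callback=on_chunk,
)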
@@ -117,7 +149,7 @@ class MistralBinding(LollmsLLMBinding):
 
         try:
             if stream:
-                response = self.client.chat_stream(
+                response = self.client.chat.stream(
                     model=self.model_name,
                     messages=messages,
                     **api_params
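
In the 1.x SDK the streaming entry point moves from chat_stream(...) to chat.stream(...), which yields events rather than raw chunks. A rough standalone sketch follows; the event payload shape (event.data.choices[0].delta.content) is an assumption to verify against the installed SDK.

import os
from mistralai import Mistral

client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

stream = client.chat.stream(
    model="mistral-large-latest",
    messages=[{"role": "user", "content": "Count to five."}],
)
for event in stream:  # each event wraps one completion chunk
    delta = event.data.choices[0].delta.content  # assumed payload shape
    if delta:
        print(delta, end="", flush=True)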
@@ -131,7 +163,7 @@ class MistralBinding(LollmsLLMBinding):
                         break
                 return full_response_text
             else:
-                response = self.client.chat(
+                response = self.client.chat.complete(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -201,7 +233,7 @@ class MistralBinding(LollmsLLMBinding):
             return []
         try:
             ASCIIColors.debug("Listing Mistral models...")
-            models = self.client.list_models()
+            models = self.client.models.list()
             model_info_list = []
             for m in models.data:
                 model_info_list.append({
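
Model discovery follows the same pattern: list_models() becomes client.models.list(), and the binding keeps iterating the returned .data list. A short sketch, with the id attribute on each model card stated as an assumption:

import os
from mistralai import Mistral

client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

models = client.models.list()  # replaces the removed list_models()
for m in models.data:
    print(m.id)  # 'id' is assumed to be the model identifier field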
@@ -264,7 +296,6 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
     full_streamed_text = ""
     def stream_callback(chunk: str, msg_type: int):
-        nonlocal full_streamed_text
         ASCIIColors.green(chunk, end="", flush=True)
         full_streamed_text += chunk
         return True
@@ -598,6 +598,94 @@ class OllamaBinding(LollmsLLMBinding):
         ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
         return True
 
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves the context size for an Ollama model.
+
+        The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+        otherwise it falls back to the model's default context length from its architecture details.
+        As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        try:
+            info = ollama.show(model_name)
+
+            # Parse num_ctx from the 'parameters' string (e.g., "PARAMETER num_ctx 4096")
+            parameters = info.get('parameters', '')
+            num_ctx = None
+            for param in parameters.split('\n'):
+                if param.strip().startswith('num_ctx'):
+                    num_ctx = int(param.split()[1])
+                    break
+
+            if num_ctx is not None:
+                return num_ctx
+
+            # Fall back to model_info context_length (e.g., 'llama.context_length')
+            model_info = info.get('model_info', {})
+            arch = model_info.get('general.architecture', '')
+            context_key = f'{arch}.context_length' if arch else 'general.context_length'
+            context_length = model_info.get(context_key)
+
+            if context_length is not None:
+                return int(context_length)
+
+        except Exception as e:
+            ASCIIColors.warning(f"Error fetching model info: {str(e)}")
+
+        # Failsafe: Hardcoded context sizes for popular Ollama models
+        known_contexts = {
+            'llama2': 4096,              # Llama 2 default
+            'llama3': 8192,              # Llama 3 default
+            'llama3.1': 131072,          # Llama 3.1 extended context
+            'llama3.2': 131072,          # Llama 3.2 extended context
+            'llama3.3': 131072,          # Assuming similar to 3.1/3.2
+            'mistral': 32768,            # Mistral 7B v0.2+ default
+            'mixtral': 32768,            # Mixtral 8x7B default
+            'mixtral8x22b': 65536,       # Mixtral 8x22B default
+            'gemma': 8192,               # Gemma default
+            'gemma2': 8192,              # Gemma 2 default
+            'gemma3': 131072,            # Gemma 3 with 128K context
+            'phi': 2048,                 # Phi default (older)
+            'phi2': 2048,                # Phi-2 default
+            'phi3': 131072,              # Phi-3 variants often use 128K (mini/medium extended)
+            'qwen': 8192,                # Qwen default
+            'qwen2': 32768,              # Qwen2 default for 7B
+            'qwen2.5': 131072,           # Qwen2.5 with 128K
+            'codellama': 16384,          # CodeLlama extended
+            'codegemma': 8192,           # CodeGemma default
+            'deepseek-coder': 16384,     # DeepSeek-Coder V1 default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-llm': 4096,        # DeepSeek-LLM default
+            'deepseek-v2': 131072,       # DeepSeek-V2 with 128K
+            'yi': 4096,                  # Yi base default
+            'yi1.5': 32768,              # Yi-1.5 with 32K
+            'command-r': 131072,         # Command-R with 128K
+            'vicuna': 2048,              # Vicuna default (up to 16K in some variants)
+            'wizardlm': 16384,           # WizardLM default
+            'wizardlm2': 32768,          # WizardLM2 (Mistral-based)
+            'zephyr': 65536,             # Zephyr beta (Mistral-based extended)
+            'falcon': 2048,              # Falcon default
+            'starcoder': 8192,           # StarCoder default
+            'stablelm': 4096,            # StableLM default
+            'orca': 4096,                # Orca default
+            'orca2': 4096,               # Orca 2 default
+            'dolphin': 32768,            # Dolphin (often Mistral-based)
+            'openhermes': 8192,          # OpenHermes default
+        }
+
+        # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+        base_name = model_name.split(':')[0].lower().strip()
+
+        if base_name in known_contexts:
+            ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+            return known_contexts[base_name]
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+        return None
+
 if __name__ == '__main__':
     global full_streamed_text
     # Example Usage (requires an Ollama server running)
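
The new get_ctx_size resolves the context length in three steps: an explicit num_ctx override parsed from the Modelfile parameters, then the architecture's <arch>.context_length from model_info, then the hardcoded table above. A hedged usage sketch; the constructor arguments and import path are assumptions, since the diff only shows the method itself.

from lollms_client.llm_bindings.ollama import OllamaBinding  # import path is an assumption

binding = OllamaBinding(model_name="llama3:8b-instruct")  # constructor args are assumptions

print(binding.get_ctx_size())                 # context size for the current model, e.g. 8192
print(binding.get_ctx_size("mistral"))        # or look up any other pulled model by name
print(binding.get_ctx_size("unknown-model"))  # returns None after logging a warning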
@@ -29,7 +29,7 @@ class OpenRouterBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "google/gemini-flash-1.5", # A good, fast default
-                 open_router_api_key: str = None,
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -37,11 +37,11 @@ class OpenRouterBinding(LollmsLLMBinding):
 
         Args:
             model_name (str): The name of the model to use from OpenRouter (e.g., 'anthropic/claude-3-haiku-20240307').
-            open_router_api_key (str): The API key for the OpenRouter service.
+            service_key (str): The API key for the OpenRouter service.
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.api_key = open_router_api_key or os.getenv("OPENROUTER_API_KEY")
+        self.api_key = service_key or os.getenv("OPENROUTER_API_KEY")
 
         if not self.api_key:
             raise ValueError("OpenRouter API key is required. Set it via 'open_router_api_key' or OPENROUTER_API_KEY env var.")
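
Both the Mistral and OpenRouter bindings now take the same service_key constructor argument, with the per-service environment variables kept as fallbacks. A small sketch of the unified call shape; the import paths are assumptions.

from lollms_client.llm_bindings.mistral import MistralBinding          # import paths are
from lollms_client.llm_bindings.open_router import OpenRouterBinding   # assumptions

# service_key replaces the old per-binding argument names; the env-var fallbacks
# (MISTRAL_API_KEY / OPENROUTER_API_KEY) still apply when it is omitted.
mistral = MistralBinding(service_key="YOUR_MISTRAL_KEY", model_name="mistral-large-latest")
router = OpenRouterBinding(service_key="YOUR_OPENROUTER_KEY", model_name="google/gemini-flash-1.5")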
@@ -84,17 +84,50 @@ class OpenRouterBinding(LollmsLLMBinding):
                 history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self, prompt: str, **kwargs) -> Union[str, dict]:
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion.from_messages([
-            LollmsMessage.new_message(sender_type="user", content=prompt)
-        ])
-        if kwargs.get("system_prompt"):
-            temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion, **kwargs)
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
@@ -1835,4 +1835,9 @@ class LollmsDiscussion:
 
         del self.images[index]
         del self.active_images[index]
-        self.touch()
+        self.touch()
+
+    @property
+    def system_prompt(self) -> str:
+        """Returns the system prompt for this discussion."""
+        return self._system_prompt