lollms-client 0.32.0__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/groq/__init__.py +2 -3
- lollms_client/llm_bindings/llamacpp/__init__.py +207 -328
- lollms_client/llm_bindings/mistral/__init__.py +57 -26
- lollms_client/llm_bindings/ollama/__init__.py +88 -0
- lollms_client/llm_bindings/open_router/__init__.py +43 -10
- lollms_client/lollms_discussion.py +6 -1
- lollms_client/lollms_llm_binding.py +403 -2
- {lollms_client-0.32.0.dist-info → lollms_client-0.33.0.dist-info}/METADATA +1 -1
- {lollms_client-0.32.0.dist-info → lollms_client-0.33.0.dist-info}/RECORD +13 -13
- {lollms_client-0.32.0.dist-info → lollms_client-0.33.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.32.0.dist-info → lollms_client-0.33.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.32.0.dist-info → lollms_client-0.33.0.dist-info}/top_level.txt +0 -0

--- a/lollms_client/llm_bindings/mistral/__init__.py
+++ b/lollms_client/llm_bindings/mistral/__init__.py
@@ -11,11 +11,11 @@ import pipmaster as pm
 # Ensure the required packages are installed
 pm.ensure_packages(["mistralai", "pillow", "tiktoken"])
 
-from mistralai
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral
 from PIL import Image, ImageDraw
 import tiktoken
 
+
 BindingName = "MistralBinding"
 
 class MistralBinding(LollmsLLMBinding):
@@ -28,7 +28,7 @@ class MistralBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "mistral-large-latest",
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -40,13 +40,13 @@ class MistralBinding(LollmsLLMBinding):
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.mistral_api_key =
+        self.mistral_api_key = service_key or os.getenv("MISTRAL_API_KEY")
 
         if not self.mistral_api_key:
             raise ValueError("Mistral API key is required. Set it via 'mistral_api_key' or MISTRAL_API_KEY env var.")
 
         try:
-            self.client =
+            self.client = Mistral(api_key=self.mistral_api_key)
         except Exception as e:
             ASCIIColors.error(f"Failed to configure Mistral client: {e}")
             self.client = None
@@ -64,35 +64,67 @@ class MistralBinding(LollmsLLMBinding):
         if n_predict is not None: params['max_tokens'] = n_predict
         if seed is not None: params['random_seed'] = seed # Mistral uses 'random_seed'
         return params
+
 
-    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[
-        """Prepares the message list for the
+    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[Dict[str, any]]:
+        """Prepares the message list for the API from a LollmsDiscussion."""
         history = []
         if discussion.system_prompt:
-
-            # A lone system message is not ideal. We will prepend it to the first user message.
-            # However, for API consistency, we will treat it as a separate message if it exists.
-            # The official client will likely handle this.
-            history.append(ChatMessage(role="system", content=discussion.system_prompt))
+            history.append({"role": "system", "content": discussion.system_prompt})
 
         for msg in discussion.get_messages(branch_tip_id):
             role = 'user' if msg.sender_type == "user" else 'assistant'
-            # Note:
+            # Note: Vision support depends on the specific model being called via OpenRouter.
+            # We will not implement it in this generic binding to avoid complexity,
+            # as different models might expect different formats.
             if msg.content:
-                history.append(
+                history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
-        Generate text using
+        Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
@@ -117,7 +149,7 @@ class MistralBinding(LollmsLLMBinding):
 
         try:
             if stream:
-                response = self.client.
+                response = self.client.chat.stream(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -131,7 +163,7 @@ class MistralBinding(LollmsLLMBinding):
                         break
                 return full_response_text
             else:
-                response = self.client.chat(
+                response = self.client.chat.complete(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -201,7 +233,7 @@ class MistralBinding(LollmsLLMBinding):
            return []
        try:
            ASCIIColors.debug("Listing Mistral models...")
-           models = self.client.
+           models = self.client.models.list()
            model_info_list = []
            for m in models.data:
                model_info_list.append({
@@ -264,7 +296,6 @@ if __name__ == '__main__':
        ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
        full_streamed_text = ""
        def stream_callback(chunk: str, msg_type: int):
-           nonlocal full_streamed_text
            ASCIIColors.green(chunk, end="", flush=True)
            full_streamed_text += chunk
            return True
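
The Mistral binding above is migrated from the legacy `MistralClient`/`ChatMessage` interface to the unified `Mistral` client, whose chat calls live under `client.chat.complete` / `client.chat.stream` and whose model listing is `client.models.list()`. A minimal sketch of that call pattern (not code from the package), assuming `mistralai` >= 1.0, a `MISTRAL_API_KEY` in the environment, and an illustrative model name; the response/chunk attribute access mirrors the current SDK but may vary between SDK versions:

```python
import os
from mistralai import Mistral

# Assumption: mistralai >= 1.0 and MISTRAL_API_KEY set in the environment.
client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Say hello in one sentence."},
]

# Non-streaming call, the path chat() takes when stream=False.
response = client.chat.complete(model="mistral-large-latest", messages=messages)
print(response.choices[0].message.content)

# Streaming call, the path chat() takes when stream=True.
for event in client.chat.stream(model="mistral-large-latest", messages=messages):
    delta = event.data.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```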

--- a/lollms_client/llm_bindings/ollama/__init__.py
+++ b/lollms_client/llm_bindings/ollama/__init__.py
@@ -598,6 +598,94 @@ class OllamaBinding(LollmsLLMBinding):
         ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
         return True
 
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves the context size for an Ollama model.
+
+        The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+        otherwise it falls back to the model's default context length from its architecture details.
+        As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        try:
+            info = ollama.show(model_name)
+
+            # Parse num_ctx from the 'parameters' string (e.g., "PARAMETER num_ctx 4096")
+            parameters = info.get('parameters', '')
+            num_ctx = None
+            for param in parameters.split('\n'):
+                if param.strip().startswith('num_ctx'):
+                    num_ctx = int(param.split()[1])
+                    break
+
+            if num_ctx is not None:
+                return num_ctx
+
+            # Fall back to model_info context_length (e.g., 'llama.context_length')
+            model_info = info.get('model_info', {})
+            arch = model_info.get('general.architecture', '')
+            context_key = f'{arch}.context_length' if arch else 'general.context_length'
+            context_length = model_info.get(context_key)
+
+            if context_length is not None:
+                return int(context_length)
+
+        except Exception as e:
+            ASCIIColors.warning(f"Error fetching model info: {str(e)}")
+
+        # Failsafe: Hardcoded context sizes for popular Ollama models
+        known_contexts = {
+            'llama2': 4096,               # Llama 2 default
+            'llama3': 8192,               # Llama 3 default
+            'llama3.1': 131072,           # Llama 3.1 extended context
+            'llama3.2': 131072,           # Llama 3.2 extended context
+            'llama3.3': 131072,           # Assuming similar to 3.1/3.2
+            'mistral': 32768,             # Mistral 7B v0.2+ default
+            'mixtral': 32768,             # Mixtral 8x7B default
+            'mixtral8x22b': 65536,        # Mixtral 8x22B default
+            'gemma': 8192,                # Gemma default
+            'gemma2': 8192,               # Gemma 2 default
+            'gemma3': 131072,             # Gemma 3 with 128K context
+            'phi': 2048,                  # Phi default (older)
+            'phi2': 2048,                 # Phi-2 default
+            'phi3': 131072,               # Phi-3 variants often use 128K (mini/medium extended)
+            'qwen': 8192,                 # Qwen default
+            'qwen2': 32768,               # Qwen2 default for 7B
+            'qwen2.5': 131072,            # Qwen2.5 with 128K
+            'codellama': 16384,           # CodeLlama extended
+            'codegemma': 8192,            # CodeGemma default
+            'deepseek-coder': 16384,      # DeepSeek-Coder V1 default
+            'deepseek-coder-v2': 131072,  # DeepSeek-Coder V2 with 128K
+            'deepseek-llm': 4096,         # DeepSeek-LLM default
+            'deepseek-v2': 131072,        # DeepSeek-V2 with 128K
+            'yi': 4096,                   # Yi base default
+            'yi1.5': 32768,               # Yi-1.5 with 32K
+            'command-r': 131072,          # Command-R with 128K
+            'vicuna': 2048,               # Vicuna default (up to 16K in some variants)
+            'wizardlm': 16384,            # WizardLM default
+            'wizardlm2': 32768,           # WizardLM2 (Mistral-based)
+            'zephyr': 65536,              # Zephyr beta (Mistral-based extended)
+            'falcon': 2048,               # Falcon default
+            'starcoder': 8192,            # StarCoder default
+            'stablelm': 4096,             # StableLM default
+            'orca': 4096,                 # Orca default
+            'orca2': 4096,                # Orca 2 default
+            'dolphin': 32768,             # Dolphin (often Mistral-based)
+            'openhermes': 8192,           # OpenHermes default
+        }
+
+        # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+        base_name = model_name.split(':')[0].lower().strip()
+
+        if base_name in known_contexts:
+            ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+            return known_contexts[base_name]
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+        return None
+
 if __name__ == '__main__':
     global full_streamed_text
     # Example Usage (requires an Ollama server running)
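
The new `get_ctx_size` resolves the context window in three steps: a `num_ctx` override from the Modelfile parameters, then the architecture's `<arch>.context_length` entry in `model_info`, and finally the hardcoded table keyed by the base model name (the part before the `:` tag). A rough sketch of the first two lookups against a local Ollama server, mirroring the binding's assumption that `ollama.show()` returns a dict-like mapping; the model tag is illustrative:

```python
import ollama

def resolve_ctx_size(model_name: str) -> int | None:
    """Sketch of the lookup order used by OllamaBinding.get_ctx_size."""
    info = ollama.show(model_name)

    # 1) Modelfile override, e.g. "PARAMETER num_ctx 4096" in the 'parameters' blob.
    for line in info.get('parameters', '').splitlines():
        if line.strip().startswith('num_ctx'):
            return int(line.split()[1])

    # 2) Architecture default, e.g. 'llama.context_length' inside 'model_info'.
    model_info = info.get('model_info', {})
    arch = model_info.get('general.architecture', '')
    value = model_info.get(f'{arch}.context_length' if arch else 'general.context_length')
    return int(value) if value is not None else None  # step 3 (hardcoded table) omitted here

print(resolve_ctx_size('llama3:8b-instruct'))  # e.g. 8192 when no num_ctx override is set
```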

--- a/lollms_client/llm_bindings/open_router/__init__.py
+++ b/lollms_client/llm_bindings/open_router/__init__.py
@@ -29,7 +29,7 @@ class OpenRouterBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "google/gemini-flash-1.5", # A good, fast default
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -37,11 +37,11 @@ class OpenRouterBinding(LollmsLLMBinding):
 
         Args:
             model_name (str): The name of the model to use from OpenRouter (e.g., 'anthropic/claude-3-haiku-20240307').
-
+            service_key (str): The API key for the OpenRouter service.
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.api_key =
+        self.api_key = service_key or os.getenv("OPENROUTER_API_KEY")
 
         if not self.api_key:
             raise ValueError("OpenRouter API key is required. Set it via 'open_router_api_key' or OPENROUTER_API_KEY env var.")
@@ -84,17 +84,50 @@ class OpenRouterBinding(LollmsLLMBinding):
                 history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
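
Both the Mistral and OpenRouter constructors now accept the key as `service_key` and fall back to an environment variable (`MISTRAL_API_KEY` and `OPENROUTER_API_KEY` respectively). A small sketch of the two ways to construct the OpenRouter binding; the import path follows the file list above, but the exact constructor usage is an assumption based on this diff rather than verified against the released package:

```python
import os

# Assumption: the class is importable from the module shown in the file list above.
from lollms_client.llm_bindings.open_router import OpenRouterBinding

# Option 1: pass the key explicitly (placeholder value).
binding = OpenRouterBinding(
    model_name="google/gemini-flash-1.5",
    service_key="sk-or-...",
)

# Option 2: rely on the environment variable fallback.
os.environ["OPENROUTER_API_KEY"] = "sk-or-..."  # placeholder
binding = OpenRouterBinding(model_name="google/gemini-flash-1.5")

# generate_text() now builds a one-message LollmsDiscussion and forwards
# every sampling parameter on to chat().
text = binding.generate_text("Give one fun fact about hummingbirds.", stream=False)
print(text)
```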

--- a/lollms_client/lollms_discussion.py
+++ b/lollms_client/lollms_discussion.py
@@ -1835,4 +1835,9 @@ class LollmsDiscussion:
 
         del self.images[index]
         del self.active_images[index]
-        self.touch()
+        self.touch()
+
+    @property
+    def system_prompt(self) -> str:
+        """Returns the system prompt for this discussion."""
+        return self._system_prompt