lollms-client 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/groq/__init__.py +2 -3
- lollms_client/llm_bindings/llamacpp/__init__.py +192 -384
- lollms_client/llm_bindings/mistral/__init__.py +57 -26
- lollms_client/llm_bindings/open_router/__init__.py +43 -10
- lollms_client/lollms_discussion.py +6 -1
- lollms_client/lollms_llm_binding.py +403 -2
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/METADATA +1 -1
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/RECORD +12 -12
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.32.1.dist-info → lollms_client-0.33.0.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/mistral/__init__.py
@@ -11,11 +11,11 @@ import pipmaster as pm
 # Ensure the required packages are installed
 pm.ensure_packages(["mistralai", "pillow", "tiktoken"])

-from mistralai
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral
 from PIL import Image, ImageDraw
 import tiktoken

+
 BindingName = "MistralBinding"

 class MistralBinding(LollmsLLMBinding):
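The import change tracks the mistralai 1.x SDK, which exposes a single Mistral client at the package root and drops the ChatMessage model class in favor of plain dicts. A minimal sketch of the 1.x surface this release migrates to (model name and prompt are illustrative):

    import os
    from mistralai import Mistral  # mistralai >= 1.0 exposes the client at the package root

    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
    # Messages are plain dicts now; ChatMessage from mistralai.models.chat_completion is gone.
    response = client.chat.complete(
        model="mistral-large-latest",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.choices[0].message.content)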
@@ -28,7 +28,7 @@ class MistralBinding(LollmsLLMBinding):

     def __init__(self,
                  model_name: str = "mistral-large-latest",
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -40,13 +40,13 @@ class MistralBinding(LollmsLLMBinding):
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.mistral_api_key =
+        self.mistral_api_key = service_key or os.getenv("MISTRAL_API_KEY")

         if not self.mistral_api_key:
             raise ValueError("Mistral API key is required. Set it via 'mistral_api_key' or MISTRAL_API_KEY env var.")

         try:
-            self.client =
+            self.client = Mistral(api_key=self.mistral_api_key)
         except Exception as e:
             ASCIIColors.error(f"Failed to configure Mistral client: {e}")
             self.client = None
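With the new service_key parameter, key resolution is the explicit argument first, then the MISTRAL_API_KEY environment variable. A hypothetical instantiation sketch (the import path follows the file list above):

    from lollms_client.llm_bindings.mistral import MistralBinding

    # Explicit key takes precedence; otherwise MISTRAL_API_KEY is read from the environment.
    binding = MistralBinding(model_name="mistral-large-latest", service_key="sk-...")
    env_binding = MistralBinding(model_name="mistral-large-latest")  # falls back to env var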
@@ -64,35 +64,67 @@ class MistralBinding(LollmsLLMBinding):
         if n_predict is not None: params['max_tokens'] = n_predict
         if seed is not None: params['random_seed'] = seed # Mistral uses 'random_seed'
         return params
+

-    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[
-        """Prepares the message list for the
+    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[Dict[str, any]]:
+        """Prepares the message list for the API from a LollmsDiscussion."""
         history = []
         if discussion.system_prompt:
-
-            # A lone system message is not ideal. We will prepend it to the first user message.
-            # However, for API consistency, we will treat it as a separate message if it exists.
-            # The official client will likely handle this.
-            history.append(ChatMessage(role="system", content=discussion.system_prompt))
+            history.append({"role": "system", "content": discussion.system_prompt})

         for msg in discussion.get_messages(branch_tip_id):
             role = 'user' if msg.sender_type == "user" else 'assistant'
-            # Note:
+            # Note: Vision support depends on the specific model being called via OpenRouter.
+            # We will not implement it in this generic binding to avoid complexity,
+            # as different models might expect different formats.
             if msg.content:
-                history.append(
+                history.append({'role': role, 'content': msg.content})
         return history

-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
-        Generate text using
+        Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt

-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)

     def chat(self,
              discussion: LollmsDiscussion,
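The rewritten generate_text is now a thin wrapper that packs the prompt into a throwaway LollmsDiscussion and forwards every sampling parameter to chat. (The docstring and vision comments mention OpenRouter although this hunk sits in the Mistral binding; they appear copied from the sibling binding.) A hedged usage sketch, reusing the binding from the previous example:

    text = binding.generate_text(
        prompt="Summarize the lollms project in one sentence.",
        system_prompt="Be concise.",
        n_predict=128,
        stream=False,
    )
    print(text)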
@@ -117,7 +149,7 @@ class MistralBinding(LollmsLLMBinding):

         try:
             if stream:
-                response = self.client.
+                response = self.client.chat.stream(
                     model=self.model_name,
                     messages=messages,
                     **api_params
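client.chat.stream(...) is the 1.x streaming entry point; it yields events whose deltas live under event.data.choices[0].delta.content (field layout as of mistralai 1.x; verify against the installed version). A small consumption sketch:

    stream = client.chat.stream(
        model="mistral-large-latest",
        messages=[{"role": "user", "content": "Hello"}],
    )
    for event in stream:
        delta = event.data.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)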
@@ -131,7 +163,7 @@ class MistralBinding(LollmsLLMBinding):
                         break
                 return full_response_text
             else:
-                response = self.client.chat(
+                response = self.client.chat.complete(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -201,7 +233,7 @@ class MistralBinding(LollmsLLMBinding):
             return []
         try:
             ASCIIColors.debug("Listing Mistral models...")
-            models = self.client.
+            models = self.client.models.list()
             model_info_list = []
             for m in models.data:
                 model_info_list.append({
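client.models.list() returns a container whose .data attribute the loop above iterates, each entry carrying the model id. A short sketch:

    models = client.models.list()
    for m in models.data:
        print(m.id)  # e.g. 'mistral-large-latest'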
@@ -264,7 +296,6 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
     full_streamed_text = ""
     def stream_callback(chunk: str, msg_type: int):
-        nonlocal full_streamed_text
         ASCIIColors.green(chunk, end="", flush=True)
         full_streamed_text += chunk
         return True
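Removing nonlocal here is necessary because stream_callback is defined at module scope, where nonlocal is a SyntaxError; note that the remaining full_streamed_text += chunk still rebinds a name and would need a global declaration (or a mutable container) to behave as intended. A sketch of the container approach, assuming the same callback contract:

    chunks = []  # a mutable container sidesteps global/nonlocal entirely

    def stream_callback(chunk: str, msg_type: int):
        print(chunk, end="", flush=True)
        chunks.append(chunk)  # method call, no rebinding, so no declaration needed
        return True

    # after streaming finishes:
    full_streamed_text = "".join(chunks)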
lollms_client/llm_bindings/open_router/__init__.py
@@ -29,7 +29,7 @@ class OpenRouterBinding(LollmsLLMBinding):

     def __init__(self,
                  model_name: str = "google/gemini-flash-1.5", # A good, fast default
-
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -37,11 +37,11 @@ class OpenRouterBinding(LollmsLLMBinding):

         Args:
             model_name (str): The name of the model to use from OpenRouter (e.g., 'anthropic/claude-3-haiku-20240307').
-
+            service_key (str): The API key for the OpenRouter service.
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.api_key =
+        self.api_key = service_key or os.getenv("OPENROUTER_API_KEY")

         if not self.api_key:
             raise ValueError("OpenRouter API key is required. Set it via 'open_router_api_key' or OPENROUTER_API_KEY env var.")
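Note that the error message still references 'open_router_api_key' even though the parameter is now service_key. OpenRouter itself exposes an OpenAI-compatible chat completions endpoint authenticated with a Bearer key, which is what the binding ultimately targets. A hypothetical direct call, for orientation:

    import os
    import requests

    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
        json={
            "model": "google/gemini-flash-1.5",
            "messages": [{"role": "user", "content": "Hello"}],
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])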
@@ -84,17 +84,50 @@ class OpenRouterBinding(LollmsLLMBinding):
             history.append({'role': role, 'content': msg.content})
         return history

-    def generate_text(self,
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion
-
-
-
-        temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt

-        return self.chat(temp_discussion,
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)

     def chat(self,
              discussion: LollmsDiscussion,
lollms_client/lollms_discussion.py
@@ -1835,4 +1835,9 @@ class LollmsDiscussion:

         del self.images[index]
         del self.active_images[index]
-        self.touch()
+        self.touch()
+
+    @property
+    def system_prompt(self) -> str:
+        """Returns the system prompt for this discussion."""
+        return self._system_prompt
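This hunk adds only a getter. Since the new generate_text wrappers assign temp_discussion.system_prompt = system_prompt, a matching setter has to exist elsewhere in LollmsDiscussion; with a getter-only property that assignment raises AttributeError. A sketch of the setter such code assumes (hypothetical, not part of this diff):

    @system_prompt.setter
    def system_prompt(self, value: str) -> None:  # hypothetical; not shown in this diff
        self._system_prompt = value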
lollms_client/lollms_llm_binding.py
@@ -154,8 +154,409 @@ class LollmsLLMBinding(ABC):
         """
         pass

-    def get_ctx_size(self, model_name:str
-
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves context size for a model from a hardcoded list.
+
+        This method checks if the model name contains a known base model identifier
+        (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
+        as a failsafe when the context size cannot be retrieved directly from the
+        Ollama API.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
+        # should appear, as they will be checked first due to the sorting logic below.
+        known_contexts = {
+            'agentica-org/deepcoder-14b-preview': 8192,
+            'agentica-org/deepcoder-14b-preview:free': 8192,
+            'ai21/jamba-large-1.7': 256000,
+            'ai21/jamba-mini-1.7': 256000,
+            'aion-labs/aion-1.0': 8192,
+            'aion-labs/aion-1.0-mini': 8192,
+            'aion-labs/aion-rp-llama-3.1-8b': 131072,
+            'alfredpros/codellama-7b-instruct-solidity': 16384,
+            'alpindale/goliath-120b': 4096,
+            'amazon/nova-lite-v1': 32768,
+            'amazon/nova-micro-v1': 32768,
+            'amazon/nova-pro-v1': 32768,
+            'anthracite-org/magnum-v2-72b': 131072,
+            'anthracite-org/magnum-v4-72b': 131072,
+            'anthropic/claude-3-haiku': 200000,
+            'anthropic/claude-3-haiku:beta': 200000,
+            'anthropic/claude-3-opus': 200000,
+            'anthropic/claude-3-opus:beta': 200000,
+            'anthropic/claude-3.5-haiku': 200000,
+            'anthropic/claude-3.5-haiku-20241022': 200000,
+            'anthropic/claude-3.5-haiku:beta': 200000,
+            'anthropic/claude-3.5-sonnet': 200000,
+            'anthropic/claude-3.5-sonnet-20240620': 200000,
+            'anthropic/claude-3.5-sonnet-20240620:beta': 200000,
+            'anthropic/claude-3.5-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet': 200000,
+            'anthropic/claude-3.7-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet:thinking': 200000,
+            'anthropic/claude-opus-4': 200000,
+            'anthropic/claude-opus-4.1': 200000,
+            'anthropic/claude-sonnet-4': 200000,
+            'arcee-ai/coder-large': 32768,
+            'arcee-ai/maestro-reasoning': 32768,
+            'arcee-ai/spotlight': 32768,
+            'arcee-ai/virtuoso-large': 32768,
+            'arliai/qwq-32b-arliai-rpr-v1': 8192,
+            'arliai/qwq-32b-arliai-rpr-v1:free': 8192,
+            'baidu/ernie-4.5-300b-a47b': 128000,
+            'bytedance/ui-tars-1.5-7b': 8192,
+            'cognitivecomputations/dolphin-mistral-24b-venice-edition:free': 32768,
+            'cognitivecomputations/dolphin-mixtral-8x22b': 65536,
+            'cognitivecomputations/dolphin3.0-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-mistral-24b:free': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b:free': 32768,
+            'cohere/command': 8192,
+            'cohere/command-a': 8192,
+            'cohere/command-r': 128000,
+            'cohere/command-r-03-2024': 128000,
+            'cohere/command-r-08-2024': 128000,
+            'cohere/command-r-plus': 128000,
+            'cohere/command-r-plus-04-2024': 128000,
+            'cohere/command-r-plus-08-2024': 128000,
+            'cohere/command-r7b-12-2024': 128000,
+            'deepseek/deepseek-chat': 32768,
+            'deepseek/deepseek-chat-v3-0324': 32768,
+            'deepseek/deepseek-chat-v3-0324:free': 32768,
+            'deepseek/deepseek-prover-v2': 131072,
+            'deepseek/deepseek-r1': 32768,
+            'deepseek/deepseek-r1-0528': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b:free': 32768,
+            'deepseek/deepseek-r1-0528:free': 32768,
+            'deepseek/deepseek-r1-distill-llama-70b': 131072,
+            'deepseek/deepseek-r1-distill-llama-70b:free': 131072,
+            'deepseek/deepseek-r1-distill-llama-8b': 131072,
+            'deepseek/deepseek-r1-distill-qwen-1.5b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b:free': 32768,
+            'deepseek/deepseek-r1-distill-qwen-32b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-7b': 32768,
+            'deepseek/deepseek-r1:free': 32768,
+            'deepseek/deepseek-v3-base': 32768,
+            'eleutherai/llemma_7b': 8192,
+            'featherless/qwerky-72b:free': 8192,
+            'google/gemini-2.0-flash-001': 1000000,
+            'google/gemini-2.0-flash-exp:free': 1000000,
+            'google/gemini-2.0-flash-lite-001': 1000000,
+            'google/gemini-2.5-flash': 1000000,
+            'google/gemini-2.5-flash-lite': 1000000,
+            'google/gemini-2.5-flash-lite-preview-06-17': 1000000,
+            'google/gemini-2.5-pro': 2000000,
+            'google/gemini-2.5-pro-exp-03-25': 2000000,
+            'google/gemini-2.5-pro-preview': 2000000,
+            'google/gemini-2.5-pro-preview-05-06': 2000000,
+            'google/gemini-flash-1.5': 1000000,
+            'google/gemini-flash-1.5-8b': 1000000,
+            'google/gemini-pro-1.5': 2000000,
+            'google/gemma-2-27b-it': 8192,
+            'google/gemma-2-9b-it': 8192,
+            'google/gemma-2-9b-it:free': 8192,
+            'google/gemma-3-12b-it': 131072,
+            'google/gemma-3-12b-it:free': 131072,
+            'google/gemma-3-27b-it': 131072,
+            'google/gemma-3-27b-it:free': 131072,
+            'google/gemma-3-4b-it': 131072,
+            'google/gemma-3-4b-it:free': 131072,
+            'google/gemma-3n-e2b-it:free': 131072,
+            'google/gemma-3n-e4b-it': 131072,
+            'google/gemma-3n-e4b-it:free': 131072,
+            'gryphe/mythomax-l2-13b': 4096,
+            'inception/mercury': 32768,
+            'inception/mercury-coder': 32768,
+            'infermatic/mn-inferor-12b': 8192,
+            'inflection/inflection-3-pi': 128000,
+            'inflection/inflection-3-productivity': 128000,
+            'liquid/lfm-3b': 8192,
+            'liquid/lfm-40b': 8192,
+            'liquid/lfm-7b': 8192,
+            'mancer/weaver': 8192,
+            'meta-llama/llama-3-70b-instruct': 8192,
+            'meta-llama/llama-3-8b-instruct': 8192,
+            'meta-llama/llama-3.1-405b': 131072,
+            'meta-llama/llama-3.1-405b-instruct': 131072,
+            'meta-llama/llama-3.1-405b-instruct:free': 131072,
+            'meta-llama/llama-3.1-70b-instruct': 131072,
+            'meta-llama/llama-3.1-8b-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct:free': 131072,
+            'meta-llama/llama-3.2-1b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct:free': 131072,
+            'meta-llama/llama-3.2-90b-vision-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct:free': 131072,
+            'meta-llama/llama-4-maverick': 131072,
+            'meta-llama/llama-4-scout': 131072,
+            'meta-llama/llama-guard-2-8b': 8192,
+            'meta-llama/llama-guard-3-8b': 131072,
+            'meta-llama/llama-guard-4-12b': 131072,
+            'microsoft/mai-ds-r1': 32768,
+            'microsoft/mai-ds-r1:free': 32768,
+            'microsoft/phi-3-medium-128k-instruct': 131072,
+            'microsoft/phi-3-mini-128k-instruct': 131072,
+            'microsoft/phi-3.5-mini-128k-instruct': 131072,
+            'microsoft/phi-4': 131072,
+            'microsoft/phi-4-multimodal-instruct': 131072,
+            'microsoft/phi-4-reasoning-plus': 131072,
+            'microsoft/wizardlm-2-8x22b': 65536,
+            'minimax/minimax-01': 200000,
+            'minimax/minimax-m1': 200000,
+            'mistralai/codestral-2501': 32768,
+            'mistralai/codestral-2508': 32768,
+            'mistralai/devstral-medium': 32768,
+            'mistralai/devstral-small': 32768,
+            'mistralai/devstral-small-2505': 32768,
+            'mistralai/devstral-small-2505:free': 32768,
+            'mistralai/magistral-medium-2506': 32768,
+            'mistralai/magistral-medium-2506:thinking': 32768,
+            'mistralai/magistral-small-2506': 32768,
+            'mistralai/ministral-3b': 32768,
+            'mistralai/ministral-8b': 32768,
+            'mistralai/mistral-7b-instruct': 32768,
+            'mistralai/mistral-7b-instruct-v0.1': 8192,
+            'mistralai/mistral-7b-instruct-v0.2': 32768,
+            'mistralai/mistral-7b-instruct-v0.3': 32768,
+            'mistralai/mistral-7b-instruct:free': 32768,
+            'mistralai/mistral-large': 32768,
+            'mistralai/mistral-large-2407': 128000,
+            'mistralai/mistral-large-2411': 128000,
+            'mistralai/mistral-medium-3': 32768,
+            'mistralai/mistral-nemo': 128000,
+            'mistralai/mistral-nemo:free': 128000,
+            'mistralai/mistral-saba': 32768,
+            'mistralai/mistral-small': 32768,
+            'mistralai/mistral-small-24b-instruct-2501': 32768,
+            'mistralai/mistral-small-24b-instruct-2501:free': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct:free': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct:free': 32768,
+            'mistralai/mistral-tiny': 32768,
+            'mistralai/mixtral-8x22b-instruct': 65536,
+            'mistralai/mixtral-8x7b-instruct': 32768,
+            'mistralai/pixtral-12b': 128000,
+            'mistralai/pixtral-large-2411': 128000,
+            'moonshotai/kimi-dev-72b:free': 200000,
+            'moonshotai/kimi-k2': 200000,
+            'moonshotai/kimi-k2:free': 200000,
+            'moonshotai/kimi-vl-a3b-thinking': 200000,
+            'moonshotai/kimi-vl-a3b-thinking:free': 200000,
+            'morph/morph-v3-fast': 8192,
+            'morph/morph-v3-large': 8192,
+            'neversleep/llama-3-lumimaid-70b': 8192,
+            'neversleep/llama-3.1-lumimaid-8b': 131072,
+            'neversleep/noromaid-20b': 32768,
+            'nousresearch/deephermes-3-llama-3-8b-preview:free': 8192,
+            'nousresearch/deephermes-3-mistral-24b-preview': 32768,
+            'nousresearch/hermes-2-pro-llama-3-8b': 8192,
+            'nousresearch/hermes-3-llama-3.1-405b': 131072,
+            'nousresearch/hermes-3-llama-3.1-70b': 131072,
+            'nousresearch/nous-hermes-2-mixtral-8x7b-dpo': 32768,
+            'nvidia/llama-3.1-nemotron-70b-instruct': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1:free': 131072,
+            'nvidia/llama-3.3-nemotron-super-49b-v1': 131072,
+            'openai/chatgpt-4o-latest': 128000,
+            'openai/codex-mini': 2048,
+            'openai/gpt-3.5-turbo': 4096,
+            'openai/gpt-3.5-turbo-0613': 4096,
+            'openai/gpt-3.5-turbo-16k': 16384,
+            'openai/gpt-3.5-turbo-instruct': 4096,
+            'openai/gpt-4': 8192,
+            'openai/gpt-4-0314': 8192,
+            'openai/gpt-4-1106-preview': 128000,
+            'openai/gpt-4-turbo': 128000,
+            'openai/gpt-4-turbo-preview': 128000,
+            'openai/gpt-4.1': 128000,
+            'openai/gpt-4.1-mini': 128000,
+            'openai/gpt-4.1-nano': 128000,
+            'openai/gpt-4o': 128000,
+            'openai/gpt-4o-2024-05-13': 128000,
+            'openai/gpt-4o-2024-08-06': 128000,
+            'openai/gpt-4o-2024-11-20': 128000,
+            'openai/gpt-4o-mini': 128000,
+            'openai/gpt-4o-mini-2024-07-18': 128000,
+            'openai/gpt-4o-mini-search-preview': 128000,
+            'openai/gpt-4o-search-preview': 128000,
+            'openai/gpt-4o:extended': 128000,
+            'openai/gpt-5': 200000,
+            'openai/gpt-5-chat': 200000,
+            'openai/gpt-5-mini': 200000,
+            'openai/gpt-5-nano': 200000,
+            'openai/gpt-oss-120b': 128000,
+            'openai/gpt-oss-20b': 128000,
+            'openai/gpt-oss-20b:free': 128000,
+            'openai/o1': 128000,
+            'openai/o1-mini': 128000,
+            'openai/o1-mini-2024-09-12': 128000,
+            'openai/o1-pro': 128000,
+            'openai/o3': 200000,
+            'openai/o3-mini': 200000,
+            'openai/o3-mini-high': 200000,
+            'openai/o3-pro': 200000,
+            'openai/o4-mini': 128000,
+            'openai/o4-mini-high': 128000,
+            'opengvlab/internvl3-14b': 8192,
+            'openrouter/auto': 8192,
+            'perplexity/r1-1776': 32768,
+            'perplexity/sonar': 32768,
+            'perplexity/sonar-deep-research': 32768,
+            'perplexity/sonar-pro': 32768,
+            'perplexity/sonar-reasoning': 32768,
+            'perplexity/sonar-reasoning-pro': 32768,
+            'pygmalionai/mythalion-13b': 4096,
+            'qwen/qwen-2-72b-instruct': 32768,
+            'qwen/qwen-2.5-72b-instruct': 131072,
+            'qwen/qwen-2.5-72b-instruct:free': 131072,
+            'qwen/qwen-2.5-7b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct:free': 131072,
+            'qwen/qwen-2.5-vl-7b-instruct': 131072,
+            'qwen/qwen-max': 32768,
+            'qwen/qwen-plus': 32768,
+            'qwen/qwen-turbo': 8192,
+            'qwen/qwen-vl-max': 32768,
+            'qwen/qwen-vl-plus': 32768,
+            'qwen/qwen2.5-vl-32b-instruct': 131072,
+            'qwen/qwen2.5-vl-32b-instruct:free': 131072,
+            'qwen/qwen2.5-vl-72b-instruct': 131072,
+            'qwen/qwen2.5-vl-72b-instruct:free': 131072,
+            'qwen/qwen3-14b': 32768,
+            'qwen/qwen3-14b:free': 32768,
+            'qwen/qwen3-235b-a22b': 32768,
+            'qwen/qwen3-235b-a22b-2507': 32768,
+            'qwen/qwen3-235b-a22b-thinking-2507': 32768,
+            'qwen/qwen3-235b-a22b:free': 32768,
+            'qwen/qwen3-30b-a3b': 32768,
+            'qwen/qwen3-30b-a3b-instruct-2507': 32768,
+            'qwen/qwen3-30b-a3b:free': 32768,
+            'qwen/qwen3-32b': 32768,
+            'qwen/qwen3-4b:free': 32768,
+            'qwen/qwen3-8b': 32768,
+            'qwen/qwen3-8b:free': 32768,
+            'qwen/qwen3-coder': 32768,
+            'qwen/qwen3-coder:free': 32768,
+            'qwen/qwq-32b': 32768,
+            'qwen/qwq-32b-preview': 32768,
+            'qwen/qwq-32b:free': 32768,
+            'raifle/sorcererlm-8x22b': 65536,
+            'rekaai/reka-flash-3:free': 128000,
+            'sao10k/l3-euryale-70b': 8192,
+            'sao10k/l3-lunaris-8b': 8192,
+            'sao10k/l3.1-euryale-70b': 131072,
+            'sao10k/l3.3-euryale-70b': 131072,
+            'sarvamai/sarvam-m:free': 8192,
+            'scb10x/llama3.1-typhoon2-70b-instruct': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b:free': 131072,
+            'sophosympatheia/midnight-rose-70b': 4096,
+            'switchpoint/router': 8192,
+            'tencent/hunyuan-a13b-instruct': 8192,
+            'tencent/hunyuan-a13b-instruct:free': 8192,
+            'thedrummer/anubis-70b-v1.1': 8192,
+            'thedrummer/anubis-pro-105b-v1': 8192,
+            'thedrummer/rocinante-12b': 8192,
+            'thedrummer/skyfall-36b-v2': 8192,
+            'thedrummer/unslopnemo-12b': 128000,
+            'thedrummer/valkyrie-49b-v1': 8192,
+            'thudm/glm-4-32b': 2000000,
+            'thudm/glm-4.1v-9b-thinking': 2000000,
+            'thudm/glm-z1-32b:free': 2000000,
+            'tngtech/deepseek-r1t-chimera': 32768,
+            'tngtech/deepseek-r1t-chimera:free': 32768,
+            'tngtech/deepseek-r1t2-chimera:free': 32768,
+            'undi95/remm-slerp-l2-13b': 4096,
+            'x-ai/grok-2-1212': 128000,
+            'x-ai/grok-2-vision-1212': 128000,
+            'x-ai/grok-3': 128000,
+            'x-ai/grok-3-beta': 128000,
+            'x-ai/grok-3-mini': 128000,
+            'x-ai/grok-3-mini-beta': 128000,
+            'x-ai/grok-4': 128000,
+            'x-ai/grok-vision-beta': 128000,
+            'z-ai/glm-4-32b': 2000000,
+            'z-ai/glm-4.5': 2000000,
+            'z-ai/glm-4.5-air': 2000000,
+            'z-ai/glm-4.5-air:free': 2000000,
+            'llama3.1': 131072,        # Llama 3.1 extended context
+            'llama3.2': 131072,        # Llama 3.2 extended context
+            'llama3.3': 131072,        # Assuming similar to 3.1/3.2
+            'llama3': 8192,            # Llama 3 default
+            'llama2': 4096,            # Llama 2 default
+            'mixtral8x22b': 65536,     # Mixtral 8x22B default
+            'mixtral': 32768,          # Mixtral 8x7B default
+            'mistral': 32768,          # Mistral 7B v0.2+ default
+            'gemma3': 131072,          # Gemma 3 with 128K context
+            'gemma2': 8192,            # Gemma 2 default
+            'gemma': 8192,             # Gemma default
+            'phi3': 131072,            # Phi-3 variants often use 128K (mini/medium extended)
+            'phi2': 2048,              # Phi-2 default
+            'phi': 2048,               # Phi default (older)
+            'qwen2.5': 131072,         # Qwen2.5 with 128K
+            'qwen2': 32768,            # Qwen2 default for 7B
+            'qwen': 8192,              # Qwen default
+            'codellama': 16384,        # CodeLlama extended
+            'codegemma': 8192,         # CodeGemma default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-coder': 16384,   # DeepSeek-Coder V1 default
+            'deepseek-v2': 131072,     # DeepSeek-V2 with 128K
+            'deepseek-llm': 4096,      # DeepSeek-LLM default
+            'yi1.5': 32768,            # Yi-1.5 with 32K
+            'yi': 4096,                # Yi base default
+            'command-r': 131072,       # Command-R with 128K
+            'wizardlm2': 32768,        # WizardLM2 (Mistral-based)
+            'wizardlm': 16384,         # WizardLM default
+            'zephyr': 65536,           # Zephyr beta (Mistral-based extended)
+            'vicuna': 2048,            # Vicuna default (up to 16K in some variants)
+            'falcon': 2048,            # Falcon default
+            'starcoder': 8192,         # StarCoder default
+            'stablelm': 4096,          # StableLM default
+            'orca2': 4096,             # Orca 2 default
+            'orca': 4096,              # Orca default
+            'dolphin': 32768,          # Dolphin (often Mistral-based)
+            'openhermes': 8192,        # OpenHermes default
+            'gpt-oss': 128000,         # GPT-OSS with 128K context
+            'gpt-3.5-turbo': 4096,     # GPT-3.5 Turbo default
+            'gpt-4': 8192,             # GPT-4 default
+            'grok-2': 128000,
+            'grok-2-1212': 128000,
+            'grok-2-vision-1212': 128000,
+            'grok-3': 128000,
+            'grok-3-fast': 128000,
+            'grok-3-beta': 128000,
+            'grok-3-mini': 128000,
+            'grok-3-mini-beta': 128000,
+            'grok-3-mini-fast': 128000,
+            'grok-4-0709': 128000,
+            'grok-4': 128000,
+            'grok-vision-beta': 128000,
+        }
+
+        normalized_model_name = model_name.lower().strip()
+
+        # Sort keys by length in descending order. This ensures that a more specific
+        # name like 'llama3.1' is checked before a less specific name like 'llama3'.
+        sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
+
+        for base_name in sorted_base_models:
+            if base_name in normalized_model_name:
+                context_size = known_contexts[base_name]
+                ASCIIColors.warning(
+                    f"Using hardcoded context size for model '{model_name}' "
+                    f"based on base name '{base_name}': {context_size}"
+                )
+                return context_size
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}' in the hardcoded list.")
         return None

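The lookup added above sorts keys longest-first so that a specific base name ('llama3.1') matches before its prefix ('llama3') when scanning for substrings. (The docstring's reference to the Ollama API looks copied from the Ollama binding; the method lives on the abstract base class.) A standalone sketch of the same logic with illustrative entries:

    from typing import Optional

    known_contexts = {"llama3.1": 131072, "llama3": 8192}

    def ctx_size(model_name: str) -> Optional[int]:
        name = model_name.lower().strip()
        # longest keys first, so 'llama3.1' wins over its prefix 'llama3'
        for base in sorted(known_contexts, key=len, reverse=True):
            if base in name:
                return known_contexts[base]
        return None

    assert ctx_size("Llama3.1-8B-Instruct-Q4") == 131072
    assert ctx_size("llama3:8b") == 8192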