lollms-client 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -11,11 +11,11 @@ import pipmaster as pm
 # Ensure the required packages are installed
 pm.ensure_packages(["mistralai", "pillow", "tiktoken"])
 
-from mistralai.client import MistralClient
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral
 from PIL import Image, ImageDraw
 import tiktoken
 
+
 BindingName = "MistralBinding"
 
 class MistralBinding(LollmsLLMBinding):
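
This import swap tracks the mistralai SDK's 1.x rewrite, which replaced the 0.x `MistralClient`/`ChatMessage` pair with a single `Mistral` entry point and plain role/content dicts. A minimal before/after sketch of that migration (assuming `mistralai>=1.0` is installed; this illustrates the SDK surface, not code shipped in this package):

```python
# mistralai 0.x (what 0.32.1 used):
# from mistralai.client import MistralClient
# from mistralai.models.chat_completion import ChatMessage
# client = MistralClient(api_key="...")
# messages = [ChatMessage(role="user", content="Hello")]

# mistralai 1.x (what 0.33.0 targets):
from mistralai import Mistral

client = Mistral(api_key="...")  # single entry point replaces MistralClient
messages = [{"role": "user", "content": "Hello"}]  # typed ChatMessage becomes a plain dict
```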
@@ -28,7 +28,7 @@ class MistralBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "mistral-large-latest",
-                 mistral_api_key: str = None,
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -40,13 +40,13 @@ class MistralBinding(LollmsLLMBinding):
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.mistral_api_key = mistral_api_key or os.getenv("MISTRAL_API_KEY")
+        self.mistral_api_key = service_key or os.getenv("MISTRAL_API_KEY")
 
         if not self.mistral_api_key:
             raise ValueError("Mistral API key is required. Set it via 'mistral_api_key' or MISTRAL_API_KEY env var.")
 
         try:
-            self.client = MistralClient(api_key=self.mistral_api_key)
+            self.client = Mistral(api_key=self.mistral_api_key)
         except Exception as e:
             ASCIIColors.error(f"Failed to configure Mistral client: {e}")
             self.client = None
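
With the constructor change, the key now arrives through the generic `service_key` parameter and still falls back to the `MISTRAL_API_KEY` environment variable. A usage sketch of the renamed argument (class name and defaults as shown in this diff):

```python
import os

# Option 1: rely on the environment-variable fallback
os.environ["MISTRAL_API_KEY"] = "your-key-here"  # placeholder
binding = MistralBinding(model_name="mistral-large-latest")

# Option 2: pass the key explicitly (was `mistral_api_key` in 0.32.1)
binding = MistralBinding(model_name="mistral-large-latest", service_key="your-key-here")
```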
@@ -64,35 +64,67 @@ class MistralBinding(LollmsLLMBinding):
         if n_predict is not None: params['max_tokens'] = n_predict
         if seed is not None: params['random_seed'] = seed # Mistral uses 'random_seed'
         return params
+
 
-    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[ChatMessage]:
-        """Prepares the message list for the Mistral API from a LollmsDiscussion."""
+    def _prepare_messages(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None) -> List[Dict[str, any]]:
+        """Prepares the message list for the API from a LollmsDiscussion."""
         history = []
         if discussion.system_prompt:
-            # Mistral prefers the system prompt as the first message with a user/assistant turn.
-            # A lone system message is not ideal. We will prepend it to the first user message.
-            # However, for API consistency, we will treat it as a separate message if it exists.
-            # The official client will likely handle this.
-            history.append(ChatMessage(role="system", content=discussion.system_prompt))
+            history.append({"role": "system", "content": discussion.system_prompt})
 
         for msg in discussion.get_messages(branch_tip_id):
             role = 'user' if msg.sender_type == "user" else 'assistant'
-            # Note: Mistral API currently does not support image inputs via the chat endpoint.
+            # Note: Vision support depends on the specific model being called via OpenRouter.
+            # We will not implement it in this generic binding to avoid complexity,
+            # as different models might expect different formats.
             if msg.content:
-                history.append(ChatMessage(role=role, content=msg.content))
+                history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self, prompt: str, **kwargs) -> Union[str, dict]:
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
-        Generate text using Mistral. This is a wrapper around the chat method.
+        Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion.from_messages([
-            LollmsMessage.new_message(sender_type="user", content=prompt)
-        ])
-        if kwargs.get("system_prompt"):
-            temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion, **kwargs)
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
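
The rewrite turns `generate_text` from a bare `**kwargs` wrapper into an explicit signature, so `system_prompt` and the sampling knobs no longer have to be smuggled through kwargs. A call sketch using only parameters visible in the new signature (per the hunk's context lines, `n_predict` maps to the API's `max_tokens` and `seed` to Mistral's `random_seed`):

```python
result = binding.generate_text(
    prompt="Summarize this release in one sentence.",
    system_prompt="You are a concise assistant.",
    n_predict=128,   # forwarded as max_tokens
    temperature=0.7,
    seed=42,         # forwarded as random_seed on Mistral
    stream=False,
)
print(result)
```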
@@ -117,7 +149,7 @@ class MistralBinding(LollmsLLMBinding):
 
         try:
             if stream:
-                response = self.client.chat_stream(
+                response = self.client.chat.stream(
                     model=self.model_name,
                     messages=messages,
                     **api_params
@@ -131,7 +163,7 @@ class MistralBinding(LollmsLLMBinding):
                         break
                 return full_response_text
             else:
-                response = self.client.chat(
+                response = self.client.chat.complete(
                     model=self.model_name,
                     messages=messages,
                     **api_params
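
In the 1.x SDK the flat `chat()`/`chat_stream()` methods move under a namespace as `chat.complete()` and `chat.stream()`. A standalone sketch of both call styles against the raw client (this follows the mistralai 1.x documented interface, where stream events expose the delta at `data.choices[0].delta.content`; it is not code from this package):

```python
from mistralai import Mistral

client = Mistral(api_key="...")
messages = [{"role": "user", "content": "Hello"}]

# Non-streaming: a single response object
response = client.chat.complete(model="mistral-large-latest", messages=messages)
print(response.choices[0].message.content)

# Streaming: iterate over server-sent events
with client.chat.stream(model="mistral-large-latest", messages=messages) as events:
    for event in events:
        delta = event.data.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)
```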
@@ -201,7 +233,7 @@ class MistralBinding(LollmsLLMBinding):
             return []
         try:
             ASCIIColors.debug("Listing Mistral models...")
-            models = self.client.list_models()
+            models = self.client.models.list()
             model_info_list = []
             for m in models.data:
                 model_info_list.append({
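
Model listing moves under a namespace as well. A short sketch of enumerating models with the 1.x client (the `.data` iteration mirrors the loop in this hunk; the `id` field is assumed from the SDK's model-card schema):

```python
models = client.models.list()
for m in models.data:
    print(m.id)  # e.g. "mistral-large-latest"
```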
@@ -264,7 +296,6 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
     full_streamed_text = ""
     def stream_callback(chunk: str, msg_type: int):
-        nonlocal full_streamed_text
         ASCIIColors.green(chunk, end="", flush=True)
         full_streamed_text += chunk
         return True
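
The dropped `nonlocal` was a syntax error in context: `stream_callback` is defined at module scope inside the `if __name__ == '__main__':` block, and `nonlocal` can only bind to an enclosing function scope. Rebinding a module-level name from inside a function needs `global` instead; a corrected sketch of such an accumulating callback (illustrative, not the shipped code):

```python
full_streamed_text = ""

def stream_callback(chunk: str, msg_type: int) -> bool:
    global full_streamed_text  # required to rebind a module-level name
    print(chunk, end="", flush=True)
    full_streamed_text += chunk
    return True  # return True to keep streaming
```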
@@ -29,7 +29,7 @@ class OpenRouterBinding(LollmsLLMBinding):
 
     def __init__(self,
                  model_name: str = "google/gemini-flash-1.5", # A good, fast default
-                 open_router_api_key: str = None,
+                 service_key: str|None = None,
                  **kwargs
                  ):
         """
@@ -37,11 +37,11 @@ class OpenRouterBinding(LollmsLLMBinding):
 
         Args:
             model_name (str): The name of the model to use from OpenRouter (e.g., 'anthropic/claude-3-haiku-20240307').
-            open_router_api_key (str): The API key for the OpenRouter service.
+            service_key (str): The API key for the OpenRouter service.
         """
         super().__init__(binding_name=BindingName)
         self.model_name = model_name
-        self.api_key = open_router_api_key or os.getenv("OPENROUTER_API_KEY")
+        self.api_key = service_key or os.getenv("OPENROUTER_API_KEY")
 
         if not self.api_key:
             raise ValueError("OpenRouter API key is required. Set it via 'open_router_api_key' or OPENROUTER_API_KEY env var.")
@@ -84,17 +84,50 @@ class OpenRouterBinding(LollmsLLMBinding):
                 history.append({'role': role, 'content': msg.content})
         return history
 
-    def generate_text(self, prompt: str, **kwargs) -> Union[str, dict]:
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split: Optional[bool] = False, # put to true if the prompt is a discussion
+                      user_keyword: Optional[str] = "!@>user:",
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using OpenRouter. This is a wrapper around the chat method.
         """
-        temp_discussion = LollmsDiscussion.from_messages([
-            LollmsMessage.new_message(sender_type="user", content=prompt)
-        ])
-        if kwargs.get("system_prompt"):
-            temp_discussion.system_prompt = kwargs.get("system_prompt")
+        temp_discussion = LollmsDiscussion(None)
+        temp_discussion.add_message(sender="user", content=prompt, images=images or [])
+        if system_prompt:
+            temp_discussion.system_prompt = system_prompt
 
-        return self.chat(temp_discussion, **kwargs)
+        return self.chat(temp_discussion,
+                         n_predict=n_predict,
+                         stream=stream,
+                         temperature=temperature,
+                         top_k=top_k,
+                         top_p=top_p,
+                         repeat_penalty=repeat_penalty,
+                         repeat_last_n=repeat_last_n,
+                         seed=seed,
+                         n_threads=n_threads,
+                         ctx_size=ctx_size,
+                         streaming_callback=streaming_callback,
+                         split=split,
+                         user_keyword=user_keyword,
+                         ai_keyword=ai_keyword,
+                         **kwargs)
 
     def chat(self,
              discussion: LollmsDiscussion,
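
Taken together, both bindings now accept the same `service_key` keyword in place of their provider-specific names, so callers can inject credentials uniformly. A sketch of the before/after at the call site:

```python
# 0.32.1: provider-specific keyword per binding
# MistralBinding(mistral_api_key="...")
# OpenRouterBinding(open_router_api_key="...")

# 0.33.0: one keyword across bindings; env-var fallbacks unchanged
mistral = MistralBinding(model_name="mistral-large-latest", service_key="...")
router = OpenRouterBinding(model_name="google/gemini-flash-1.5", service_key="...")
```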
@@ -1835,4 +1835,9 @@ class LollmsDiscussion:
 
         del self.images[index]
         del self.active_images[index]
-        self.touch()
+        self.touch()
+
+    @property
+    def system_prompt(self) -> str:
+        """Returns the system prompt for this discussion."""
+        return self._system_prompt
@@ -154,8 +154,409 @@ class LollmsLLMBinding(ABC):
         """
         pass
 
-    def get_ctx_size(self, model_name:str|None=None):
-        # if model_name is none use current model name
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves context size for a model from a hardcoded list.
+
+        This method checks if the model name contains a known base model identifier
+        (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
+        as a failsafe when the context size cannot be retrieved directly from the
+        Ollama API.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
+        # should appear, as they will be checked first due to the sorting logic below.
+        known_contexts = {
+            'agentica-org/deepcoder-14b-preview': 8192,
+            'agentica-org/deepcoder-14b-preview:free': 8192,
+            'ai21/jamba-large-1.7': 256000,
+            'ai21/jamba-mini-1.7': 256000,
+            'aion-labs/aion-1.0': 8192,
+            'aion-labs/aion-1.0-mini': 8192,
+            'aion-labs/aion-rp-llama-3.1-8b': 131072,
+            'alfredpros/codellama-7b-instruct-solidity': 16384,
+            'alpindale/goliath-120b': 4096,
+            'amazon/nova-lite-v1': 32768,
+            'amazon/nova-micro-v1': 32768,
+            'amazon/nova-pro-v1': 32768,
+            'anthracite-org/magnum-v2-72b': 131072,
+            'anthracite-org/magnum-v4-72b': 131072,
+            'anthropic/claude-3-haiku': 200000,
+            'anthropic/claude-3-haiku:beta': 200000,
+            'anthropic/claude-3-opus': 200000,
+            'anthropic/claude-3-opus:beta': 200000,
+            'anthropic/claude-3.5-haiku': 200000,
+            'anthropic/claude-3.5-haiku-20241022': 200000,
+            'anthropic/claude-3.5-haiku:beta': 200000,
+            'anthropic/claude-3.5-sonnet': 200000,
+            'anthropic/claude-3.5-sonnet-20240620': 200000,
+            'anthropic/claude-3.5-sonnet-20240620:beta': 200000,
+            'anthropic/claude-3.5-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet': 200000,
+            'anthropic/claude-3.7-sonnet:beta': 200000,
+            'anthropic/claude-3.7-sonnet:thinking': 200000,
+            'anthropic/claude-opus-4': 200000,
+            'anthropic/claude-opus-4.1': 200000,
+            'anthropic/claude-sonnet-4': 200000,
+            'arcee-ai/coder-large': 32768,
+            'arcee-ai/maestro-reasoning': 32768,
+            'arcee-ai/spotlight': 32768,
+            'arcee-ai/virtuoso-large': 32768,
+            'arliai/qwq-32b-arliai-rpr-v1': 8192,
+            'arliai/qwq-32b-arliai-rpr-v1:free': 8192,
+            'baidu/ernie-4.5-300b-a47b': 128000,
+            'bytedance/ui-tars-1.5-7b': 8192,
+            'cognitivecomputations/dolphin-mistral-24b-venice-edition:free': 32768,
+            'cognitivecomputations/dolphin-mixtral-8x22b': 65536,
+            'cognitivecomputations/dolphin3.0-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-mistral-24b:free': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b': 32768,
+            'cognitivecomputations/dolphin3.0-r1-mistral-24b:free': 32768,
+            'cohere/command': 8192,
+            'cohere/command-a': 8192,
+            'cohere/command-r': 128000,
+            'cohere/command-r-03-2024': 128000,
+            'cohere/command-r-08-2024': 128000,
+            'cohere/command-r-plus': 128000,
+            'cohere/command-r-plus-04-2024': 128000,
+            'cohere/command-r-plus-08-2024': 128000,
+            'cohere/command-r7b-12-2024': 128000,
+            'deepseek/deepseek-chat': 32768,
+            'deepseek/deepseek-chat-v3-0324': 32768,
+            'deepseek/deepseek-chat-v3-0324:free': 32768,
+            'deepseek/deepseek-prover-v2': 131072,
+            'deepseek/deepseek-r1': 32768,
+            'deepseek/deepseek-r1-0528': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b': 32768,
+            'deepseek/deepseek-r1-0528-qwen3-8b:free': 32768,
+            'deepseek/deepseek-r1-0528:free': 32768,
+            'deepseek/deepseek-r1-distill-llama-70b': 131072,
+            'deepseek/deepseek-r1-distill-llama-70b:free': 131072,
+            'deepseek/deepseek-r1-distill-llama-8b': 131072,
+            'deepseek/deepseek-r1-distill-qwen-1.5b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-14b:free': 32768,
+            'deepseek/deepseek-r1-distill-qwen-32b': 32768,
+            'deepseek/deepseek-r1-distill-qwen-7b': 32768,
+            'deepseek/deepseek-r1:free': 32768,
+            'deepseek/deepseek-v3-base': 32768,
+            'eleutherai/llemma_7b': 8192,
+            'featherless/qwerky-72b:free': 8192,
+            'google/gemini-2.0-flash-001': 1000000,
+            'google/gemini-2.0-flash-exp:free': 1000000,
+            'google/gemini-2.0-flash-lite-001': 1000000,
+            'google/gemini-2.5-flash': 1000000,
+            'google/gemini-2.5-flash-lite': 1000000,
+            'google/gemini-2.5-flash-lite-preview-06-17': 1000000,
+            'google/gemini-2.5-pro': 2000000,
+            'google/gemini-2.5-pro-exp-03-25': 2000000,
+            'google/gemini-2.5-pro-preview': 2000000,
+            'google/gemini-2.5-pro-preview-05-06': 2000000,
+            'google/gemini-flash-1.5': 1000000,
+            'google/gemini-flash-1.5-8b': 1000000,
+            'google/gemini-pro-1.5': 2000000,
+            'google/gemma-2-27b-it': 8192,
+            'google/gemma-2-9b-it': 8192,
+            'google/gemma-2-9b-it:free': 8192,
+            'google/gemma-3-12b-it': 131072,
+            'google/gemma-3-12b-it:free': 131072,
+            'google/gemma-3-27b-it': 131072,
+            'google/gemma-3-27b-it:free': 131072,
+            'google/gemma-3-4b-it': 131072,
+            'google/gemma-3-4b-it:free': 131072,
+            'google/gemma-3n-e2b-it:free': 131072,
+            'google/gemma-3n-e4b-it': 131072,
+            'google/gemma-3n-e4b-it:free': 131072,
+            'gryphe/mythomax-l2-13b': 4096,
+            'inception/mercury': 32768,
+            'inception/mercury-coder': 32768,
+            'infermatic/mn-inferor-12b': 8192,
+            'inflection/inflection-3-pi': 128000,
+            'inflection/inflection-3-productivity': 128000,
+            'liquid/lfm-3b': 8192,
+            'liquid/lfm-40b': 8192,
+            'liquid/lfm-7b': 8192,
+            'mancer/weaver': 8192,
+            'meta-llama/llama-3-70b-instruct': 8192,
+            'meta-llama/llama-3-8b-instruct': 8192,
+            'meta-llama/llama-3.1-405b': 131072,
+            'meta-llama/llama-3.1-405b-instruct': 131072,
+            'meta-llama/llama-3.1-405b-instruct:free': 131072,
+            'meta-llama/llama-3.1-70b-instruct': 131072,
+            'meta-llama/llama-3.1-8b-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct': 131072,
+            'meta-llama/llama-3.2-11b-vision-instruct:free': 131072,
+            'meta-llama/llama-3.2-1b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct': 131072,
+            'meta-llama/llama-3.2-3b-instruct:free': 131072,
+            'meta-llama/llama-3.2-90b-vision-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct': 131072,
+            'meta-llama/llama-3.3-70b-instruct:free': 131072,
+            'meta-llama/llama-4-maverick': 131072,
+            'meta-llama/llama-4-scout': 131072,
+            'meta-llama/llama-guard-2-8b': 8192,
+            'meta-llama/llama-guard-3-8b': 131072,
+            'meta-llama/llama-guard-4-12b': 131072,
+            'microsoft/mai-ds-r1': 32768,
+            'microsoft/mai-ds-r1:free': 32768,
+            'microsoft/phi-3-medium-128k-instruct': 131072,
+            'microsoft/phi-3-mini-128k-instruct': 131072,
+            'microsoft/phi-3.5-mini-128k-instruct': 131072,
+            'microsoft/phi-4': 131072,
+            'microsoft/phi-4-multimodal-instruct': 131072,
+            'microsoft/phi-4-reasoning-plus': 131072,
+            'microsoft/wizardlm-2-8x22b': 65536,
+            'minimax/minimax-01': 200000,
+            'minimax/minimax-m1': 200000,
+            'mistralai/codestral-2501': 32768,
+            'mistralai/codestral-2508': 32768,
+            'mistralai/devstral-medium': 32768,
+            'mistralai/devstral-small': 32768,
+            'mistralai/devstral-small-2505': 32768,
+            'mistralai/devstral-small-2505:free': 32768,
+            'mistralai/magistral-medium-2506': 32768,
+            'mistralai/magistral-medium-2506:thinking': 32768,
+            'mistralai/magistral-small-2506': 32768,
+            'mistralai/ministral-3b': 32768,
+            'mistralai/ministral-8b': 32768,
+            'mistralai/mistral-7b-instruct': 32768,
+            'mistralai/mistral-7b-instruct-v0.1': 8192,
+            'mistralai/mistral-7b-instruct-v0.2': 32768,
+            'mistralai/mistral-7b-instruct-v0.3': 32768,
+            'mistralai/mistral-7b-instruct:free': 32768,
+            'mistralai/mistral-large': 32768,
+            'mistralai/mistral-large-2407': 128000,
+            'mistralai/mistral-large-2411': 128000,
+            'mistralai/mistral-medium-3': 32768,
+            'mistralai/mistral-nemo': 128000,
+            'mistralai/mistral-nemo:free': 128000,
+            'mistralai/mistral-saba': 32768,
+            'mistralai/mistral-small': 32768,
+            'mistralai/mistral-small-24b-instruct-2501': 32768,
+            'mistralai/mistral-small-24b-instruct-2501:free': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct': 32768,
+            'mistralai/mistral-small-3.1-24b-instruct:free': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct': 32768,
+            'mistralai/mistral-small-3.2-24b-instruct:free': 32768,
+            'mistralai/mistral-tiny': 32768,
+            'mistralai/mixtral-8x22b-instruct': 65536,
+            'mistralai/mixtral-8x7b-instruct': 32768,
+            'mistralai/pixtral-12b': 128000,
+            'mistralai/pixtral-large-2411': 128000,
+            'moonshotai/kimi-dev-72b:free': 200000,
+            'moonshotai/kimi-k2': 200000,
+            'moonshotai/kimi-k2:free': 200000,
+            'moonshotai/kimi-vl-a3b-thinking': 200000,
+            'moonshotai/kimi-vl-a3b-thinking:free': 200000,
+            'morph/morph-v3-fast': 8192,
+            'morph/morph-v3-large': 8192,
+            'neversleep/llama-3-lumimaid-70b': 8192,
+            'neversleep/llama-3.1-lumimaid-8b': 131072,
+            'neversleep/noromaid-20b': 32768,
+            'nousresearch/deephermes-3-llama-3-8b-preview:free': 8192,
+            'nousresearch/deephermes-3-mistral-24b-preview': 32768,
+            'nousresearch/hermes-2-pro-llama-3-8b': 8192,
+            'nousresearch/hermes-3-llama-3.1-405b': 131072,
+            'nousresearch/hermes-3-llama-3.1-70b': 131072,
+            'nousresearch/nous-hermes-2-mixtral-8x7b-dpo': 32768,
+            'nvidia/llama-3.1-nemotron-70b-instruct': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1': 131072,
+            'nvidia/llama-3.1-nemotron-ultra-253b-v1:free': 131072,
+            'nvidia/llama-3.3-nemotron-super-49b-v1': 131072,
+            'openai/chatgpt-4o-latest': 128000,
+            'openai/codex-mini': 2048,
+            'openai/gpt-3.5-turbo': 4096,
+            'openai/gpt-3.5-turbo-0613': 4096,
+            'openai/gpt-3.5-turbo-16k': 16384,
+            'openai/gpt-3.5-turbo-instruct': 4096,
+            'openai/gpt-4': 8192,
+            'openai/gpt-4-0314': 8192,
+            'openai/gpt-4-1106-preview': 128000,
+            'openai/gpt-4-turbo': 128000,
+            'openai/gpt-4-turbo-preview': 128000,
+            'openai/gpt-4.1': 128000,
+            'openai/gpt-4.1-mini': 128000,
+            'openai/gpt-4.1-nano': 128000,
+            'openai/gpt-4o': 128000,
+            'openai/gpt-4o-2024-05-13': 128000,
+            'openai/gpt-4o-2024-08-06': 128000,
+            'openai/gpt-4o-2024-11-20': 128000,
+            'openai/gpt-4o-mini': 128000,
+            'openai/gpt-4o-mini-2024-07-18': 128000,
+            'openai/gpt-4o-mini-search-preview': 128000,
+            'openai/gpt-4o-search-preview': 128000,
+            'openai/gpt-4o:extended': 128000,
+            'openai/gpt-5': 200000,
+            'openai/gpt-5-chat': 200000,
+            'openai/gpt-5-mini': 200000,
+            'openai/gpt-5-nano': 200000,
+            'openai/gpt-oss-120b': 128000,
+            'openai/gpt-oss-20b': 128000,
+            'openai/gpt-oss-20b:free': 128000,
+            'openai/o1': 128000,
+            'openai/o1-mini': 128000,
+            'openai/o1-mini-2024-09-12': 128000,
+            'openai/o1-pro': 128000,
+            'openai/o3': 200000,
+            'openai/o3-mini': 200000,
+            'openai/o3-mini-high': 200000,
+            'openai/o3-pro': 200000,
+            'openai/o4-mini': 128000,
+            'openai/o4-mini-high': 128000,
+            'opengvlab/internvl3-14b': 8192,
+            'openrouter/auto': 8192,
+            'perplexity/r1-1776': 32768,
+            'perplexity/sonar': 32768,
+            'perplexity/sonar-deep-research': 32768,
+            'perplexity/sonar-pro': 32768,
+            'perplexity/sonar-reasoning': 32768,
+            'perplexity/sonar-reasoning-pro': 32768,
+            'pygmalionai/mythalion-13b': 4096,
+            'qwen/qwen-2-72b-instruct': 32768,
+            'qwen/qwen-2.5-72b-instruct': 131072,
+            'qwen/qwen-2.5-72b-instruct:free': 131072,
+            'qwen/qwen-2.5-7b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct': 131072,
+            'qwen/qwen-2.5-coder-32b-instruct:free': 131072,
+            'qwen/qwen-2.5-vl-7b-instruct': 131072,
+            'qwen/qwen-max': 32768,
+            'qwen/qwen-plus': 32768,
+            'qwen/qwen-turbo': 8192,
+            'qwen/qwen-vl-max': 32768,
+            'qwen/qwen-vl-plus': 32768,
+            'qwen/qwen2.5-vl-32b-instruct': 131072,
+            'qwen/qwen2.5-vl-32b-instruct:free': 131072,
+            'qwen/qwen2.5-vl-72b-instruct': 131072,
+            'qwen/qwen2.5-vl-72b-instruct:free': 131072,
+            'qwen/qwen3-14b': 32768,
+            'qwen/qwen3-14b:free': 32768,
+            'qwen/qwen3-235b-a22b': 32768,
+            'qwen/qwen3-235b-a22b-2507': 32768,
+            'qwen/qwen3-235b-a22b-thinking-2507': 32768,
+            'qwen/qwen3-235b-a22b:free': 32768,
+            'qwen/qwen3-30b-a3b': 32768,
+            'qwen/qwen3-30b-a3b-instruct-2507': 32768,
+            'qwen/qwen3-30b-a3b:free': 32768,
+            'qwen/qwen3-32b': 32768,
+            'qwen/qwen3-4b:free': 32768,
+            'qwen/qwen3-8b': 32768,
+            'qwen/qwen3-8b:free': 32768,
+            'qwen/qwen3-coder': 32768,
+            'qwen/qwen3-coder:free': 32768,
+            'qwen/qwq-32b': 32768,
+            'qwen/qwq-32b-preview': 32768,
+            'qwen/qwq-32b:free': 32768,
+            'raifle/sorcererlm-8x22b': 65536,
+            'rekaai/reka-flash-3:free': 128000,
+            'sao10k/l3-euryale-70b': 8192,
+            'sao10k/l3-lunaris-8b': 8192,
+            'sao10k/l3.1-euryale-70b': 131072,
+            'sao10k/l3.3-euryale-70b': 131072,
+            'sarvamai/sarvam-m:free': 8192,
+            'scb10x/llama3.1-typhoon2-70b-instruct': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b': 131072,
+            'shisa-ai/shisa-v2-llama3.3-70b:free': 131072,
+            'sophosympatheia/midnight-rose-70b': 4096,
+            'switchpoint/router': 8192,
+            'tencent/hunyuan-a13b-instruct': 8192,
+            'tencent/hunyuan-a13b-instruct:free': 8192,
+            'thedrummer/anubis-70b-v1.1': 8192,
+            'thedrummer/anubis-pro-105b-v1': 8192,
+            'thedrummer/rocinante-12b': 8192,
+            'thedrummer/skyfall-36b-v2': 8192,
+            'thedrummer/unslopnemo-12b': 128000,
+            'thedrummer/valkyrie-49b-v1': 8192,
+            'thudm/glm-4-32b': 2000000,
+            'thudm/glm-4.1v-9b-thinking': 2000000,
+            'thudm/glm-z1-32b:free': 2000000,
+            'tngtech/deepseek-r1t-chimera': 32768,
+            'tngtech/deepseek-r1t-chimera:free': 32768,
+            'tngtech/deepseek-r1t2-chimera:free': 32768,
+            'undi95/remm-slerp-l2-13b': 4096,
+            'x-ai/grok-2-1212': 128000,
+            'x-ai/grok-2-vision-1212': 128000,
+            'x-ai/grok-3': 128000,
+            'x-ai/grok-3-beta': 128000,
+            'x-ai/grok-3-mini': 128000,
+            'x-ai/grok-3-mini-beta': 128000,
+            'x-ai/grok-4': 128000,
+            'x-ai/grok-vision-beta': 128000,
+            'z-ai/glm-4-32b': 2000000,
+            'z-ai/glm-4.5': 2000000,
+            'z-ai/glm-4.5-air': 2000000,
+            'z-ai/glm-4.5-air:free': 2000000,
+            'llama3.1': 131072, # Llama 3.1 extended context
+            'llama3.2': 131072, # Llama 3.2 extended context
+            'llama3.3': 131072, # Assuming similar to 3.1/3.2
+            'llama3': 8192, # Llama 3 default
+            'llama2': 4096, # Llama 2 default
+            'mixtral8x22b': 65536, # Mixtral 8x22B default
+            'mixtral': 32768, # Mixtral 8x7B default
+            'mistral': 32768, # Mistral 7B v0.2+ default
+            'gemma3': 131072, # Gemma 3 with 128K context
+            'gemma2': 8192, # Gemma 2 default
+            'gemma': 8192, # Gemma default
+            'phi3': 131072, # Phi-3 variants often use 128K (mini/medium extended)
+            'phi2': 2048, # Phi-2 default
+            'phi': 2048, # Phi default (older)
+            'qwen2.5': 131072, # Qwen2.5 with 128K
+            'qwen2': 32768, # Qwen2 default for 7B
+            'qwen': 8192, # Qwen default
+            'codellama': 16384, # CodeLlama extended
+            'codegemma': 8192, # CodeGemma default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-coder': 16384, # DeepSeek-Coder V1 default
+            'deepseek-v2': 131072, # DeepSeek-V2 with 128K
+            'deepseek-llm': 4096, # DeepSeek-LLM default
+            'yi1.5': 32768, # Yi-1.5 with 32K
+            'yi': 4096, # Yi base default
+            'command-r': 131072, # Command-R with 128K
+            'wizardlm2': 32768, # WizardLM2 (Mistral-based)
+            'wizardlm': 16384, # WizardLM default
+            'zephyr': 65536, # Zephyr beta (Mistral-based extended)
+            'vicuna': 2048, # Vicuna default (up to 16K in some variants)
+            'falcon': 2048, # Falcon default
+            'starcoder': 8192, # StarCoder default
+            'stablelm': 4096, # StableLM default
+            'orca2': 4096, # Orca 2 default
+            'orca': 4096, # Orca default
+            'dolphin': 32768, # Dolphin (often Mistral-based)
+            'openhermes': 8192, # OpenHermes default
+            'gpt-oss': 128000, # GPT-OSS with 128K context
+            'gpt-3.5-turbo': 4096, # GPT-3.5 Turbo default
+            'gpt-4': 8192, # GPT-4 default
+            'grok-2': 128000,
+            'grok-2-1212': 128000,
+            'grok-2-vision-1212': 128000,
+            'grok-3': 128000,
+            'grok-3-fast': 128000,
+            'grok-3-beta': 128000,
+            'grok-3-mini': 128000,
+            'grok-3-mini-beta': 128000,
+            'grok-3-mini-fast': 128000,
+            'grok-4-0709': 128000,
+            'grok-4': 128000,
+            'grok-vision-beta': 128000,
+        }
+
+        normalized_model_name = model_name.lower().strip()
+
+        # Sort keys by length in descending order. This ensures that a more specific
+        # name like 'llama3.1' is checked before a less specific name like 'llama3'.
+        sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
+
+        for base_name in sorted_base_models:
+            if base_name in normalized_model_name:
+                context_size = known_contexts[base_name]
+                ASCIIColors.warning(
+                    f"Using hardcoded context size for model '{model_name}' "
+                    f"based on base name '{base_name}': {context_size}"
+                )
+                return context_size
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}' in the hardcoded list.")
         return None
 
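
Because the table is scanned longest-key-first, a tag like `llama3.1:8b-instruct` resolves against `llama3.1` (131072) before the shorter `llama3` (8192) can match. A self-contained sketch of the same substring lookup on a trimmed table (illustrative; the real method also logs warnings and defaults `model_name` to the current model):

```python
known_contexts = {"llama3.1": 131072, "llama3": 8192, "mixtral8x22b": 65536, "mixtral": 32768}

def lookup_ctx(model_name: str) -> int | None:
    name = model_name.lower().strip()
    # Longest keys first, so "llama3.1" wins over "llama3" for "llama3.1:8b"
    for base in sorted(known_contexts, key=len, reverse=True):
        if base in name:
            return known_contexts[base]
    return None

assert lookup_ctx("llama3.1:8b-instruct") == 131072
assert lookup_ctx("llama3:latest") == 8192
assert lookup_ctx("unknown-model") is None
```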