lollms-client 1.5.6__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only and reflects the packages exactly as published.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +76 -21
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
- lollms_client/llm_bindings/ollama/__init__.py +307 -89
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +81 -20
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +333 -171
- lollms_client/llm_bindings/perplexity/__init__.py +2 -2
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +512 -1890
- lollms_client/lollms_discussion.py +25 -11
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +7 -4
- lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
- lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/METADATA +113 -5
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client-1.5.6.dist-info/RECORD +0 -87
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/grok/__init__.py:

@@ -26,9 +26,11 @@ GROK_API_BASE_URL = "https://api.x.ai/v1"
 
 # A hardcoded list to be used as a fallback if the API call fails
 _FALLBACK_MODELS = [
-    {'model_name': 'grok-
-    {'model_name': 'grok-
-    {'model_name': 'grok-
+    {'model_name': 'grok-2-latest', 'display_name': 'Grok 2 Latest', 'description': 'The latest conversational model from xAI.', 'owned_by': 'xAI'},
+    {'model_name': 'grok-2', 'display_name': 'Grok 2', 'description': 'Grok 2 model.', 'owned_by': 'xAI'},
+    {'model_name': 'grok-2-vision-latest', 'display_name': 'Grok 2 Vision Latest', 'description': 'Latest multimodal model from xAI.', 'owned_by': 'xAI'},
+    {'model_name': 'grok-beta', 'display_name': 'Grok Beta', 'description': 'Beta model.', 'owned_by': 'xAI'},
+    {'model_name': 'grok-vision-beta', 'display_name': 'Grok Vision Beta', 'description': 'Beta vision model.', 'owned_by': 'xAI'},
 ]
 
 # Helper to check if a string is a valid path to an image

@@ -70,7 +72,7 @@ class GrokBinding(LollmsLLMBinding):
             service_key (str): xAI API key.
         """
         super().__init__(BindingName, **kwargs)
-        self.model_name = kwargs.get("model_name", "grok-
+        self.model_name = kwargs.get("model_name", "grok-2-latest")
         self.service_key = kwargs.get("service_key")
         self.base_url = kwargs.get("base_url", GROK_API_BASE_URL)
         self._cached_models: Optional[List[Dict[str, str]]] = None

@@ -101,7 +103,8 @@ class GrokBinding(LollmsLLMBinding):
     def _process_and_handle_stream(self,
                                    response: requests.Response,
                                    stream: bool,
-                                   streaming_callback: Optional[Callable[[str, MSG_TYPE], None]]
+                                   streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+                                   think: bool = False
                                    ) -> Union[str, dict]:
         """Helper to process streaming responses from the API."""
         full_response_text = ""

@@ -119,6 +122,21 @@ class GrokBinding(LollmsLLMBinding):
                     if chunk['choices']:
                         delta = chunk['choices'][0].get('delta', {})
                         content = delta.get('content', '')
+                        # Check for reasoning content (DeepSeek-style) if Grok adopts it or if proxied
+                        reasoning = delta.get('reasoning_content', '')
+
+                        if reasoning:
+                            # If thinking is requested and we get reasoning tokens
+                            if think:
+                                if streaming_callback:
+                                    # We just stream the reasoning as is, user UI typically handles tagging or we could inject <think>
+                                    # Here we assume just passing the text is safer unless we track state
+                                    streaming_callback(reasoning, MSG_TYPE.MSG_TYPE_CHUNK)
+                                # We don't append reasoning to full_response_text usually if it's separate,
+                                # unless we want to return it in the final string wrapped.
+                                # Let's wrap it for the final return string.
+                                full_response_text += f"<think>{reasoning}</think>" # Naive wrapping for stream accumulation
+
                         if content:
                             full_response_text += content
                             if stream and streaming_callback:

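
Note: with the accumulation added above, each reasoning delta is wrapped in its own `<think>` tag, so the returned string interleaves many small tagged fragments rather than one contiguous block. A minimal standalone sketch of that behavior, using made-up delta values rather than a real API response:

```python
# Illustrative only: mimics the naive per-chunk wrapping added in the hunk above.
deltas = [
    {"reasoning_content": "The user wants one sentence."},  # hypothetical reasoning delta
    {"content": "Elon Musk is "},                           # hypothetical content delta
    {"content": "an entrepreneur."},
]

full_response_text = ""
for delta in deltas:
    reasoning = delta.get("reasoning_content", "")
    if reasoning:
        full_response_text += f"<think>{reasoning}</think>"
    content = delta.get("content", "")
    if content:
        full_response_text += content

print(full_response_text)
# <think>The user wants one sentence.</think>Elon Musk is an entrepreneur.
```
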
@@ -154,6 +172,9 @@ class GrokBinding(LollmsLLMBinding):
                       n_threads: Optional[int] = None, # Not applicable
                       ctx_size: int | None = None, # Determined by model
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[str] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = False, # auto
                       **kwargs
                       ) -> Union[str, dict]:
         """

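
A hedged usage sketch of the extended `generate_text` signature. The keyword names come from the hunk above; the import path is inferred from the file list at the top, and the prompt, key, and constructor arguments are placeholders modeled on the `__main__` test further down:

```python
from lollms_client.llm_bindings.grok import GrokBinding  # import path inferred from the file list

binding = GrokBinding(model_name="grok-2-latest", service_key="YOUR_XAI_KEY")  # placeholder key
text = binding.generate_text(
    "Summarize the xAI API in one sentence.",
    n_predict=128,
    stream=False,
    think=True,                 # request reasoning tokens when the backend provides them
    reasoning_effort="medium",  # "low", "medium", or "high" per the signature comment
)
print(text)
```
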
@@ -181,7 +202,9 @@ class GrokBinding(LollmsLLMBinding):
                         b64_data = base64.b64encode(image_file.read()).decode('utf-8')
                     else: # Assume it's a base64 string
                         b64_data = image_data
-
+                        if b64_data.startswith("data:image"):
+                            b64_data = b64_data.split(",")[1]
+                    media_type = "image/png" # Default assumption
 
                     user_content.append({
                         "type": "image_url",

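
The new branch strips a `data:` URI header before forwarding the payload, since only the raw base64 after the comma is wanted. A small standalone sketch of that normalization (the sample string is made up):

```python
b64_data = "data:image/png;base64,iVBORw0KGgoAAA..."  # hypothetical data URI
if b64_data.startswith("data:image"):
    b64_data = b64_data.split(",")[1]  # keep only the base64 payload after the comma
print(b64_data[:12])  # iVBORw0KGgoA
```
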
@@ -214,7 +237,7 @@ class GrokBinding(LollmsLLMBinding):
             )
             response.raise_for_status()
 
-            return self._process_and_handle_stream(response, stream, streaming_callback)
+            return self._process_and_handle_stream(response, stream, streaming_callback, think=think)
 
         except requests.exceptions.RequestException as ex:
             error_message = f"Grok API request failed: {str(ex)}"

@@ -238,6 +261,9 @@ class GrokBinding(LollmsLLMBinding):
              temperature: float = 0.7,
              top_p: float = 0.9,
              streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[str] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = False, # auto
              **kwargs
              ) -> Union[str, dict]:
         """

@@ -273,6 +299,18 @@ class GrokBinding(LollmsLLMBinding):
                                 })
                             except Exception as e:
                                 ASCIIColors.warning(f"Could not load image {file_path}: {e}")
+                        else:
+                            # Attempt to handle base64
+                            try:
+                                b64_data = file_path
+                                if b64_data.startswith("data:image"):
+                                    b64_data = b64_data.split(",")[1]
+                                content_parts.append({
+                                    "type": "image_url",
+                                    "image_url": {"url": f"data:image/png;base64,{b64_data}"}
+                                })
+                            except:
+                                pass
 
                 # Grok API expects content to be a string for assistant, or list for user.
                 if role == 'user':

@@ -303,7 +341,7 @@ class GrokBinding(LollmsLLMBinding):
             )
             response.raise_for_status()
 
-            return self._process_and_handle_stream(response, stream, streaming_callback)
+            return self._process_and_handle_stream(response, stream, streaming_callback, think=think)
 
         except requests.exceptions.RequestException as ex:
             error_message = f"Grok API request failed: {str(ex)}"

@@ -362,10 +400,10 @@ class GrokBinding(LollmsLLMBinding):
             "host_address": self.base_url,
             "model_name": self.model_name,
             "supports_structured_output": False,
-            "supports_vision": "vision" in self.model_name or "grok-1.5"
+            "supports_vision": "vision" in self.model_name or "grok-1.5" in self.model_name or "grok-2" in self.model_name,
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """
         Lists available models from the xAI API.
         Caches the result to avoid repeated API calls.

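
The hunks above show only the renamed signature, the `self._cached_models` attribute, and the `_FALLBACK_MODELS` list; the body of `list_models` is not part of this diff. A minimal sketch, assuming the usual cache-then-fallback pattern these pieces suggest (the `/models` endpoint call and field mapping are assumptions, not the released implementation):

```python
def list_models(self) -> List[Dict[str, str]]:
    # Return the cached list when we already fetched it once.
    if self._cached_models is not None:
        return self._cached_models
    try:
        response = requests.get(
            f"{self.base_url}/models",  # assumed OpenAI-compatible listing endpoint
            headers={"Authorization": f"Bearer {self.service_key}"},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json().get("data", [])
        self._cached_models = [
            {"model_name": m.get("id", ""), "owned_by": m.get("owned_by", "xAI")}
            for m in data
        ]
    except Exception:
        # Fall back to the hardcoded list when the API call fails.
        self._cached_models = _FALLBACK_MODELS
    return self._cached_models
```
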
@@ -433,8 +471,8 @@ if __name__ == '__main__':
     ASCIIColors.yellow("--- Testing GrokBinding ---")
 
     # --- Configuration ---
-    test_model_name = "grok-
-    test_vision_model_name = "grok-
+    test_model_name = "grok-2-latest"
+    test_vision_model_name = "grok-2-vision-latest"
 
     try:
         # --- Initialization ---

@@ -444,7 +482,7 @@ if __name__ == '__main__':
 
         # --- List Models ---
         ASCIIColors.cyan("\n--- Listing Models (dynamic) ---")
-        models = binding.
+        models = binding.list_models()
         if models:
             ASCIIColors.green(f"Found {len(models)} models.")
             for m in models:

@@ -462,7 +500,7 @@ if __name__ == '__main__':
         ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
         prompt_text = "Explain who Elon Musk is in one sentence."
         ASCIIColors.info(f"Prompt: {prompt_text}")
-        generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt="Be very concise.")
+        generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt="Be very concise.", think=True)
         if isinstance(generated_text, str):
             ASCIIColors.green(f"Generated text:\n{generated_text}")
         else:

lollms_client/llm_bindings/groq/__init__.py:

@@ -179,7 +179,7 @@ class GroqBinding(LollmsLLMBinding):
             "supports_vision": False, # Groq models do not currently support vision
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """Lists available models from the Groq service."""
         if not self.client:
             ASCIIColors.error("Groq client not initialized. Cannot list models.")

@@ -229,7 +229,7 @@ if __name__ == '__main__':
 
         # --- List Models ---
         ASCIIColors.cyan("\n--- Listing Models ---")
-        models = binding.
+        models = binding.list_models()
         if models:
             ASCIIColors.green(f"Found {len(models)} models on Groq. Available models:")
             for m in models:

lollms_client/llm_bindings/hugging_face_inference_api/__init__.py:

@@ -196,7 +196,7 @@ class HuggingFaceInferenceAPIBinding(LollmsLLMBinding):
             "supports_vision": False, # Vision models use a different API call
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """Lists text-generation models from the Hugging Face Hub."""
         if not self.hf_api:
             ASCIIColors.error("HF API client not initialized. Cannot list models.")

@@ -252,7 +252,7 @@ if __name__ == '__main__':
 
         # --- List Models ---
         ASCIIColors.cyan("\n--- Listing Models ---")
-        models = binding.
+        models = binding.list_models()
         if models:
             ASCIIColors.green(f"Successfully fetched {len(models)} text-generation models.")
             ASCIIColors.info("Top 5 most downloaded models:")

lollms_client/llm_bindings/litellm/__init__.py:

@@ -185,7 +185,7 @@ class LiteLLMBinding(LollmsLLMBinding):
             ASCIIColors.error(f"--- [LiteLLM Binding] Fallback method failed: {e}")
             return entries
 
-    def
+    def list_models(self) -> List[Dict]:
         url = f'{self.host_address}/model/info'
         headers = {'Authorization': f'Bearer {self.service_key}'}
         entries = []

lollms_client/llm_bindings/llamacpp/__init__.py:

@@ -66,20 +66,27 @@ pm.ensure_packages(["requests", "pillow", "psutil"]) # pillow for dummy image in
 if not pm.is_installed("llama-cpp-binaries"):
     def install_llama_cpp():
         system = platform.system()
-        python_version_simple = f"py{sys.version_info.major}"
-
-        cuda_suffix = "+cu124"
+        python_version_simple = f"py{sys.version_info.major}{sys.version_info.minor}" # e.g. py310 for 3.10
 
+        version_tag = "v0.56.0"
+        cuda_suffix = "+cu124"
 
         if system == "Windows":
-
-
+            # Try version-specific URL first
+            url = f"https://github.com/oobabooga/llama-cpp-binaries/releases/download/{version_tag}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-{python_version_simple}-none-win_amd64.whl"
+            # Fallback to generic py3 if version-specific doesn't exist
+            fallback_url = f"https://github.com/oobabooga/llama-cpp-binaries/releases/download/{version_tag}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-py3-none-win_amd64.whl"
         elif system == "Linux":
-
-
+            # Try version-specific URL first
+            url = f"https://github.com/oobabooga/llama-cpp-binaries/releases/download/{version_tag}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-{python_version_simple}-none-linux_x86_64.whl"
+            # Fallback to generic py3 if version-specific doesn't exist
+            fallback_url = f"https://github.com/oobabooga/llama-cpp-binaries/releases/download/{version_tag}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-py3-none-linux_x86_64.whl"
         else:
-            ASCIIColors.
-
+            ASCIIColors.error(f"Unsupported OS for precompiled llama-cpp-binaries: {system}. "
+                              "You might need to set 'llama_server_binary_path' in the binding config "
+                              "to point to a manually compiled llama.cpp server binary.")
+            return False
+
 
         ASCIIColors.info(f"Attempting to install llama-cpp-binaries from: {url}")
         try:

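
To make the naming scheme concrete, here is a sketch that simply composes the two candidate URLs for an assumed Python 3.11 interpreter on Linux; whether those exact wheel files exist on the release page is not something this diff guarantees:

```python
version_tag = "v0.56.0"
cuda_suffix = "+cu124"
python_version_simple = "py311"  # assumed interpreter: Python 3.11

base = f"https://github.com/oobabooga/llama-cpp-binaries/releases/download/{version_tag}"
url = f"{base}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-{python_version_simple}-none-linux_x86_64.whl"
fallback_url = f"{base}/llama_cpp_binaries-{version_tag.lstrip('v')}{cuda_suffix}-py3-none-linux_x86_64.whl"

print(url)           # .../v0.56.0/llama_cpp_binaries-0.56.0+cu124-py311-none-linux_x86_64.whl
print(fallback_url)  # .../v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-linux_x86_64.whl
```
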
@@ -628,7 +635,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
 
         if not model_to_load:
             self._scan_models()
-            available_models = self.
+            available_models = self.list_models()
             if not available_models:
                 ASCIIColors.error("No model specified and no GGUF models found in models path.")
                 return False

@@ -964,7 +971,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
 
         ASCIIColors.info(f"Scanned {len(self._model_path_map)} models from {self.models_path}.")
 
-    def
+    def list_models(self) -> List[Dict[str, Any]]:
         self._scan_models()
         models_found = []
         for unique_name, model_path in self._model_path_map.items():

lollms_client/llm_bindings/lollms/__init__.py:

@@ -1,4 +1,4 @@
-# bindings/
+# bindings/lollms/__init__.py
 import requests
 import json
 from lollms_client.lollms_llm_binding import LollmsLLMBinding

@@ -94,6 +94,7 @@ class LollmsBinding(LollmsLLMBinding):
             service_key (str): Authentication key for the service. Defaults to None. This is a key generated
                                on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+            certificate_file_path (str): Path to a specific certificate file for SSL verification.
             personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
         """
         super().__init__(BindingName, **kwargs)

@@ -103,11 +104,16 @@ class LollmsBinding(LollmsLLMBinding):
         self.model_name=kwargs.get("model_name")
         self.service_key=kwargs.get("service_key")
         self.verify_ssl_certificate=kwargs.get("verify_ssl_certificate", True)
+        self.certificate_file_path=kwargs.get("certificate_file_path")
         self.default_completion_format=kwargs.get("default_completion_format", ELF_COMPLETION_FORMAT.Chat)
 
         if not self.service_key:
             self.service_key = os.getenv("LOLLMS_API_KEY", self.service_key)
-
+
+        # Determine verification strategy: specific file takes precedence, otherwise boolean flag
+        verify = self.certificate_file_path if self.certificate_file_path else self.verify_ssl_certificate
+
+        self.client = openai.OpenAI(api_key=self.service_key, base_url=None if self.host_address is None else self.host_address if len(self.host_address)>0 else None, http_client=httpx.Client(verify=verify))
         self.completion_format = ELF_COMPLETION_FORMAT.Chat
 
     def lollms_listMountedPersonalities(self, host_address:str|None=None):

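
httpx accepts either a boolean or a path to a CA bundle for `verify`, which is what lets the single `verify` variable above cover both configurations. A small sketch (the certificate path is a placeholder):

```python
import httpx

# Any of these forms is accepted by httpx.Client(verify=...):
client_default = httpx.Client(verify=True)                          # verify against system CAs
client_custom = httpx.Client(verify="/path/to/my_ca_bundle.pem")    # placeholder path to a CA bundle
client_off = httpx.Client(verify=False)                             # disable verification (not recommended)
```
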
@@ -149,6 +155,12 @@ class LollmsBinding(LollmsLLMBinding):
             "stop", "max_tokens", "presence_penalty", "frequency_penalty",
             "logit_bias", "stream", "user", "max_completion_tokens"
         }
+        if kwargs.get("think", False):
+            allowed_params.append("reasoning")
+            kwargs["reasoning"]={
+                "effort": allowed_params.append("reasoning_effort", "low"),
+                "summary": allowed_params.append("reasoning_summary", "auto")
+            }
 
         params = {
             "model": model,

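
As released, these added lines call `.append` on `allowed_params` (which the surrounding braces suggest is a set, and which in any case does not take two arguments) and use the call's return value, `None`, as the effort and summary values. A minimal standalone sketch of what the block appears to intend, assuming the values are meant to come from the incoming keyword arguments; this is an interpretation, not the shipped code:

```python
# Hypothetical reconstruction of the intended behavior, for illustration only.
allowed_params = {"stop", "max_tokens", "presence_penalty", "frequency_penalty",
                  "logit_bias", "stream", "user", "max_completion_tokens"}
kwargs = {"think": True, "reasoning_effort": "medium"}  # example inputs

if kwargs.get("think", False):
    allowed_params.add("reasoning")  # sets use .add, not .append
    kwargs["reasoning"] = {
        "effort": kwargs.get("reasoning_effort", "low"),     # low, medium, high
        "summary": kwargs.get("reasoning_summary", "auto"),  # auto
    }

print(kwargs["reasoning"])  # {'effort': 'medium', 'summary': 'auto'}
```
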
@@ -190,7 +202,11 @@ class LollmsBinding(LollmsLLMBinding):
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                       split: Optional[bool] = False,
                       user_keyword: Optional[str] = "!@>user:",
-                      ai_keyword: Optional[str] = "!@>assistant:"
+                      ai_keyword: Optional[str] = "!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
                       ) -> Union[str, dict]:
 
         count = 0

@@ -227,12 +243,16 @@ class LollmsBinding(LollmsLLMBinding):
                 temperature=temperature,
                 top_p=top_p,
                 repeat_penalty=repeat_penalty,
-                seed=seed
+                seed=seed,
+                think = think,
+                reasoning_effort=reasoning_effort,
+                reasoning_summary=reasoning_summary
+                )
             try:
                 chat_completion = self.client.chat.completions.create(**params)
             except Exception as ex:
                 # exception for new openai models
-                params["max_completion_tokens"]=params
+                params["max_completion_tokens"]=params.get("max_tokens") or params.get("max_completion_tokens") or self.default_ctx_size
                 params["temperature"]=1
                 try: del params["max_tokens"]
                 except Exception: pass

@@ -263,7 +283,10 @@ class LollmsBinding(LollmsLLMBinding):
                 temperature=temperature,
                 top_p=top_p,
                 repeat_penalty=repeat_penalty,
-                seed=seed
+                seed=seed,
+                think = think,
+                reasoning_effort=reasoning_effort,
+                reasoning_summary=reasoning_summary)
             try:
                 completion = self.client.completions.create(**params)
             except Exception as ex:

@@ -316,6 +339,9 @@ class LollmsBinding(LollmsLLMBinding):
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
                       **kwargs
                       ) -> Union[str, dict]:
         # Build the request parameters

@@ -364,20 +390,22 @@ class LollmsBinding(LollmsLLMBinding):
         return output
 
     def chat(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             **kwargs
+             ) -> Union[str, dict]:
         """
         Conduct a chat session with the OpenAI model using a LollmsDiscussion object.
 

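
A hedged usage sketch of the new `chat` signature. It assumes a `LollmsDiscussion` instance has already been built elsewhere (its construction is outside this diff) and a callback of the `(str, MSG_TYPE)` shape used throughout these bindings; the return-value convention of the callback is an assumption:

```python
def on_chunk(token: str, msg_type):
    # Print streamed tokens as they arrive.
    print(token, end="", flush=True)
    return True  # assumption: a truthy return keeps streaming going

# `binding` is a LollmsBinding and `discussion` a LollmsDiscussion built elsewhere.
answer = binding.chat(
    discussion,
    n_predict=256,
    stream=True,
    streaming_callback=on_chunk,
    think=True,  # forwarded to the reasoning handling added in this release
)
```
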
@@ -564,7 +592,7 @@ class LollmsBinding(LollmsLLMBinding):
             "model_name": self.model_name
         }
 
-    def
+    def list_models(self) -> List[Dict]:
         # Known context lengths
         known_context_lengths = {
             "gpt-4o": 128000,

@@ -643,3 +671,30 @@ class LollmsBinding(LollmsLLMBinding):
         self.model = model_name
         self.model_name = model_name
         return True
+
+    def ps(self):
+        """
+        List models (simulating a process status command).
+        Since Lollms/OpenAI API doesn't have a specific 'ps' endpoint for running models with memory stats,
+        we list available models and populate structure with available info, leaving hardware stats empty.
+        """
+        # Since there is no dedicated ps endpoint to see *running* models in the standard OpenAI API,
+        # we list available models and try to map relevant info.
+        models = self.list_models()
+        standardized_models = []
+        for m in models:
+            standardized_models.append({
+                "model_name": m.get("model_name"),
+                "size": None,
+                "vram_size": None,
+                "gpu_usage_percent": None,
+                "cpu_usage_percent": None,
+                "expires_at": None,
+                "parameters_size": None,
+                "quantization_level": None,
+                "parent_model": None,
+                "context_size": m.get("context_length"),
+                "owned_by": m.get("owned_by"),
+                "created": m.get("created")
+            })
+        return standardized_models

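
A quick illustration of consuming the `ps()` output; only the keys shown in the hunk above are assumed to exist, and most hardware-related fields will be `None` since the backend does not report them:

```python
for entry in binding.ps():  # binding is a LollmsBinding instance
    name = entry["model_name"]
    ctx = entry["context_size"]  # may be None when the backend does not report it
    print(f"{name}: context={ctx}, owned_by={entry['owned_by']}")
```
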
lollms_client/llm_bindings/mistral/__init__.py:

@@ -224,7 +224,7 @@ class MistralBinding(LollmsLLMBinding):
             "supports_vision": False, # Mistral API does not currently support vision
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """Lists available models from the Mistral service."""
         if not self.client:
             ASCIIColors.error("Mistral client not initialized. Cannot list models.")

@@ -273,7 +273,7 @@ if __name__ == '__main__':
 
         # --- List Models ---
         ASCIIColors.cyan("\n--- Listing Models ---")
-        models = binding.
+        models = binding.list_models()
         if models:
             ASCIIColors.green(f"Found {len(models)} models on Mistral. Available models:")
             for m in models:
