lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +151 -32
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
- lollms_client/llm_bindings/ollama/__init__.py +309 -93
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +148 -29
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +465 -0
- lollms_client/llm_bindings/perplexity/__init__.py +326 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +516 -1890
- lollms_client/lollms_discussion.py +55 -18
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_personality.py +5 -2
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +115 -0
- lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
- lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client/ttm_bindings/bark/__init__.py +0 -339
- lollms_client-1.4.1.dist-info/RECORD +0 -78
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
```diff
--- a/lollms_client/llm_bindings/ollama/__init__.py
+++ b/lollms_client/llm_bindings/ollama/__init__.py
@@ -14,6 +14,11 @@ import pipmaster as pm
 from lollms_client.lollms_utilities import ImageTokenizer
 pm.ensure_packages(["ollama","pillow","tiktoken"])
 import re
+import platform
+import subprocess
+import urllib.request
+import zipfile
+import os
 
 import ollama
 import tiktoken
```
```diff
@@ -57,7 +62,9 @@ def count_tokens_ollama(
     res = ollama_client.chat(
         model=model_name,
         messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
-        stream=False,
+        stream=False,
+        think=False,
+        options={"num_predict":1}
     )
 
     return res.prompt_eval_count-5
```
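The token counter above sizes a prompt by asking the server itself rather than loading a local tokenizer: it sends the text as a single user message with generation capped at one token (`num_predict: 1`, `think=False`) and reads back `prompt_eval_count`, subtracting the handful of tokens the chat template wraps around the message. A minimal standalone sketch of the same trick, assuming a reachable Ollama server and a recent `ollama` Python client whose `chat()` result exposes `prompt_eval_count` (as the binding itself relies on); the `-5` template offset is copied from the binding and may vary between models:

```python
import ollama

def count_tokens_via_server(text: str, model_name: str, host: str = "http://localhost:11434") -> int:
    """Estimate how many tokens `text` occupies for `model_name` by letting the server evaluate it."""
    client = ollama.Client(host=host)
    res = client.chat(
        model=model_name,
        messages=[{"role": "system", "content": ""},
                  {"role": "user", "content": text}],
        stream=False,
        options={"num_predict": 1},  # generate as little as possible; only the prompt evaluation count matters
    )
    # prompt_eval_count includes the chat-template wrapper tokens; the binding subtracts 5 for them.
    return res.prompt_eval_count - 5

# Example (assumes the model has already been pulled):
# print(count_tokens_via_server("Why is the sky blue?", "llama3"))
```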
```diff
@@ -108,24 +115,28 @@ class OllamaBinding(LollmsLLMBinding):
             raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
 
     def generate_text(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
 
```
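With the rewritten signature, callers can ask reasoning-capable models to expose their chain of thought through `think`, with `reasoning_effort` and `reasoning_summary` reserved for models that grade their reasoning. A hedged usage sketch; the constructor arguments (`host_address`, `model_name`) are assumptions, since the binding's `__init__` is not part of this diff:

```python
# Hypothetical usage of the new generate_text signature; constructor argument
# names are assumed and should be checked against the binding's __init__.
from lollms_client.llm_bindings.ollama import OllamaBinding

binding = OllamaBinding(host_address="http://localhost:11434", model_name="deepseek-r1:8b")

def on_chunk(chunk: str, msg_type) -> bool:
    print(chunk, end="", flush=True)
    return True  # returning False tells the binding to stop streaming

text = binding.generate_text(
    "Explain in two sentences why the sky is blue.",
    n_predict=128,
    stream=True,
    think=True,               # reasoning models return a <think>...</think> prefix
    reasoning_effort="low",   # only consulted for gpt-oss models
    streaming_callback=on_chunk,
)
```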
```diff
@@ -168,6 +179,8 @@ class OllamaBinding(LollmsLLMBinding):
         if ctx_size is not None: options['num_ctx'] = ctx_size
 
         full_response_text = ""
+        think = think if "gpt-oss" not in self.model_name else reasoning_effort
+        ASCIIColors.magenta(f"Generation with think: {think}")
 
         try:
             if images: # Multimodal
```
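The two lines added here mean the `think` flag is not always a boolean by the time it reaches Ollama: for `gpt-oss` models the binding passes the `reasoning_effort` string through instead. A small illustration that mirrors the expression in the hunk:

```python
def effective_think(model_name: str, think: bool, reasoning_effort: str = "low"):
    """gpt-oss models receive an effort level ('low'/'medium'/'high'); every other model gets the boolean."""
    return think if "gpt-oss" not in model_name else reasoning_effort

print(effective_think("llama3.1:8b", True))          # True
print(effective_think("gpt-oss:20b", True, "high"))  # 'high'
```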
```diff
@@ -176,6 +189,8 @@ class OllamaBinding(LollmsLLMBinding):
                 for img_path in images:
                     # Assuming img_path is a file path. ollama-python will read and encode it.
                     # If images were base64 strings, they would need decoding to bytes first.
+                    if img_path.startswith("data:image/png;base64,"):
+                        img_path = img_path[len("data:image/png;base64,"):]
                     processed_images.append(img_path)
 
                 messages = [
@@ -192,24 +207,37 @@ class OllamaBinding(LollmsLLMBinding):
                         model=self.model_name,
                         messages=messages,
                         stream=True,
+                        think=think,
                         options=options if options else None
                     )
-
-
-                    if
+                    in_thinking = False
+                    for chunk in response_stream:
+                        if chunk.message.thinking and not in_thinking:
+                            full_response_text += "<think>\n"
+                            in_thinking = True
+
+                        if chunk.message.content:# Ensure there is content to process
+                            chunk_content = chunk.message.content
+                            if in_thinking:
+                                full_response_text += "\n</think>\n"
+                                in_thinking = False
                             full_response_text += chunk_content
                             if streaming_callback:
                                 if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
                                     break # Callback requested stop
                     return full_response_text
                 else: # Not streaming
-
+                    response = self.ollama_client.chat(
                         model=self.model_name,
                         messages=messages,
                         stream=False,
+                        think=think,
                         options=options if options else None
                     )
-
+                    full_response_text = response.message.content
+                    if think:
+                        full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                    return full_response_text
             else: # Text-only
                 messages = [
                     {'role': 'system', 'content':system_prompt},
@@ -224,24 +252,38 @@ class OllamaBinding(LollmsLLMBinding):
                         model=self.model_name,
                         messages=messages,
                         stream=True,
+                        think=think,
                         options=options if options else None
                     )
-
-
-                    if
+                    in_thinking = False
+                    for chunk in response_stream:
+                        if chunk.message.thinking and not in_thinking:
+                            full_response_text += "<think>\n"
+                            in_thinking = True
+
+                        if chunk.message.content:# Ensure there is content to process
+                            chunk_content = chunk.message.content
+                            if in_thinking:
+                                full_response_text += "\n</think>\n"
+                                in_thinking = False
                             full_response_text += chunk_content
                             if streaming_callback:
                                 if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                    break
+                                    break # Callback requested stop
                     return full_response_text
                 else: # Not streaming
-
+                    response = self.ollama_client.chat(
                         model=self.model_name,
                         messages=messages,
                         stream=False,
+                        think=think,
                         options=options if options else None
                     )
-
+                    full_response_text = response.message.content
+                    if think:
+                        full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                    return full_response_text
+
         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
             ASCIIColors.error(error_message)
```
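In both the multimodal and text-only branches, reasoning tokens are now folded into the returned string between literal `<think>` and `</think>` markers: streamed chunks open the tag as soon as `chunk.message.thinking` appears and close it when normal content resumes. A consumer-side helper, sketched here and not part of the package, can separate the trace from the answer:

```python
import re

def split_think(text: str) -> tuple[str, str]:
    """Return (thinking, answer) from a response that may begin with a <think>...</think> block."""
    match = re.match(r"\s*<think>\n?(.*?)\n?</think>\n?(.*)", text, flags=re.DOTALL)
    if match:
        return match.group(1).strip(), match.group(2).strip()
    return "", text.strip()

thinking, answer = split_think("<think>\nThe user wants a short answer.\n</think>\nBlue light scatters more than red light.")
print(answer)  # -> Blue light scatters more than red light.
```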
```diff
@@ -268,6 +310,9 @@ class OllamaBinding(LollmsLLMBinding):
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
                       **kwargs
                       ) -> Union[str, dict]:
         if not self.ollama_client:
@@ -296,15 +341,24 @@ class OllamaBinding(LollmsLLMBinding):
                 for item in content:
                     if item.get("type") == "text":
                         text_parts.append(item.get("text", ""))
-                    elif item.get("type") == "image_url":
-                        base64_data = item.get("image_url"
-                        url = item.get("image_url", {}).get("url")
+                    elif item.get("type") == "input_image" or item.get("type") == "image_url":
+                        base64_data = item.get("image_url")
                         if base64_data:
-
-
-
-
-
+                            if isinstance(base64_data, str):
+                                # ⚠️ remove prefix "data:image/...;base64,"
+                                cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
+                                images.append(cleaned)
+                            elif base64_data and isinstance(base64_data, dict) :
+                                if "base64" in base64_data:
+                                    cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["base64"])
+                                    images.append(cleaned)
+                                elif "url" in base64_data :
+                                    if "http" in base64_data["url"]:
+                                        images.append(base64_data["url"])
+                                    else:
+                                        cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["url"])
+                                        images.append(cleaned)
+
 
             return {
                 "role": role,
```
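The message converter now tolerates the shapes an OpenAI-style `image_url` part can take (a raw base64 string, a `{"base64": ...}` dict, or a `{"url": ...}` dict) and strips any `data:image/...;base64,` prefix, because Ollama expects bare base64 payloads or plain http URLs. The regex behaves as in this standalone illustration, which is not library code:

```python
import re

DATA_URI_PREFIX = re.compile(r"^data:image/[^;]+;base64,")

def to_ollama_image(value) -> str:
    """Normalize an OpenAI-style image_url value into what Ollama accepts."""
    if isinstance(value, dict):
        value = value.get("base64") or value.get("url", "")
    if value.startswith("http"):
        return value                       # remote URL: pass through untouched
    return DATA_URI_PREFIX.sub("", value)  # otherwise: bare base64 payload

print(to_ollama_image({"url": "data:image/png;base64,iVBORw0KGgo="}))  # -> iVBORw0KGgo=
```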
```diff
@@ -335,6 +389,7 @@ class OllamaBinding(LollmsLLMBinding):
                     model=self.model_name,
                     messages=ollama_messages,
                     stream=True,
+                    think = think,
                     options=options if options else None
                 )
                 for chunk_dict in response_stream:
@@ -346,13 +401,17 @@ class OllamaBinding(LollmsLLMBinding):
                                 break
                 return full_response_text
             else:
-
+                response = self.ollama_client.chat(
                     model=self.model_name,
                     messages=ollama_messages,
                     stream=False,
+                    think=think if "gpt-oss" not in self.model_name else reasoning_effort,
                     options=options if options else None
                 )
-
+                full_response_text = response.message.content
+                if think:
+                    full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                return full_response_text
 
         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -366,37 +425,28 @@ class OllamaBinding(LollmsLLMBinding):
             error_message = f"An unexpected error occurred: {str(ex)}"
             trace_exception(ex)
             return {"status": False, "error": error_message}
-
-
-        except ollama.ResponseError as e:
-            error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
-            ASCIIColors.error(error_message)
-            return {"status": False, "error": error_message, "status_code": e.status_code}
-        except ollama.RequestError as e: # Covers connection errors, timeouts during request
-            error_message = f"Ollama API RequestError: {str(e)}"
-            ASCIIColors.error(error_message)
-            return {"status": False, "error": error_message}
-        except Exception as ex:
-            error_message = f"An unexpected error occurred: {str(ex)}"
-            trace_exception(ex)
-            return {"status": False, "error": error_message}
 
 
     def chat(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[bool] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = "auto", # auto
+             **kwargs
+
+             ) -> Union[str, dict]:
         """
         Conduct a chat session with the Ollama model using a LollmsDiscussion object.
 
@@ -441,6 +491,8 @@ class OllamaBinding(LollmsLLMBinding):
         options = {k: v for k, v in options.items() if v is not None}
 
         full_response_text = ""
+        think = think if "gpt-oss" not in self.model_name else reasoning_effort
+        ASCIIColors.magenta(f"Generation with think: {think}")
 
         try:
             # 3. Call the Ollama API
@@ -449,24 +501,38 @@ class OllamaBinding(LollmsLLMBinding):
                     model=self.model_name,
                     messages=messages,
                     stream=True,
+                    think=think,
                     options=options if options else None
                 )
+                in_thinking = False
                 for chunk in response_stream:
-
-
+                    if chunk.message.thinking and not in_thinking:
+                        full_response_text += "<think>\n"
+                        in_thinking = True
+
+                    if chunk.message.content:# Ensure there is content to process
+                        chunk_content = chunk.message.content
+                        if in_thinking:
+                            full_response_text += "\n</think>\n"
+                            in_thinking = False
                         full_response_text += chunk_content
                         if streaming_callback:
                             if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                                break # Callback requested stop
+
                 return full_response_text
             else: # Not streaming
-
+                response = self.ollama_client.chat(
                     model=self.model_name,
                     messages=messages,
                     stream=False,
+                    think=think,
                     options=options if options else None
                 )
-
+                full_response_text = response.message.content
+                if think:
+                    full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                return full_response_text
 
         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
```
```diff
@@ -597,7 +663,144 @@ class OllamaBinding(LollmsLLMBinding):
             "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
         }
 
-    def
+    def pull_model(self, model_name: str, progress_callback: Callable[[dict], None] = None, **kwargs) -> dict:
+        """
+        Pulls a model from the Ollama library.
+
+        Args:
+            model_name (str): The name of the model to pull.
+            progress_callback (Callable[[dict], None], optional): A callback function that receives progress updates.
+                The dict typically contains 'status', 'completed', 'total'.
+
+        Returns:
+            dict: Dictionary with status (bool) and message (str).
+        """
+        if not self.ollama_client:
+            msg = "Ollama client not initialized. Cannot pull model."
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+
+        try:
+            ASCIIColors.info(f"Pulling model {model_name}...")
+            # Stream the pull progress
+            for progress in self.ollama_client.pull(model_name, stream=True):
+                # Send raw progress to callback if provided
+                if progress_callback:
+                    progress_callback(progress)
+
+                # Default console logging
+                status = progress.get('status', '')
+                completed = progress.get('completed')
+                total = progress.get('total')
+
+                if completed and total:
+                    percent = (completed / total) * 100
+                    print(f"\r{status}: {percent:.2f}%", end="", flush=True)
+                else:
+                    print(f"\r{status}", end="", flush=True)
+
+            print() # Clear line
+            msg = f"Model {model_name} pulled successfully."
+            ASCIIColors.success(msg)
+            return {"status": True, "message": msg}
+
+        except ollama.ResponseError as e:
+            msg = f"Ollama API Pull Error: {e.error or 'Unknown error'} (status code: {e.status_code})"
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+        except ollama.RequestError as e:
+            msg = f"Ollama API Request Error: {str(e)}"
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+        except Exception as ex:
+            msg = f"An unexpected error occurred while pulling model: {str(ex)}"
+            ASCIIColors.error(msg)
+            trace_exception(ex)
+            return {"status": False, "message": msg}
+
+    def install_ollama(self, callback: Callable[[dict], None] = None, **kwargs) -> dict:
+        """
+        Installs Ollama based on the operating system.
+        """
+        system = platform.system()
+
+        def report_progress(status, message, completed=0, total=100):
+            if callback:
+                callback({"status": status, "message": message, "completed": completed, "total": total})
+            else:
+                print(f"{status}: {message}")
+
+        try:
+            if system == "Linux":
+                report_progress("working", "Detected Linux. Running installation script...", 10, 100)
+                # Use the official install script
+                cmd = "curl -fsSL https://ollama.com/install.sh | sh"
+                process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+                stdout, stderr = process.communicate()
+
+                if process.returncode == 0:
+                    report_progress("success", "Ollama installed successfully on Linux.", 100, 100)
+                    return {"status": True, "message": "Ollama installed successfully."}
+                else:
+                    msg = f"Installation failed: {stderr}"
+                    report_progress("error", msg, 0, 0)
+                    return {"status": False, "error": msg}
+
+            elif system == "Windows":
+                report_progress("working", "Detected Windows. Downloading OllamaSetup.exe...", 10, 100)
+                url = "https://ollama.com/download/OllamaSetup.exe"
+                filename = "OllamaSetup.exe"
+
+                # Download with progress
+                try:
+                    def dl_callback(count, block_size, total_size):
+                        percent = int(count * block_size * 100 / total_size)
+                        report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+                    urllib.request.urlretrieve(url, filename, dl_callback)
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to download installer: {e}"}
+
+                report_progress("working", "Running installer...", 90, 100)
+                try:
+                    subprocess.run([filename], check=True) # Runs the installer GUI
+                    # We can't easily wait for the GUI installer to finish unless we block or it has silent flags.
+                    # Ollama installer is usually simple.
+                    report_progress("success", "Installer launched. Please complete the installation.", 100, 100)
+                    return {"status": True, "message": "Installer launched."}
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to launch installer: {e}"}
+
+            elif system == "Darwin": # macOS
+                report_progress("working", "Detected macOS. Downloading Ollama...", 10, 100)
+                url = "https://ollama.com/download/Ollama-darwin.zip"
+                filename = "Ollama-darwin.zip"
+
+                # Download with progress
+                try:
+                    def dl_callback(count, block_size, total_size):
+                        percent = int(count * block_size * 100 / total_size)
+                        report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+                    urllib.request.urlretrieve(url, filename, dl_callback)
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to download: {e}"}
+
+                report_progress("working", "Unzipping...", 80, 100)
+                with zipfile.ZipFile(filename, 'r') as zip_ref:
+                    zip_ref.extractall("Ollama_Install")
+
+                report_progress("success", "Ollama downloaded and extracted to 'Ollama_Install'. Please move 'Ollama.app' to Applications.", 100, 100)
+                return {"status": True, "message": "Downloaded and extracted. Please install Ollama.app manually."}
+
+            else:
+                return {"status": False, "error": f"Unsupported OS: {system}"}
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "error": str(e)}
+
+    def list_models(self) -> List[Dict[str, str]]:
         """
         Lists available models from the Ollama service using the ollama-python library.
         The returned list of dictionaries matches the format of the original template.
```
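`pull_model` streams progress updates from ollama's pull API and hands each one to the optional callback before falling back to console logging, so a UI can render its own progress bar. A hedged driving sketch, treating each update as the dict the binding's docstring documents; the constructor arguments are, as above, assumptions about the binding's `__init__`:

```python
from lollms_client.llm_bindings.ollama import OllamaBinding

# Constructor argument names are assumed, not taken from this diff.
binding = OllamaBinding(host_address="http://localhost:11434", model_name="llama3.2:3b")

def show_progress(progress: dict) -> None:
    """Print the raw progress updates ('status', 'completed', 'total') emitted during the pull."""
    status = progress.get("status", "")
    completed, total = progress.get("completed"), progress.get("total")
    if completed and total:
        print(f"{status}: {completed / total:.1%}")
    else:
        print(status)

result = binding.pull_model("llama3.2:3b", progress_callback=show_progress)
print(result["message"])
```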
```diff
@@ -623,10 +826,10 @@
                 })
             return model_info_list
         except ollama.ResponseError as e:
-            ASCIIColors.error(f"Ollama API
+            ASCIIColors.error(f"Ollama API list_models ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
             return []
         except ollama.RequestError as e: # Covers connection errors, timeouts during request
-            ASCIIColors.error(f"Ollama API
+            ASCIIColors.error(f"Ollama API list_models RequestError: {str(e)} from {self.host_address}")
             return []
         except Exception as ex:
             trace_exception(ex)
@@ -660,6 +863,9 @@ class OllamaBinding(LollmsLLMBinding):
         """
         if model_name is None:
             model_name = self.model_name
+        if not model_name:
+            ASCIIColors.warning("Model name not specified and no default model set.")
+            return None
 
         try:
             info = ollama.show(model_name)
@@ -694,6 +900,12 @@ class OllamaBinding(LollmsLLMBinding):
             'llama3.1': 131072, # Llama 3.1 extended context
             'llama3.2': 131072, # Llama 3.2 extended context
             'llama3.3': 131072, # Assuming similar to 3.1/3.2
+            'gpt-oss:20b': 16000, # GPT-OSS extended
+            'gpt-oss:120b': 128000, # GPT-OSS extended
+            'codestral': 256000, # Codestral
+            'mistralai-medium': 128000, # Mistral medium
+            'mistralai-mini': 128000, # Mistral medium
+            'ministral': 256000, # Mistral medium
             'mistral': 32768, # Mistral 7B v0.2+ default
             'mixtral': 32768, # Mixtral 8x7B default
             'mixtral8x22b': 65536, # Mixtral 8x22B default
@@ -706,6 +918,9 @@ class OllamaBinding(LollmsLLMBinding):
             'qwen': 8192, # Qwen default
             'qwen2': 32768, # Qwen2 default for 7B
             'qwen2.5': 131072, # Qwen2.5 with 128K
+            'qwen3': 128000, # Qwen3 with 128k
+            'qwen3-vl': 128000, # Qwen3-vl with 128k
+            'qwen3-coder': 256000, # Qwen3 with 256k
             'codellama': 16384, # CodeLlama extended
             'codegemma': 8192, # CodeGemma default
             'deepseek-coder': 16384, # DeepSeek-Coder V1 default
@@ -726,6 +941,7 @@ class OllamaBinding(LollmsLLMBinding):
             'orca2': 4096, # Orca 2 default
             'dolphin': 32768, # Dolphin (often Mistral-based)
             'openhermes': 8192, # OpenHermes default
+            'gemini-3': 1000000, # Gemini 3 is a beast with 1M tokens
         }
 
         # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
```
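The enlarged table maps base model names to default context windows, and the comment on the last context line says the lookup strips the Ollama tag first ('llama3' from 'llama3:8b-instruct'). One plausible lookup, sketched here only since the extraction code itself sits outside this hunk:

```python
# Hypothetical lookup; the table below reuses a few entries from the diff for illustration.
MODEL_CONTEXT_SIZES = {
    "llama3.1": 131072,
    "gpt-oss:20b": 16000,
    "gpt-oss:120b": 128000,
    "qwen3-coder": 256000,
    "mistral": 32768,
}

def guess_context_size(model_name: str, default: int = 4096) -> int:
    """Try the full model id first, then the base name before the ':' tag."""
    name = model_name.lower()
    if name in MODEL_CONTEXT_SIZES:
        return MODEL_CONTEXT_SIZES[name]
    return MODEL_CONTEXT_SIZES.get(name.split(":")[0], default)

print(guess_context_size("llama3.1:8b-instruct-q4_K_M"))  # 131072
print(guess_context_size("gpt-oss:20b"))                   # 16000
```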
```diff
@@ -749,18 +965,6 @@ class OllamaBinding(LollmsLLMBinding):
         Returns:
             list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
                         Returns an empty list if the client is not initialized or if an error occurs.
-
-        Example of a returned model dictionary:
-        {
-            "model_name": "gemma3:12b",
-            "size": 13861175232,
-            "vram_size": 10961479680,
-            "parameters_size": "12.2B",
-            "quantization_level": "Q4_K_M",
-            "context_size": 32000,
-            "parent_model": "",
-            "expires_at": "2025-08-20T22:28:18.6708784+02:00"
-        }
         """
         if not self.ollama_client:
             ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
@@ -775,10 +979,22 @@ class OllamaBinding(LollmsLLMBinding):
             for model_data in models_list:
                 details = model_data.get('details', {})
 
+                size = model_data.get("size", 0)
+                size_vram = model_data.get("size_vram", 0)
+
+                # Calculate spread
+                gpu_usage = 0
+                cpu_usage = 0
+                if size > 0:
+                    gpu_usage = min(100, (size_vram / size) * 100)
+                    cpu_usage = max(0, 100 - gpu_usage)
+
                 flat_model_info = {
                     "model_name": model_data.get("name"),
-                    "size":
-                    "vram_size":
+                    "size": size,
+                    "vram_size": size_vram,
+                    "gpu_usage_percent": round(gpu_usage, 2),
+                    "cpu_usage_percent": round(cpu_usage, 2),
                     "expires_at": model_data.get("expires_at"),
                     "parameters_size": details.get("parameter_size"),
                     "quantization_level": details.get("quantization_level"),
```
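`list_running_models` (the `ollama ps` equivalent) now derives how much of each loaded model sits in VRAM versus system RAM from the reported `size` and `size_vram`. Plugging in the figures from the docstring example this release removes (a gemma3:12b with size 13861175232 and vram_size 10961479680) gives roughly a 79/21 split; a worked run of the same formula:

```python
size = 13_861_175_232       # total bytes of the loaded model
size_vram = 10_961_479_680  # bytes resident in GPU memory

gpu_usage = min(100, (size_vram / size) * 100) if size > 0 else 0
cpu_usage = max(0, 100 - gpu_usage)

print(f"gpu_usage_percent: {round(gpu_usage, 2)}")  # ~79.08
print(f"cpu_usage_percent: {round(cpu_usage, 2)}")  # ~20.92
```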
```diff
@@ -815,7 +1031,7 @@ if __name__ == '__main__':
 
         # --- List Models ---
         ASCIIColors.cyan("\n--- Listing Models ---")
-        models = binding.
+        models = binding.list_models()
         if models:
             ASCIIColors.green(f"Found {len(models)} models. First 5:")
             for m in models[:5]:
@@ -846,7 +1062,7 @@ if __name__ == '__main__':
         ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
         prompt_text = "Why is the sky blue?"
         ASCIIColors.info(f"Prompt: {prompt_text}")
-        generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
+        generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False, think=False)
         if isinstance(generated_text, str):
             ASCIIColors.green(f"Generated text: {generated_text}")
         else:
@@ -941,4 +1157,4 @@ if __name__ == '__main__':
         ASCIIColors.error(f"An error occurred during testing: {e}")
         trace_exception(e)
 
-    ASCIIColors.yellow("\nOllamaBinding test finished.")
+    ASCIIColors.yellow("\nOllamaBinding test finished.")
```
```diff
--- a/lollms_client/llm_bindings/open_router/__init__.py
+++ b/lollms_client/llm_bindings/open_router/__init__.py
@@ -227,7 +227,7 @@ class OpenRouterBinding(LollmsLLMBinding):
             "supports_vision": "Depends on the specific model selected. This generic binding does not support vision.",
         }
 
-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """Lists available models from the OpenRouter service."""
         if not self.client:
             ASCIIColors.error("OpenRouter client not initialized. Cannot list models.")
@@ -274,7 +274,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
    ASCIIColors.cyan("\n--- Listing Models ---")
-    models = binding.
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Successfully fetched {len(models)} models from OpenRouter.")
         ASCIIColors.info("Sample of available models:")
```