lollms-client 1.5.6__py3-none-any.whl → 1.7.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -35
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -15
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +76 -21
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
- lollms_client/llm_bindings/ollama/__init__.py +345 -89
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +81 -20
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +333 -171
- lollms_client/llm_bindings/perplexity/__init__.py +2 -2
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +512 -1890
- lollms_client/lollms_discussion.py +65 -39
- lollms_client/lollms_llm_binding.py +126 -261
- lollms_client/lollms_mcp_binding.py +49 -77
- lollms_client/lollms_stt_binding.py +99 -52
- lollms_client/lollms_tti_binding.py +38 -38
- lollms_client/lollms_ttm_binding.py +38 -42
- lollms_client/lollms_tts_binding.py +43 -18
- lollms_client/lollms_ttv_binding.py +38 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +464 -803
- lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +7 -4
- lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
- lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
- lollms_client-1.7.13.dist-info/RECORD +90 -0
- lollms_client-1.5.6.dist-info/RECORD +0 -87
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0

lollms_client/llm_bindings/ollama/__init__.py

@@ -7,13 +7,18 @@ from lollms_client.lollms_types import MSG_TYPE
 # from lollms_client.lollms_utilities import encode_image
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
 from lollms_client.lollms_discussion import LollmsDiscussion
-from typing import Optional, Callable, List, Union, Dict
+from typing import Optional, Callable, List, Union, Dict, Any

 from ascii_colors import ASCIIColors, trace_exception
 import pipmaster as pm
 from lollms_client.lollms_utilities import ImageTokenizer
 pm.ensure_packages(["ollama","pillow","tiktoken"])
 import re
+import platform
+import subprocess
+import urllib.request
+import zipfile
+import os

 import ollama
 import tiktoken
@@ -57,7 +62,9 @@ def count_tokens_ollama(
     res = ollama_client.chat(
         model=model_name,
         messages=[{"role":"system","content":""},{"role":"user", "content":text_to_tokenize}],
-        stream=False,
+        stream=False,
+        think=False,
+        options={"num_predict":1}
     )

     return res.prompt_eval_count-5
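
The token-count probe above works by sending the text with generation capped at a single token (num_predict: 1) and reading back prompt_eval_count. A standalone sketch of the same probe, assuming a locally running Ollama server and the ollama Python package; the host and model name are placeholders:

    import ollama

    client = ollama.Client(host="http://localhost:11434")  # placeholder host

    def count_prompt_tokens(text: str, model: str = "llama3") -> int:
        # Request at most one generated token so the call stays cheap,
        # then read how many tokens the prompt itself consumed.
        res = client.chat(
            model=model,
            messages=[{"role": "system", "content": ""},
                      {"role": "user", "content": text}],
            stream=False,
            options={"num_predict": 1},
        )
        # Mirror the binding's "-5" adjustment (presumably chat-template overhead).
        return res.prompt_eval_count - 5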
@@ -108,24 +115,28 @@ class OllamaBinding(LollmsLLMBinding):
             raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e

     def generate_text(self,
-                      [old lines 111-128 (previous parameter list) not rendered in the source diff]
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.

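
The extended signature adds thinking and reasoning controls on top of the existing sampling parameters. A usage sketch, assuming `binding` is an already initialized OllamaBinding (its construction is not part of this hunk):

    from lollms_client.lollms_types import MSG_TYPE

    def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
        print(chunk, end="", flush=True)
        return True  # returning False asks the binding to stop streaming

    answer = binding.generate_text(
        "Why is the sky blue?",
        system_prompt="Answer briefly.",
        n_predict=128,
        stream=True,
        streaming_callback=on_chunk,
        think=True,              # wrap the model's reasoning in <think>...</think>
        reasoning_effort="low",  # only consulted for gpt-oss models (see the next hunk)
    )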
@@ -168,6 +179,8 @@ class OllamaBinding(LollmsLLMBinding):
         if ctx_size is not None: options['num_ctx'] = ctx_size

         full_response_text = ""
+        think = think if "gpt-oss" not in self.model_name else reasoning_effort
+        ASCIIColors.magenta(f"Generation with think: {think}")

         try:
             if images: # Multimodal
@@ -176,6 +189,8 @@ class OllamaBinding(LollmsLLMBinding):
                 for img_path in images:
                     # Assuming img_path is a file path. ollama-python will read and encode it.
                     # If images were base64 strings, they would need decoding to bytes first.
+                    if img_path.startswith("data:image/png;base64,"):
+                        img_path = img_path[len("data:image/png;base64,"):]
                     processed_images.append(img_path)

                 messages = [
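
With this change an entry in `images` may be a file path, a raw base64 string, or a PNG data URI; only the `data:image/png;base64,` prefix is stripped at this point. A small sketch of preparing such an entry; the file name is illustrative:

    import base64

    # Build a PNG data URI from a local file; the binding strips the prefix
    # before handing the raw base64 payload to ollama.
    with open("diagram.png", "rb") as f:
        data_uri = "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

    answer = binding.generate_text(
        "Describe this image.",
        images=[data_uri],  # a plain file path would also work
        stream=False,
    )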
@@ -192,24 +207,37 @@ class OllamaBinding(LollmsLLMBinding):
                         model=self.model_name,
                         messages=messages,
                         stream=True,
+                        think=think,
                         options=options if options else None
                     )
-                    [old lines 197-199 not rendered in the source diff]
+                    in_thinking = False
+                    for chunk in response_stream:
+                        if chunk.message.thinking and not in_thinking:
+                            full_response_text += "<think>\n"
+                            in_thinking = True
+
+                        if chunk.message.content:# Ensure there is content to process
+                            chunk_content = chunk.message.content
+                            if in_thinking:
+                                full_response_text += "\n</think>\n"
+                                in_thinking = False
                             full_response_text += chunk_content
                             if streaming_callback:
                                 if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
                                     break # Callback requested stop
                     return full_response_text
                 else: # Not streaming
-                    [old line 206 not rendered in the source diff]
+                    response = self.ollama_client.chat(
                         model=self.model_name,
                         messages=messages,
                         stream=False,
+                        think=think,
                         options=options if options else None
                     )
-                    [old line 212 not rendered in the source diff]
+                    full_response_text = response.message.content
+                    if think:
+                        full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                    return full_response_text
             else: # Text-only
                 messages = [
                     {'role': 'system', 'content':system_prompt},
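
Both the streaming and non-streaming paths now return the reasoning trace inline, wrapped in <think>/</think> tags ahead of the answer. Callers that want only the final answer have to strip that block themselves; a minimal helper sketch (not part of the package):

    import re

    def split_thinking(text: str) -> tuple[str, str]:
        """Return (thinking, answer) from a response that may start with a <think> block."""
        match = re.match(r"\s*<think>\n?(.*?)\n?</think>\n?(.*)", text, flags=re.DOTALL)
        if match:
            return match.group(1), match.group(2)
        return "", text

    thinking, answer = split_thinking(binding.generate_text("What is 17*23?", think=True))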
@@ -224,24 +252,38 @@ class OllamaBinding(LollmsLLMBinding):
                         model=self.model_name,
                         messages=messages,
                         stream=True,
+                        think=think,
                         options=options if options else None
                     )
-                    [old lines 229-231 not rendered in the source diff]
+                    in_thinking = False
+                    for chunk in response_stream:
+                        if chunk.message.thinking and not in_thinking:
+                            full_response_text += "<think>\n"
+                            in_thinking = True
+
+                        if chunk.message.content:# Ensure there is content to process
+                            chunk_content = chunk.message.content
+                            if in_thinking:
+                                full_response_text += "\n</think>\n"
+                                in_thinking = False
                             full_response_text += chunk_content
                             if streaming_callback:
                                 if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                    break
+                                    break # Callback requested stop
                     return full_response_text
                 else: # Not streaming
-                    [old line 238 not rendered in the source diff]
+                    response = self.ollama_client.chat(
                         model=self.model_name,
                         messages=messages,
                         stream=False,
+                        think=think,
                         options=options if options else None
                     )
-                    [old line 244 not rendered in the source diff]
+                    full_response_text = response.message.content
+                    if think:
+                        full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                    return full_response_text
+
         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
             ASCIIColors.error(error_message)
@@ -268,6 +310,9 @@ class OllamaBinding(LollmsLLMBinding):
                      n_threads: Optional[int] = None,
                      ctx_size: int | None = None,
                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                     think: Optional[bool] = False,
+                     reasoning_effort: Optional[bool] = "low", # low, medium, high
+                     reasoning_summary: Optional[bool] = "auto", # auto
                      **kwargs
                      ) -> Union[str, dict]:
         if not self.ollama_client:
@@ -296,12 +341,23 @@ class OllamaBinding(LollmsLLMBinding):
                 for item in content:
                     if item.get("type") == "text":
                         text_parts.append(item.get("text", ""))
-                    elif item.get("type") == "input_image":
+                    elif item.get("type") == "input_image" or item.get("type") == "image_url":
                         base64_data = item.get("image_url")
                         if base64_data:
-                            [old lines 302-304 not rendered in the source diff]
+                            if isinstance(base64_data, str):
+                                # ⚠️ remove prefix "data:image/...;base64,"
+                                cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data)
+                                images.append(cleaned)
+                            elif base64_data and isinstance(base64_data, dict) :
+                                if "base64" in base64_data:
+                                    cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["base64"])
+                                    images.append(cleaned)
+                                elif "url" in base64_data :
+                                    if "http" in base64_data["url"]:
+                                        images.append(base64_data["url"])
+                                    else:
+                                        cleaned = re.sub(r"^data:image/[^;]+;base64,", "", base64_data["url"])
+                                        images.append(cleaned)


         return {
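
The normalization above accepts several shapes for an image item in OpenAI-style message content: a plain string (base64 or data URI), a dict carrying a `base64` field, or a dict carrying a `url` field (remote URL or data URI). A sketch of inputs it would handle; the payloads are illustrative:

    content = [
        {"type": "text", "text": "What is in this picture?"},
        # plain data-URI (or raw base64) string
        {"type": "input_image", "image_url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="},
        # OpenAI-style nested dict with a remote URL, kept as-is
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        # nested dict carrying base64 directly
        {"type": "image_url", "image_url": {"base64": "iVBORw0KGgoAAAANSUhEUg=="}},
    ]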
@@ -333,6 +389,7 @@ class OllamaBinding(LollmsLLMBinding):
                     model=self.model_name,
                     messages=ollama_messages,
                     stream=True,
+                    think = think,
                     options=options if options else None
                 )
                 for chunk_dict in response_stream:
@@ -344,13 +401,17 @@ class OllamaBinding(LollmsLLMBinding):
                             break
                 return full_response_text
             else:
-                [old line 347 not rendered in the source diff]
+                response = self.ollama_client.chat(
                     model=self.model_name,
                     messages=ollama_messages,
                     stream=False,
+                    think=think if "gpt-oss" not in self.model_name else reasoning_effort,
                     options=options if options else None
                 )
-                [old line 353 not rendered in the source diff]
+                full_response_text = response.message.content
+                if think:
+                    full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                return full_response_text

         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -364,37 +425,28 @@ class OllamaBinding(LollmsLLMBinding):
             error_message = f"An unexpected error occurred: {str(ex)}"
             trace_exception(ex)
             return {"status": False, "error": error_message}
-
-
-        except ollama.ResponseError as e:
-            error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
-            ASCIIColors.error(error_message)
-            return {"status": False, "error": error_message, "status_code": e.status_code}
-        except ollama.RequestError as e: # Covers connection errors, timeouts during request
-            error_message = f"Ollama API RequestError: {str(e)}"
-            ASCIIColors.error(error_message)
-            return {"status": False, "error": error_message}
-        except Exception as ex:
-            error_message = f"An unexpected error occurred: {str(ex)}"
-            trace_exception(ex)
-            return {"status": False, "error": error_message}


     def chat(self,
-             [old lines 384-397 (previous parameter list) not rendered in the source diff]
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[bool] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = "auto", # auto
+             **kwargs
+
+             ) -> Union[str, dict]:
         """
         Conduct a chat session with the Ollama model using a LollmsDiscussion object.

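
A hedged sketch of driving the new `chat` signature. How a LollmsDiscussion is created and populated is not shown in this diff, so that part should be read as pseudocode:

    # Pseudocode: the LollmsDiscussion construction/population API is not part of this diff.
    discussion = LollmsDiscussion(...)   # build or load a discussion elsewhere

    reply = binding.chat(
        discussion,
        n_predict=256,
        stream=False,
        think=True,               # prepend the model's reasoning as a <think> block
        reasoning_effort="high",  # used instead of the boolean for gpt-oss models
    )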
@@ -439,6 +491,8 @@ class OllamaBinding(LollmsLLMBinding):
         options = {k: v for k, v in options.items() if v is not None}

         full_response_text = ""
+        think = think if "gpt-oss" not in self.model_name else reasoning_effort
+        ASCIIColors.magenta(f"Generation with think: {think}")

         try:
             # 3. Call the Ollama API
@@ -447,24 +501,38 @@ class OllamaBinding(LollmsLLMBinding):
                     model=self.model_name,
                     messages=messages,
                     stream=True,
+                    think=think,
                     options=options if options else None
                 )
+                in_thinking = False
                 for chunk in response_stream:
-                    [old lines 453-454 not rendered in the source diff]
+                    if chunk.message.thinking and not in_thinking:
+                        full_response_text += "<think>\n"
+                        in_thinking = True
+
+                    if chunk.message.content:# Ensure there is content to process
+                        chunk_content = chunk.message.content
+                        if in_thinking:
+                            full_response_text += "\n</think>\n"
+                            in_thinking = False
                         full_response_text += chunk_content
                         if streaming_callback:
                             if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                                break # Callback requested stop
+
                 return full_response_text
             else: # Not streaming
-                [old line 461 not rendered in the source diff]
+                response = self.ollama_client.chat(
                     model=self.model_name,
                     messages=messages,
                     stream=False,
+                    think=think,
                     options=options if options else None
                 )
-                [old line 467 not rendered in the source diff]
+                full_response_text = response.message.content
+                if think:
+                    full_response_text = "<think>\n"+response.message.thinking+"\n</think>\n"+full_response_text
+                return full_response_text

         except ollama.ResponseError as e:
             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
@@ -595,7 +663,182 @@ class OllamaBinding(LollmsLLMBinding):
             "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
         }

-    def [...]
+    def pull_model(self, model_name: str, progress_callback: Callable[[dict], None] = None, **kwargs) -> dict:
+        """
+        Pulls a model from the Ollama library.
+
+        Args:
+            model_name (str): The name of the model to pull.
+            progress_callback (Callable[[dict], None], optional): A callback function that receives progress updates.
+                The dict typically contains 'status', 'completed', 'total'.
+
+        Returns:
+            dict: Dictionary with status (bool) and message (str).
+        """
+        if not self.ollama_client:
+            msg = "Ollama client not initialized. Cannot pull model."
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+
+        try:
+            ASCIIColors.info(f"Pulling model {model_name}...")
+            # Stream the pull progress
+            for progress in self.ollama_client.pull(model_name, stream=True):
+                # Send raw progress to callback if provided
+                if progress_callback:
+                    progress_callback(progress)
+
+                # Default console logging
+                status = progress.get('status', '')
+                completed = progress.get('completed')
+                total = progress.get('total')
+
+                if completed and total:
+                    percent = (completed / total) * 100
+                    print(f"\r{status}: {percent:.2f}%", end="", flush=True)
+                else:
+                    print(f"\r{status}", end="", flush=True)
+
+            print() # Clear line
+            msg = f"Model {model_name} pulled successfully."
+            ASCIIColors.success(msg)
+            return {"status": True, "message": msg}
+
+        except ollama.ResponseError as e:
+            msg = f"Ollama API Pull Error: {e.error or 'Unknown error'} (status code: {e.status_code})"
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+        except ollama.RequestError as e:
+            msg = f"Ollama API Request Error: {str(e)}"
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+        except Exception as ex:
+            msg = f"An unexpected error occurred while pulling model: {str(ex)}"
+            ASCIIColors.error(msg)
+            trace_exception(ex)
+            return {"status": False, "message": msg}
+
+    def get_zoo(self) -> List[Dict[str, Any]]:
+        """
+        Returns a list of models available for download.
+        each entry is a dict with:
+        name, description, size, type, link
+        """
+        return [
+            {"name": "Llama3 8B", "description": "Meta's Llama 3 8B model. Good for general purpose chat.", "size": "4.7GB", "type": "model", "link": "llama3"},
+            {"name": "Llama3 70B", "description": "Meta's Llama 3 70B model. High capability.", "size": "40GB", "type": "model", "link": "llama3:70b"},
+            {"name": "Phi-3 Mini", "description": "Microsoft's Phi-3 Mini 3.8B model. Lightweight and capable.", "size": "2.3GB", "type": "model", "link": "phi3"},
+            {"name": "Phi-3 Medium", "description": "Microsoft's Phi-3 Medium 14B model.", "size": "7.9GB", "type": "model", "link": "phi3:medium"},
+            {"name": "Mistral 7B", "description": "Mistral AI's 7B model v0.3.", "size": "4.1GB", "type": "model", "link": "mistral"},
+            {"name": "Mixtral 8x7B", "description": "Mistral AI's Mixture of Experts model.", "size": "26GB", "type": "model", "link": "mixtral"},
+            {"name": "Gemma 2 9B", "description": "Google's Gemma 2 9B model.", "size": "5.4GB", "type": "model", "link": "gemma2"},
+            {"name": "Gemma 2 27B", "description": "Google's Gemma 2 27B model.", "size": "16GB", "type": "model", "link": "gemma2:27b"},
+            {"name": "Qwen 2.5 7B", "description": "Alibaba Cloud's Qwen2.5 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5"},
+            {"name": "Qwen 2.5 Coder 7B", "description": "Alibaba Cloud's Qwen2.5 Coder 7B model.", "size": "4.5GB", "type": "model", "link": "qwen2.5-coder"},
+            {"name": "CodeLlama 7B", "description": "Meta's CodeLlama 7B model.", "size": "3.8GB", "type": "model", "link": "codellama"},
+            {"name": "LLaVA 7B", "description": "Visual instruction tuning model (Vision).", "size": "4.5GB", "type": "model", "link": "llava"},
+            {"name": "Nomic Embed Text", "description": "A high-performing open embedding model.", "size": "274MB", "type": "embedding", "link": "nomic-embed-text"},
+            {"name": "DeepSeek Coder V2", "description": "DeepSeek Coder V2 model.", "size": "8.9GB", "type": "model", "link": "deepseek-coder-v2"},
+            {"name": "OpenHermes 2.5 Mistral", "description": "High quality finetune of Mistral 7B.", "size": "4.1GB", "type": "model", "link": "openhermes"},
+            {"name": "Dolphin Phi", "description": "Uncensored Dolphin fine-tune of Phi-2.", "size": "1.6GB", "type": "model", "link": "dolphin-phi"},
+            {"name": "TinyLlama", "description": "A compact 1.1B model.", "size": "637MB", "type": "model", "link": "tinyllama"},
+        ]
+
+    def download_from_zoo(self, index: int, progress_callback: Callable[[dict], None] = None) -> dict:
+        """
+        Downloads a model from the zoo using its index.
+        """
+        zoo = self.get_zoo()
+        if index < 0 or index >= len(zoo):
+            msg = "Index out of bounds"
+            ASCIIColors.error(msg)
+            return {"status": False, "message": msg}
+        item = zoo[index]
+        return self.pull_model(item["link"], progress_callback=progress_callback)
+
+    def install_ollama(self, callback: Callable[[dict], None] = None, **kwargs) -> dict:
+        """
+        Installs Ollama based on the operating system.
+        """
+        system = platform.system()
+
+        def report_progress(status, message, completed=0, total=100):
+            if callback:
+                callback({"status": status, "message": message, "completed": completed, "total": total})
+            else:
+                print(f"{status}: {message}")
+
+        try:
+            if system == "Linux":
+                report_progress("working", "Detected Linux. Running installation script...", 10, 100)
+                # Use the official install script
+                cmd = "curl -fsSL https://ollama.com/install.sh | sh"
+                process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+                stdout, stderr = process.communicate()
+
+                if process.returncode == 0:
+                    report_progress("success", "Ollama installed successfully on Linux.", 100, 100)
+                    return {"status": True, "message": "Ollama installed successfully."}
+                else:
+                    msg = f"Installation failed: {stderr}"
+                    report_progress("error", msg, 0, 0)
+                    return {"status": False, "error": msg}
+
+            elif system == "Windows":
+                report_progress("working", "Detected Windows. Downloading OllamaSetup.exe...", 10, 100)
+                url = "https://ollama.com/download/OllamaSetup.exe"
+                filename = "OllamaSetup.exe"
+
+                # Download with progress
+                try:
+                    def dl_callback(count, block_size, total_size):
+                        percent = int(count * block_size * 100 / total_size)
+                        report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+                    urllib.request.urlretrieve(url, filename, dl_callback)
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to download installer: {e}"}
+
+                report_progress("working", "Running installer...", 90, 100)
+                try:
+                    subprocess.run([filename], check=True) # Runs the installer GUI
+                    # We can't easily wait for the GUI installer to finish unless we block or it has silent flags.
+                    # Ollama installer is usually simple.
+                    report_progress("success", "Installer launched. Please complete the installation.", 100, 100)
+                    return {"status": True, "message": "Installer launched."}
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to launch installer: {e}"}
+
+            elif system == "Darwin": # macOS
+                report_progress("working", "Detected macOS. Downloading Ollama...", 10, 100)
+                url = "https://ollama.com/download/Ollama-darwin.zip"
+                filename = "Ollama-darwin.zip"
+
+                # Download with progress
+                try:
+                    def dl_callback(count, block_size, total_size):
+                        percent = int(count * block_size * 100 / total_size)
+                        report_progress("working", f"Downloading... {percent}%", percent, 100)
+
+                    urllib.request.urlretrieve(url, filename, dl_callback)
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to download: {e}"}
+
+                report_progress("working", "Unzipping...", 80, 100)
+                with zipfile.ZipFile(filename, 'r') as zip_ref:
+                    zip_ref.extractall("Ollama_Install")
+
+                report_progress("success", "Ollama downloaded and extracted to 'Ollama_Install'. Please move 'Ollama.app' to Applications.", 100, 100)
+                return {"status": True, "message": "Downloaded and extracted. Please install Ollama.app manually."}
+
+            else:
+                return {"status": False, "error": f"Unsupported OS: {system}"}
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "error": str(e)}
+
+    def list_models(self) -> List[Dict[str, str]]:
         """
         Lists available models from the Ollama service using the ollama-python library.
         The returned list of dictionaries matches the format of the original template.
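
The new model-management helpers compose into a small bootstrap flow: inspect the zoo, pull by zoo index or by Ollama tag, and watch progress through a callback. A usage sketch; the progress handler and chosen models are illustrative:

    def show_progress(progress: dict) -> None:
        # progress typically carries 'status', 'completed' and 'total'
        status = progress.get("status", "")
        completed, total = progress.get("completed"), progress.get("total")
        if completed and total:
            print(f"{status}: {completed * 100 / total:.1f}%")
        else:
            print(status)

    for i, entry in enumerate(binding.get_zoo()):
        print(i, entry["name"], entry["size"], entry["link"])

    binding.download_from_zoo(2, progress_callback=show_progress)  # index 2 is Phi-3 Mini
    binding.pull_model("qwen2.5", progress_callback=show_progress)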
@@ -621,10 +864,10 @@ class OllamaBinding(LollmsLLMBinding):
                 })
             return model_info_list
         except ollama.ResponseError as e:
-            ASCIIColors.error(f"Ollama API [...]
+            ASCIIColors.error(f"Ollama API list_models ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
             return []
         except ollama.RequestError as e: # Covers connection errors, timeouts during request
-            ASCIIColors.error(f"Ollama API [...]
+            ASCIIColors.error(f"Ollama API list_models RequestError: {str(e)} from {self.host_address}")
             return []
         except Exception as ex:
             trace_exception(ex)
@@ -658,6 +901,9 @@ class OllamaBinding(LollmsLLMBinding):
         """
         if model_name is None:
             model_name = self.model_name
+        if not model_name:
+            ASCIIColors.warning("Model name not specified and no default model set.")
+            return None

         try:
             info = ollama.show(model_name)
@@ -692,6 +938,12 @@ class OllamaBinding(LollmsLLMBinding):
             'llama3.1': 131072, # Llama 3.1 extended context
             'llama3.2': 131072, # Llama 3.2 extended context
             'llama3.3': 131072, # Assuming similar to 3.1/3.2
+            'gpt-oss:20b': 16000, # GPT-OSS extended
+            'gpt-oss:120b': 128000, # GPT-OSS extended
+            'codestral': 256000, # Codestral
+            'mistralai-medium': 128000, # Mistral medium
+            'mistralai-mini': 128000, # Mistral medium
+            'ministral': 256000, # Mistral medium
             'mistral': 32768, # Mistral 7B v0.2+ default
             'mixtral': 32768, # Mixtral 8x7B default
             'mixtral8x22b': 65536, # Mixtral 8x22B default
@@ -704,6 +956,9 @@ class OllamaBinding(LollmsLLMBinding):
             'qwen': 8192, # Qwen default
             'qwen2': 32768, # Qwen2 default for 7B
             'qwen2.5': 131072, # Qwen2.5 with 128K
+            'qwen3': 128000, # Qwen3 with 128k
+            'qwen3-vl': 128000, # Qwen3-vl with 128k
+            'qwen3-coder': 256000, # Qwen3 with 256k
             'codellama': 16384, # CodeLlama extended
             'codegemma': 8192, # CodeGemma default
             'deepseek-coder': 16384, # DeepSeek-Coder V1 default
@@ -724,6 +979,7 @@ class OllamaBinding(LollmsLLMBinding):
             'orca2': 4096, # Orca 2 default
             'dolphin': 32768, # Dolphin (often Mistral-based)
             'openhermes': 8192, # OpenHermes default
+            'gemini-3': 1000000, # Gemini 3 is a beast with 1M tokens
         }

         # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
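
The comment above implies that the context-size lookup strips the tag from names like 'llama3:8b-instruct' before consulting the table. The exact lookup code is not shown in this diff; a plausible sketch:

    def guess_ctx_size(model_name: str, known_sizes: dict, default: int = 8192) -> int:
        # Exact tags such as 'gpt-oss:20b' are checked first so they win
        # over the base name; otherwise 'llama3:8b-instruct' -> 'llama3'.
        if model_name in known_sizes:
            return known_sizes[model_name]
        base = model_name.split(":")[0].lower()
        return known_sizes.get(base, default)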
@@ -747,18 +1003,6 @@ class OllamaBinding(LollmsLLMBinding):
         Returns:
             list[dict]: A list of dictionaries, each representing a running model with a standardized set of keys.
                         Returns an empty list if the client is not initialized or if an error occurs.
-
-        Example of a returned model dictionary:
-        {
-            "model_name": "gemma3:12b",
-            "size": 13861175232,
-            "vram_size": 10961479680,
-            "parameters_size": "12.2B",
-            "quantization_level": "Q4_K_M",
-            "context_size": 32000,
-            "parent_model": "",
-            "expires_at": "2025-08-20T22:28:18.6708784+02:00"
-        }
         """
         if not self.ollama_client:
             ASCIIColors.warning("Ollama client not initialized. Cannot list running models.")
@@ -773,10 +1017,22 @@ class OllamaBinding(LollmsLLMBinding):
             for model_data in models_list:
                 details = model_data.get('details', {})

+                size = model_data.get("size", 0)
+                size_vram = model_data.get("size_vram", 0)
+
+                # Calculate spread
+                gpu_usage = 0
+                cpu_usage = 0
+                if size > 0:
+                    gpu_usage = min(100, (size_vram / size) * 100)
+                    cpu_usage = max(0, 100 - gpu_usage)
+
                 flat_model_info = {
                     "model_name": model_data.get("name"),
-                    "size": [...]
-                    "vram_size": [...]
+                    "size": size,
+                    "vram_size": size_vram,
+                    "gpu_usage_percent": round(gpu_usage, 2),
+                    "cpu_usage_percent": round(cpu_usage, 2),
                     "expires_at": model_data.get("expires_at"),
                     "parameters_size": details.get("parameter_size"),
                     "quantization_level": details.get("quantization_level"),
@@ -813,7 +1069,7 @@ if __name__ == '__main__':

     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models ---")
-    models = binding.[...]
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models. First 5:")
         for m in models[:5]:
@@ -844,7 +1100,7 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
     prompt_text = "Why is the sky blue?"
     ASCIIColors.info(f"Prompt: {prompt_text}")
-    generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
+    generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False, think=False)
     if isinstance(generated_text, str):
         ASCIIColors.green(f"Generated text: {generated_text}")
     else: