lollms-client 1.5.6__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +76 -21
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
- lollms_client/llm_bindings/ollama/__init__.py +307 -89
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +81 -20
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +333 -171
- lollms_client/llm_bindings/perplexity/__init__.py +2 -2
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +512 -1890
- lollms_client/lollms_discussion.py +25 -11
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +7 -4
- lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
- lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/METADATA +113 -5
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client-1.5.6.dist-info/RECORD +0 -87
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0

--- a/lollms_client/llm_bindings/openwebui/__init__.py
+++ b/lollms_client/llm_bindings/openwebui/__init__.py
@@ -1,23 +1,22 @@
-import requests
 import json
 import base64
 import os
 import mimetypes
-import
+import io
 from typing import Optional, Callable, List, Union, Dict

 import httpx
 import tiktoken
 import pipmaster as pm
+from PIL import Image

 from lollms_client.lollms_llm_binding import LollmsLLMBinding
-from lollms_client.lollms_types import MSG_TYPE
+from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_discussion import LollmsDiscussion
-from lollms_client.lollms_utilities import encode_image
 from ascii_colors import ASCIIColors, trace_exception

 # Ensure required packages are installed
-pm.ensure_packages(["httpx", "tiktoken"])
+pm.ensure_packages(["httpx", "tiktoken", "Pillow"])

 BindingName = "OpenWebUIBinding"

@@ -26,278 +25,441 @@ def _read_file_as_base64(path):
     with open(path, "rb") as f:
         return base64.b64encode(f.read()).decode("utf-8")

+
 def _extract_markdown_path(s):
     s = s.strip()
     if s.startswith("[") and s.endswith(")"):
         lb, rb = s.find("["), s.find("]")
         if lb != -1 and rb != -1 and rb > lb:
-            return s[lb+1:rb].strip()
+            return s[lb + 1 : rb].strip()
     return s

+
 def _guess_mime_from_name(name, default="image/jpeg"):
     mime, _ = mimetypes.guess_type(name)
     return mime or default

+
 def _to_data_url(b64_str, mime):
     return f"data:{mime};base64,{b64_str}"

-
-
-
-
-
-
-
-
-
-
-
-
-
-        url = _to_data_url(s, default_mime)
+
+def normalize_image_input(
+    img_path: str,
+    cap_size: bool = False,
+    max_dim: int = 2048,
+    default_mime="image/jpeg"
+) -> dict:
+    if not isinstance(img_path, str):
+        raise ValueError("Unsupported image input type for OpenWebUI")
+
+    s = _extract_markdown_path(img_path)
+    if not os.path.exists(s):
+        url = _to_data_url(s, default_mime)
         return {"type": "image_url", "image_url": {"url": url}}

-
+    if cap_size:
+        with Image.open(s) as img_obj:
+            width, height = img_obj.size
+            if width > max_dim or height > max_dim:
+                ratio = max_dim / max(width, height)
+                new_width = int(width * ratio)
+                new_height = int(height * ratio)
+
+                ASCIIColors.info(f"Downsizing image from {width}x{height} to {new_width}x{new_height}")
+                resized_img = img_obj.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+                buffer = io.BytesIO()
+                if resized_img.mode in ('RGBA', 'P'):
+                    resized_img = resized_img.convert('RGB')
+                resized_img.save(buffer, format="JPEG")
+                img_bytes = buffer.getvalue()
+
+                b64 = base64.b64encode(img_bytes).decode("utf-8")
+                mime = "image/jpeg"
+            else:
+                b64 = _read_file_as_base64(s)
+                mime = _guess_mime_from_name(s, default_mime)
+    else:
+        b64 = _read_file_as_base64(s)
+        mime = _guess_mime_from_name(s, default_mime)
+
+    url = _to_data_url(b64, mime)
+    return {"type": "image_url", "image_url": {"url": url}}


 class OpenWebUIBinding(LollmsLLMBinding):
-    """OpenWebUI-specific binding implementation"""
-
     def __init__(self, **kwargs):
-        """
-        Initialize the OpenWebUI binding.
-
-        Args:
-            host_address (str): The URL of the OpenWebUI server (e.g., "http://localhost:8080").
-            model_name (str): Name of the model to use.
-            service_key (str): Authentication token for the service.
-            verify_ssl_certificate (bool): Whether to verify SSL certificates.
-        """
         super().__init__(BindingName, **kwargs)
         self.host_address = kwargs.get("host_address")
         self.model_name = kwargs.get("model_name")
         self.service_key = kwargs.get("service_key", os.getenv("OPENWEBUI_API_KEY"))
         self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
-
+        self.allow_non_standard_parameters = kwargs.get("allow_non_standard_parameters", False)
+        self.cap_image_size = kwargs.get("cap_image_size", True)
+        self.image_downsizing_max_dimension = kwargs.get("image_downsizing_max_dimension", 2048)
+
         if not self.host_address:
             raise ValueError("OpenWebUI host address is required.")
-        if not self.service_key:
-            ASCIIColors.warning("No service key provided for OpenWebUI. Requests may fail.")

-        headers = {
-
-            "
-        }
+        headers = {"Content-Type": "application/json"}
+        if self.service_key:
+            headers["Authorization"] = f"Bearer {self.service_key}"

         self.client = httpx.Client(
             base_url=self.host_address,
             headers=headers,
             verify=self.verify_ssl_certificate,
-            timeout=None
+            timeout=None,
         )

     def _build_request_params(self, messages: list, **kwargs) -> dict:
-        """Builds the request parameters for the OpenWebUI API."""
         params = {
             "model": kwargs.get("model", self.model_name),
             "messages": messages,
             "stream": kwargs.get("stream", True),
         }
-
-        # Map Lollms parameters to OpenAI-compatible parameters
+
         if "n_predict" in kwargs and kwargs["n_predict"] is not None:
             params["max_tokens"] = kwargs["n_predict"]
         if "temperature" in kwargs and kwargs["temperature"] is not None:
             params["temperature"] = kwargs["temperature"]
         if "top_p" in kwargs and kwargs["top_p"] is not None:
             params["top_p"] = kwargs["top_p"]
-
-
-
-
+
+        if self.allow_non_standard_parameters:
+            if "top_k" in kwargs and kwargs["top_k"] is not None:
+                params["top_k"] = kwargs["top_k"]
+
         if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
             params["frequency_penalty"] = kwargs["repeat_penalty"]
         if "seed" in kwargs and kwargs["seed"] is not None:
             params["seed"] = kwargs["seed"]
-
+
         return params

-    def
-
-
-
-
-
-
-
-
-
-
-
-
+    def _process_request(
+        self,
+        params: dict,
+        stream: Optional[bool],
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+    ) -> Union[str, dict]:
+        output = ""
+        try:
+            if stream:
+                with self.client.stream("POST", "/api/chat/completions", json=params) as response:
+                    response.raise_for_status()
+
+                    for line in response.iter_lines():
+                        if not line:
+                            continue
+
+                        data_str = None
+                        if isinstance(line, bytes):
+                            if line.startswith(b"data:"):
+                                data_str = line[len(b"data:"):].strip().decode("utf-8")
+                        elif isinstance(line, str):
+                            if line.startswith("data:"):
+                                data_str = line[len("data:"):].strip()
+
+                        if data_str is None:
+                            continue
+
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            chunk = json.loads(data_str)
+                            if chunk.get("choices"):
+                                delta = chunk["choices"][0].get("delta", {})
+                                word = delta.get("content", "")
+                                if word and streaming_callback:
+                                    if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                        break
+                                output += word
+                        except json.JSONDecodeError:
+                            continue
+            else:
+                response = self.client.post("/api/chat/completions", json=params)
+                response.raise_for_status()
+                data = response.json()
+                output = data.get("choices", [{}])[0].get("message", {}).get("content", "")
+                if streaming_callback:
+                    streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)
+
+        except httpx.HTTPStatusError as e:
+            try:
+                e.response.read()
+                response_text = e.response.text
+            except Exception:
+                response_text = "(Could not read error response body)"
+            err_msg = f"API Error: {e.response.status_code} - {response_text}"
+            trace_exception(e)
+            if streaming_callback:
+                streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": err_msg}
+        except Exception as e:
+            err_msg = f"An unexpected error occurred with the OpenWebUI API: {e}"
+            trace_exception(e)
+            if streaming_callback:
+                streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": err_msg}
+
+        return output

+    def generate_text(
+        self,
+        prompt: str,
+        images: Optional[List[str]] = None,
+        system_prompt: str = "",
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: float = 0.7,
+        top_k: int = 40,
+        top_p: float = 0.9,
+        repeat_penalty: float = 1.1,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        **kwargs,
+    ) -> Union[str, dict]:
         messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})

-
+        user_message = {"role": "user", "content": prompt}
         if images:
-
-
+            b64_images = []
+            for img_path in images:
+                normalized = normalize_image_input(
+                    img_path,
+                    cap_size=self.cap_image_size,
+                    max_dim=self.image_downsizing_max_dimension
+                )
+                data_url = normalized["image_url"]["url"]
+                if "base64," in data_url:
+                    b64_images.append(data_url.split("base64,")[1])
+            if b64_images:
+                user_message["images"] = b64_images

-        messages.append(
+        messages.append(user_message)

         params = self._build_request_params(
-            messages=messages,
-
-
-            temperature=temperature,
-            top_k=top_k,
-            top_p=top_p,
-            repeat_penalty=repeat_penalty,
-            **kwargs
+            messages=messages, n_predict=n_predict, stream=stream,
+            temperature=temperature, top_k=top_k, top_p=top_p,
+            repeat_penalty=repeat_penalty, **kwargs,
         )
-
         return self._process_request(params, stream, streaming_callback)

-
-
-
-
-
-
-
-
-
-
-
-
-
+    def chat(
+        self,
+        discussion: LollmsDiscussion,
+        branch_tip_id: Optional[str] = None,
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: float = 0.7,
+        top_k: int = 40,
+        top_p: float = 0.9,
+        repeat_penalty: float = 1.1,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        **kwargs,
+    ) -> Union[str, dict]:
+        messages = discussion.export("ollama_chat", branch_tip_id)
         params = self._build_request_params(
-            messages=messages,
-
-
-            temperature=temperature,
-            top_k=top_k,
-            top_p=top_p,
-            repeat_penalty=repeat_penalty,
-            **kwargs
+            messages=messages, n_predict=n_predict, stream=stream,
+            temperature=temperature, top_k=top_k, top_p=top_p,
+            repeat_penalty=repeat_penalty, **kwargs,
         )
-
         return self._process_request(params, stream, streaming_callback)

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        response = self.client.post("/api/chat/completions", json=params)
-        if response.status_code != 200:
-            raise Exception(f"API Error: {response.status_code} - {response.text}")
+    def generate_from_messages(
+        self,
+        messages: List[Dict],
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: Optional[float] = None,
+        top_k: Optional[int] = None,
+        top_p: Optional[float] = None,
+        repeat_penalty: Optional[float] = None,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        **kwargs,
+    ) -> Union[str, dict]:
+        # Convert from OpenAI vision format to Ollama vision format
+        ollama_messages = []
+        for msg in messages:
+            content = msg.get("content")
+            role = msg.get("role")
+
+            if isinstance(content, list):
+                text_parts = []
+                image_parts = []
+                for part in content:
+                    if part.get("type") == "text":
+                        text_parts.append(part.get("text", ""))
+                    elif part.get("type") == "image_url":
+                        url = part.get("image_url", {}).get("url", "")
+                        if "base64," in url:
+                            image_parts.append(url.split("base64,")[1])

-
-
-
-
+                new_msg = {"role": role, "content": "\n".join(text_parts)}
+                if image_parts:
+                    new_msg["images"] = image_parts
+                ollama_messages.append(new_msg)
+            else:
+                ollama_messages.append(msg)

-
-
-
-
-
-
+        params = self._build_request_params(
+            messages=ollama_messages, n_predict=n_predict, stream=stream,
+            temperature=temperature, top_k=top_k, top_p=top_p,
+            repeat_penalty=repeat_penalty, **kwargs,
+        )
+        return self._process_request(params, stream, streaming_callback)

-        return output

-    def
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves the context size for an Ollama model.
+
+        The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+        otherwise it falls back to the model's default context length from its architecture details.
+        As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+        """
+        if model_name is None:
+            model_name = self.model_name
+        if not model_name:
+            ASCIIColors.warning("Model name not specified and no default model set.")
+            return None
+
+        # Failsafe: Hardcoded context sizes for popular Ollama models
+        known_contexts = {
+            'llama2': 4096,              # Llama 2 default
+            'llama3': 8192,              # Llama 3 default
+            'llama3.1': 131072,          # Llama 3.1 extended context
+            'llama3.2': 131072,          # Llama 3.2 extended context
+            'llama3.3': 131072,          # Assuming similar to 3.1/3.2
+            'codestral': 256000,         # Codestral
+            'mistralai-medium': 128000,  # Mistral medium
+            'mistralai-mini': 128000,    # Mistral medium
+            'mistral': 32768,            # Mistral 7B v0.2+ default
+            'mixtral': 32768,            # Mixtral 8x7B default
+            'mixtral8x22b': 65536,       # Mixtral 8x22B default
+            'gemma': 8192,               # Gemma default
+            'gemma2': 8192,              # Gemma 2 default
+            'gemma3': 131072,            # Gemma 3 with 128K context
+            'phi': 2048,                 # Phi default (older)
+            'phi2': 2048,                # Phi-2 default
+            'phi3': 131072,              # Phi-3 variants often use 128K (mini/medium extended)
+            'qwen': 8192,                # Qwen default
+            'qwen2': 32768,              # Qwen2 default for 7B
+            'qwen2.5': 131072,           # Qwen2.5 with 128K
+            'codellama': 16384,          # CodeLlama extended
+            'codegemma': 8192,           # CodeGemma default
+            'deepseek-coder': 16384,     # DeepSeek-Coder V1 default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-llm': 4096,        # DeepSeek-LLM default
+            'deepseek-v2': 131072,       # DeepSeek-V2 with 128K
+            'yi': 4096,                  # Yi base default
+            'yi1.5': 32768,              # Yi-1.5 with 32K
+            'command-r': 131072,         # Command-R with 128K
+            'vicuna': 2048,              # Vicuna default (up to 16K in some variants)
+            'wizardlm': 16384,           # WizardLM default
+            'wizardlm2': 32768,          # WizardLM2 (Mistral-based)
+            'zephyr': 65536,             # Zephyr beta (Mistral-based extended)
+            'falcon': 2048,              # Falcon default
+            'starcoder': 8192,           # StarCoder default
+            'stablelm': 4096,            # StableLM default
+            'orca': 4096,                # Orca default
+            'orca2': 4096,               # Orca 2 default
+            'dolphin': 32768,            # Dolphin (often Mistral-based)
+            'openhermes': 8192,          # OpenHermes default
+        }
+
+        # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+        base_name = model_name.split(':')[0].lower().strip()
+
+        if base_name in known_contexts:
+            ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+            return known_contexts[base_name]
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+        return None
+
+    def list_models(self) -> List[Dict]:
         models_info = []
         try:
-            response = self.client.get("/api/models")
+            response = self.client.get("/api/v1/models")
+            if response.status_code == 403 and "API key is not enabled" in response.text:
+                temp_client = httpx.Client(
+                    base_url=self.host_address,
+                    headers={"Content-Type": "application/json"},
+                    verify=self.verify_ssl_certificate, timeout=None,
+                )
+                response = temp_client.get("/api/v1/models")
+                temp_client.close()
+
             response.raise_for_status()
             models_data = response.json().get("data", [])
-
             for model in models_data:
                 models_info.append({
                     "model_name": model.get("id", "N/A"),
                     "owned_by": model.get("details", {}).get("family", "N/A"),
                     "created": model.get("modified_at", "N/A"),
-
-                    "context_length": model.get("details", {}).get("parameter_size", "unknown"),
+                    "context_length": model.get("details", {}).get("parameter_size", "unknown"),
                 })
         except Exception as e:
             ASCIIColors.error(f"Failed to list models from OpenWebUI: {e}")
+            trace_exception(e)
         return models_info
-
-    def _get_encoding(self, model_name: str
-        """Uses tiktoken as a general-purpose tokenizer."""
+
+    def _get_encoding(self, model_name: str = None):
         try:
             return tiktoken.encoding_for_model(model_name or self.model_name)
         except KeyError:
             return tiktoken.get_encoding("cl100k_base")

     def tokenize(self, text: str) -> list[int]:
-
-        return encoding.encode(text)
+        return self._get_encoding().encode(text)

     def detokenize(self, tokens: list[int]) -> str:
-
-        return encoding.decode(tokens)
-
-    def count_tokens(self, text: str) -> int:
-        return len(self.tokenize(text))
-
-    def get_input_tokens_price(self, model_name: str | None = None) -> float:
-        return 0.0
+        return self._get_encoding().decode(tokens)

-    def
-        return
+    def count_tokens(self, text: str) -> int:
+        return len(self.tokenize(text))

-    def embed(self, text: str
-        """Get embeddings using Ollama's passthrough endpoint."""
+    def embed(self, text: Union[str, List[str]], **kwargs) -> List:
         embedding_model = kwargs.get("model", self.model_name)
-
-
+        single_input = isinstance(text, str)
+        inputs = [text] if single_input else list(text)
         embeddings = []
-
         try:
-            for t in
+            for t in inputs:
                 payload = {"model": embedding_model, "prompt": t}
                 response = self.client.post("/ollama/api/embeddings", json=payload)
                 response.raise_for_status()
-
-
-
-
-            return embeddings[0] if
-
+                data = response.json()
+                vec = data.get("embedding")
+                if vec is not None:
+                    embeddings.append(vec)
+            return embeddings[0] if single_input and embeddings else embeddings
         except Exception as e:
             ASCIIColors.error(f"Failed to generate embeddings using model '{embedding_model}': {e}")
             trace_exception(e)
             return []
+
+    def get_model_info(self) -> dict:
+        return {
+            "name": self.binding_name,
+            "version": "1.5",
+            "host_address": self.host_address,
+            "model_name": self.model_name,
+            "supports_structured_output": False,
+            "supports_vision": True,
+        }

     def load_model(self, model_name: str) -> bool:
         self.model_name = model_name
-
+        ASCIIColors.info(f"OpenWebUI model set to: {model_name}")
+        return True
+
+    def ps(self):
+        return []
+
+
+OpenWebUIBinding.__abstractmethods__ = set()
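
The reworked binding routes every call through `_build_request_params` and `_process_request`; the latter parses `data:`-prefixed SSE chunks and stops streaming when the callback returns a falsy value. A minimal usage sketch (not part of the diff) is shown below, assuming the binding is imported directly from its module; the host, model id, key, and image path are placeholders:

```python
# Illustrative sketch only -- host, model id, API key, and image path are placeholders.
from lollms_client.lollms_types import MSG_TYPE
from lollms_client.llm_bindings.openwebui import OpenWebUIBinding

binding = OpenWebUIBinding(
    host_address="http://localhost:8080",   # OpenWebUI server URL (placeholder)
    model_name="llama3:8b-instruct",        # placeholder model id
    service_key="sk-xxxx",                  # or set the OPENWEBUI_API_KEY env var
    cap_image_size=True,                    # new 1.7.x option: downsize oversized images
    image_downsizing_max_dimension=1024,
)

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
    # _process_request stops the stream when this callback returns a falsy value.
    print(chunk, end="", flush=True)
    return True

result = binding.generate_text(
    "Describe this picture.",
    images=["photo.jpg"],          # hypothetical local file; sent as base64
    stream=True,
    streaming_callback=on_chunk,
)
```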
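Image handling is now centralized in the new `normalize_image_input` helper, which unwraps markdown-style paths, optionally downsizes large files with Pillow, and treats strings that are not existing paths as already-base64 data. A sketch of the two branches, with hypothetical inputs:

```python
# Sketch of the new helper's two branches (inputs are hypothetical).
from lollms_client.llm_bindings.openwebui import normalize_image_input

# Existing file: read, optionally downsized via Pillow, and wrapped as a data URL.
part = normalize_image_input("photo.jpg", cap_size=True, max_dim=1024)
print(part["type"])                    # image_url
print(part["image_url"]["url"][:30])   # data:image/jpeg;base64,...

# Non-existent path: the string is assumed to already be base64 and is wrapped as-is.
raw = normalize_image_input("aGVsbG8=", default_mime="image/png")
print(raw["image_url"]["url"])         # data:image/png;base64,aGVsbG8=
```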
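The binding also gains `get_ctx_size` with a hardcoded fallback table keyed on the base model name, tiktoken-based token counting, and an `embed` method that proxies OpenWebUI's Ollama embeddings passthrough. A rough sketch of that utility surface, with placeholder host and model names:

```python
# Sketch of the new utility methods (host and model names are placeholders).
from lollms_client.llm_bindings.openwebui import OpenWebUIBinding

binding = OpenWebUIBinding(host_address="http://localhost:8080", model_name="llama3.1:8b")

# Context size falls back to the hardcoded table keyed on the base name before ':'.
print(binding.get_ctx_size("llama3.1:8b-instruct"))   # 131072, from the failsafe table

# Tokenization uses tiktoken, with cl100k_base as the fallback encoding.
print(binding.count_tokens("Hello OpenWebUI"))

# Embeddings go through /ollama/api/embeddings; a single string returns one vector,
# a list of strings returns a list of vectors.
vec = binding.embed("hello world", model="nomic-embed-text")   # placeholder embedding model
vecs = binding.embed(["first sentence", "second sentence"])
```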
--- a/lollms_client/llm_bindings/perplexity/__init__.py
+++ b/lollms_client/llm_bindings/perplexity/__init__.py
@@ -224,7 +224,7 @@ class PerplexityBinding(LollmsLLMBinding):
             "supports_structured_output": False
         }

-    def
+    def list_models(self) -> List[Dict[str, str]]:
         """
         Lists available models. Perplexity API does not have a models endpoint,
         so a hardcoded list is returned.
@@ -255,7 +255,7 @@ if __name__ == '__main__':

     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models (static list) ---")
-    models = binding.
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models.")
         for m in models: