lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +151 -32
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
- lollms_client/llm_bindings/ollama/__init__.py +309 -93
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +148 -29
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +465 -0
- lollms_client/llm_bindings/perplexity/__init__.py +326 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +516 -1890
- lollms_client/lollms_discussion.py +55 -18
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_personality.py +5 -2
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +115 -0
- lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
- lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client/ttm_bindings/bark/__init__.py +0 -339
- lollms_client-1.4.1.dist-info/RECORD +0 -78
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/openwebui/__init__.py — new file (`@@ -0,0 +1,465 @@`):

```python
import json
import base64
import os
import mimetypes
import io
from typing import Optional, Callable, List, Union, Dict

import httpx
import tiktoken
import pipmaster as pm
from PIL import Image

from lollms_client.lollms_llm_binding import LollmsLLMBinding
from lollms_client.lollms_types import MSG_TYPE
from lollms_client.lollms_discussion import LollmsDiscussion
from ascii_colors import ASCIIColors, trace_exception

# Ensure required packages are installed
pm.ensure_packages(["httpx", "tiktoken", "Pillow"])

BindingName = "OpenWebUIBinding"


def _read_file_as_base64(path):
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


def _extract_markdown_path(s):
    # Unwrap a markdown link/image such as "[label](path)" down to its label
    s = s.strip()
    if s.startswith("[") and s.endswith(")"):
        lb, rb = s.find("["), s.find("]")
        if lb != -1 and rb != -1 and rb > lb:
            return s[lb + 1 : rb].strip()
    return s


def _guess_mime_from_name(name, default="image/jpeg"):
    mime, _ = mimetypes.guess_type(name)
    return mime or default


def _to_data_url(b64_str, mime):
    return f"data:{mime};base64,{b64_str}"


def normalize_image_input(
    img_path: str,
    cap_size: bool = False,
    max_dim: int = 2048,
    default_mime="image/jpeg"
) -> dict:
    if not isinstance(img_path, str):
        raise ValueError("Unsupported image input type for OpenWebUI")

    s = _extract_markdown_path(img_path)
    if not os.path.exists(s):
        # Not a file on disk: treat the string as raw base64 image data
        url = _to_data_url(s, default_mime)
        return {"type": "image_url", "image_url": {"url": url}}

    if cap_size:
        with Image.open(s) as img_obj:
            width, height = img_obj.size
            if width > max_dim or height > max_dim:
                ratio = max_dim / max(width, height)
                new_width = int(width * ratio)
                new_height = int(height * ratio)

                ASCIIColors.info(f"Downsizing image from {width}x{height} to {new_width}x{new_height}")
                resized_img = img_obj.resize((new_width, new_height), Image.Resampling.LANCZOS)

                buffer = io.BytesIO()
                if resized_img.mode in ('RGBA', 'P'):
                    resized_img = resized_img.convert('RGB')
                resized_img.save(buffer, format="JPEG")
                img_bytes = buffer.getvalue()

                b64 = base64.b64encode(img_bytes).decode("utf-8")
                mime = "image/jpeg"
            else:
                b64 = _read_file_as_base64(s)
                mime = _guess_mime_from_name(s, default_mime)
    else:
        b64 = _read_file_as_base64(s)
        mime = _guess_mime_from_name(s, default_mime)

    url = _to_data_url(b64, mime)
    return {"type": "image_url", "image_url": {"url": url}}


class OpenWebUIBinding(LollmsLLMBinding):
    def __init__(self, **kwargs):
        super().__init__(BindingName, **kwargs)
        self.host_address = kwargs.get("host_address")
        self.model_name = kwargs.get("model_name")
        self.service_key = kwargs.get("service_key", os.getenv("OPENWEBUI_API_KEY"))
        self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
        self.allow_non_standard_parameters = kwargs.get("allow_non_standard_parameters", False)
        self.cap_image_size = kwargs.get("cap_image_size", True)
        self.image_downsizing_max_dimension = kwargs.get("image_downsizing_max_dimension", 2048)

        if not self.host_address:
            raise ValueError("OpenWebUI host address is required.")

        headers = {"Content-Type": "application/json"}
        if self.service_key:
            headers["Authorization"] = f"Bearer {self.service_key}"

        self.client = httpx.Client(
            base_url=self.host_address,
            headers=headers,
            verify=self.verify_ssl_certificate,
            timeout=None,
        )

    def _build_request_params(self, messages: list, **kwargs) -> dict:
        params = {
            "model": kwargs.get("model", self.model_name),
            "messages": messages,
            "stream": kwargs.get("stream", True),
        }

        if "n_predict" in kwargs and kwargs["n_predict"] is not None:
            params["max_tokens"] = kwargs["n_predict"]
        if "temperature" in kwargs and kwargs["temperature"] is not None:
            params["temperature"] = kwargs["temperature"]
        if "top_p" in kwargs and kwargs["top_p"] is not None:
            params["top_p"] = kwargs["top_p"]

        if self.allow_non_standard_parameters:
            if "top_k" in kwargs and kwargs["top_k"] is not None:
                params["top_k"] = kwargs["top_k"]

        if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
            params["frequency_penalty"] = kwargs["repeat_penalty"]
        if "seed" in kwargs and kwargs["seed"] is not None:
            params["seed"] = kwargs["seed"]

        return params

    def _process_request(
        self,
        params: dict,
        stream: Optional[bool],
        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
    ) -> Union[str, dict]:
        output = ""
        try:
            if stream:
                with self.client.stream("POST", "/api/chat/completions", json=params) as response:
                    response.raise_for_status()

                    # Parse the SSE stream: each payload line is "data: {json}"
                    for line in response.iter_lines():
                        if not line:
                            continue

                        data_str = None
                        if isinstance(line, bytes):
                            if line.startswith(b"data:"):
                                data_str = line[len(b"data:"):].strip().decode("utf-8")
                        elif isinstance(line, str):
                            if line.startswith("data:"):
                                data_str = line[len("data:"):].strip()

                        if data_str is None:
                            continue

                        if data_str == "[DONE]":
                            break
                        try:
                            chunk = json.loads(data_str)
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                word = delta.get("content", "")
                                if word and streaming_callback:
                                    # A falsy callback return value aborts generation
                                    if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                                        break
                                output += word
                        except json.JSONDecodeError:
                            continue
            else:
                response = self.client.post("/api/chat/completions", json=params)
                response.raise_for_status()
                data = response.json()
                output = data.get("choices", [{}])[0].get("message", {}).get("content", "")
                if streaming_callback:
                    streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)

        except httpx.HTTPStatusError as e:
            try:
                e.response.read()
                response_text = e.response.text
            except Exception:
                response_text = "(Could not read error response body)"
            err_msg = f"API Error: {e.response.status_code} - {response_text}"
            trace_exception(e)
            if streaming_callback:
                streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
            return {"status": "error", "message": err_msg}
        except Exception as e:
            err_msg = f"An unexpected error occurred with the OpenWebUI API: {e}"
            trace_exception(e)
            if streaming_callback:
                streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
            return {"status": "error", "message": err_msg}

        return output

    def generate_text(
        self,
        prompt: str,
        images: Optional[List[str]] = None,
        system_prompt: str = "",
        n_predict: Optional[int] = None,
        stream: Optional[bool] = None,
        temperature: float = 0.7,
        top_k: int = 40,
        top_p: float = 0.9,
        repeat_penalty: float = 1.1,
        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
        **kwargs,
    ) -> Union[str, dict]:
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        user_message = {"role": "user", "content": prompt}
        if images:
            b64_images = []
            for img_path in images:
                normalized = normalize_image_input(
                    img_path,
                    cap_size=self.cap_image_size,
                    max_dim=self.image_downsizing_max_dimension
                )
                data_url = normalized["image_url"]["url"]
                if "base64," in data_url:
                    b64_images.append(data_url.split("base64,")[1])
            if b64_images:
                user_message["images"] = b64_images

        messages.append(user_message)

        params = self._build_request_params(
            messages=messages, n_predict=n_predict, stream=stream,
            temperature=temperature, top_k=top_k, top_p=top_p,
            repeat_penalty=repeat_penalty, **kwargs,
        )
        return self._process_request(params, stream, streaming_callback)

    def chat(
        self,
        discussion: LollmsDiscussion,
        branch_tip_id: Optional[str] = None,
        n_predict: Optional[int] = None,
        stream: Optional[bool] = None,
        temperature: float = 0.7,
        top_k: int = 40,
        top_p: float = 0.9,
        repeat_penalty: float = 1.1,
        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
        **kwargs,
    ) -> Union[str, dict]:
        messages = discussion.export("ollama_chat", branch_tip_id)
        params = self._build_request_params(
            messages=messages, n_predict=n_predict, stream=stream,
            temperature=temperature, top_k=top_k, top_p=top_p,
            repeat_penalty=repeat_penalty, **kwargs,
        )
        return self._process_request(params, stream, streaming_callback)

    def generate_from_messages(
        self,
        messages: List[Dict],
        n_predict: Optional[int] = None,
        stream: Optional[bool] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        repeat_penalty: Optional[float] = None,
        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
        **kwargs,
    ) -> Union[str, dict]:
        # Convert from OpenAI vision format to Ollama vision format
        ollama_messages = []
        for msg in messages:
            content = msg.get("content")
            role = msg.get("role")

            if isinstance(content, list):
                text_parts = []
                image_parts = []
                for part in content:
                    if part.get("type") == "text":
                        text_parts.append(part.get("text", ""))
                    elif part.get("type") == "image_url":
                        url = part.get("image_url", {}).get("url", "")
                        if "base64," in url:
                            image_parts.append(url.split("base64,")[1])

                new_msg = {"role": role, "content": "\n".join(text_parts)}
                if image_parts:
                    new_msg["images"] = image_parts
                ollama_messages.append(new_msg)
            else:
                ollama_messages.append(msg)

        params = self._build_request_params(
            messages=ollama_messages, n_predict=n_predict, stream=stream,
            temperature=temperature, top_k=top_k, top_p=top_p,
            repeat_penalty=repeat_penalty, **kwargs,
        )
        return self._process_request(params, stream, streaming_callback)


    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
        """
        Retrieves the context size for a model served through OpenWebUI (typically an Ollama model).

        Ideally the effective context size would be the `num_ctx` value overridden in the
        Modelfile, or failing that the model's default context length from its architecture
        details; this implementation falls back directly to a hardcoded list of known
        context lengths for popular models.
        """
        if model_name is None:
            model_name = self.model_name
        if not model_name:
            ASCIIColors.warning("Model name not specified and no default model set.")
            return None

        # Failsafe: Hardcoded context sizes for popular Ollama models
        known_contexts = {
            'llama2': 4096,              # Llama 2 default
            'llama3': 8192,              # Llama 3 default
            'llama3.1': 131072,          # Llama 3.1 extended context
            'llama3.2': 131072,          # Llama 3.2 extended context
            'llama3.3': 131072,          # Assuming similar to 3.1/3.2
            'codestral': 256000,         # Codestral
            'mistralai-medium': 128000,  # Mistral medium
            'mistralai-mini': 128000,    # Mistral mini
            'mistral': 32768,            # Mistral 7B v0.2+ default
            'mixtral': 32768,            # Mixtral 8x7B default
            'mixtral8x22b': 65536,       # Mixtral 8x22B default
            'gemma': 8192,               # Gemma default
            'gemma2': 8192,              # Gemma 2 default
            'gemma3': 131072,            # Gemma 3 with 128K context
            'phi': 2048,                 # Phi default (older)
            'phi2': 2048,                # Phi-2 default
            'phi3': 131072,              # Phi-3 variants often use 128K (mini/medium extended)
            'qwen': 8192,                # Qwen default
            'qwen2': 32768,              # Qwen2 default for 7B
            'qwen2.5': 131072,           # Qwen2.5 with 128K
            'codellama': 16384,          # CodeLlama extended
            'codegemma': 8192,           # CodeGemma default
            'deepseek-coder': 16384,     # DeepSeek-Coder V1 default
            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
            'deepseek-llm': 4096,        # DeepSeek-LLM default
            'deepseek-v2': 131072,       # DeepSeek-V2 with 128K
            'yi': 4096,                  # Yi base default
            'yi1.5': 32768,              # Yi-1.5 with 32K
            'command-r': 131072,         # Command-R with 128K
            'vicuna': 2048,              # Vicuna default (up to 16K in some variants)
            'wizardlm': 16384,           # WizardLM default
            'wizardlm2': 32768,          # WizardLM2 (Mistral-based)
            'zephyr': 65536,             # Zephyr beta (Mistral-based extended)
            'falcon': 2048,              # Falcon default
            'starcoder': 8192,           # StarCoder default
            'stablelm': 4096,            # StableLM default
            'orca': 4096,                # Orca default
            'orca2': 4096,               # Orca 2 default
            'dolphin': 32768,            # Dolphin (often Mistral-based)
            'openhermes': 8192,          # OpenHermes default
        }

        # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
        base_name = model_name.split(':')[0].lower().strip()

        if base_name in known_contexts:
            ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
            return known_contexts[base_name]

        ASCIIColors.warning(f"Context size not found for model '{model_name}'")
        return None

    def list_models(self) -> List[Dict]:
        models_info = []
        try:
            response = self.client.get("/api/v1/models")
            # Some OpenWebUI deployments reject API-key auth on this endpoint;
            # retry once without the Authorization header.
            if response.status_code == 403 and "API key is not enabled" in response.text:
                temp_client = httpx.Client(
                    base_url=self.host_address,
                    headers={"Content-Type": "application/json"},
                    verify=self.verify_ssl_certificate, timeout=None,
                )
                response = temp_client.get("/api/v1/models")
                temp_client.close()

            response.raise_for_status()
            models_data = response.json().get("data", [])
            for model in models_data:
                models_info.append({
                    "model_name": model.get("id", "N/A"),
                    "owned_by": model.get("details", {}).get("family", "N/A"),
                    "created": model.get("modified_at", "N/A"),
                    "context_length": model.get("details", {}).get("parameter_size", "unknown"),
                })
        except Exception as e:
            ASCIIColors.error(f"Failed to list models from OpenWebUI: {e}")
            trace_exception(e)
        return models_info

    def _get_encoding(self, model_name: str = None):
        try:
            return tiktoken.encoding_for_model(model_name or self.model_name)
        except KeyError:
            return tiktoken.get_encoding("cl100k_base")

    def tokenize(self, text: str) -> list[int]:
        return self._get_encoding().encode(text)

    def detokenize(self, tokens: list[int]) -> str:
        return self._get_encoding().decode(tokens)

    def count_tokens(self, text: str) -> int:
        return len(self.tokenize(text))

    def embed(self, text: Union[str, List[str]], **kwargs) -> List:
        # Embeddings go through OpenWebUI's proxied Ollama endpoint
        embedding_model = kwargs.get("model", self.model_name)
        single_input = isinstance(text, str)
        inputs = [text] if single_input else list(text)
        embeddings = []
        try:
            for t in inputs:
                payload = {"model": embedding_model, "prompt": t}
                response = self.client.post("/ollama/api/embeddings", json=payload)
                response.raise_for_status()
                data = response.json()
                vec = data.get("embedding")
                if vec is not None:
                    embeddings.append(vec)
            return embeddings[0] if single_input and embeddings else embeddings
        except Exception as e:
            ASCIIColors.error(f"Failed to generate embeddings using model '{embedding_model}': {e}")
            trace_exception(e)
            return []

    def get_model_info(self) -> dict:
        return {
            "name": self.binding_name,
            "version": "1.5",
            "host_address": self.host_address,
            "model_name": self.model_name,
            "supports_structured_output": False,
            "supports_vision": True,
        }

    def load_model(self, model_name: str) -> bool:
        self.model_name = model_name
        ASCIIColors.info(f"OpenWebUI model set to: {model_name}")
        return True

    def ps(self):
        return []


# Clear the abstract-method set so the binding can be instantiated even though
# some abstract methods of LollmsLLMBinding are not overridden here.
OpenWebUIBinding.__abstractmethods__ = set()
```
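For orientation, here is a minimal usage sketch of the new binding, inferred only from the constructor and `generate_text` signatures visible in this diff. The host URL, model name, and environment variable are placeholder assumptions; applications would typically select bindings through the library's client layer rather than instantiating one directly.

```python
import os
from lollms_client.llm_bindings.openwebui import OpenWebUIBinding

# Placeholder values (assumptions): point host_address at your own OpenWebUI
# server and pick a model name returned by binding.list_models().
binding = OpenWebUIBinding(
    host_address="http://localhost:3000",
    model_name="llama3:8b-instruct",
    service_key=os.getenv("OPENWEBUI_API_KEY"),  # optional Bearer token
)

# Returning True from the callback continues streaming; a falsy return stops
# generation (see the `if not streaming_callback(...)` check in _process_request).
def on_chunk(chunk, msg_type):
    print(chunk, end="", flush=True)
    return True

result = binding.generate_text(
    prompt="Say hello in one short sentence.",
    stream=True,
    streaming_callback=on_chunk,
)
```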