lollms-client 1.5.6__py3-none-any.whl → 1.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -34
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -14
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  10. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  12. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  13. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  14. lollms_client/llm_bindings/ollama/__init__.py +307 -89
  15. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  16. lollms_client/llm_bindings/openai/__init__.py +81 -20
  17. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  18. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  19. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  20. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  21. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  22. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  23. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  24. lollms_client/lollms_agentic.py +4 -2
  25. lollms_client/lollms_base_binding.py +61 -0
  26. lollms_client/lollms_core.py +512 -1890
  27. lollms_client/lollms_discussion.py +25 -11
  28. lollms_client/lollms_llm_binding.py +112 -261
  29. lollms_client/lollms_mcp_binding.py +34 -75
  30. lollms_client/lollms_stt_binding.py +85 -52
  31. lollms_client/lollms_tti_binding.py +23 -37
  32. lollms_client/lollms_ttm_binding.py +24 -42
  33. lollms_client/lollms_tts_binding.py +28 -17
  34. lollms_client/lollms_ttv_binding.py +24 -42
  35. lollms_client/lollms_types.py +4 -2
  36. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  37. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  38. lollms_client/tti_bindings/diffusers/__init__.py +418 -810
  39. lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
  40. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  41. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  42. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  43. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  44. lollms_client/tti_bindings/openai/__init__.py +10 -11
  45. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  46. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  47. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  48. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  49. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  50. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  51. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  52. lollms_client/tts_bindings/bark/__init__.py +7 -10
  53. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  54. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  55. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  56. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  57. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/METADATA +113 -5
  58. lollms_client-1.7.10.dist-info/RECORD +89 -0
  59. lollms_client-1.5.6.dist-info/RECORD +0 -87
  60. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
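The largest changes in this release rework the LLM bindings; among other things the OpenWebUI binding gains image downsizing options and the listModels method is renamed to list_models across bindings. The following is a minimal, hypothetical usage sketch based only on the constructor keywords and method names visible in the OpenWebUI diff below (the model name, key, and image path are placeholders, not taken from the package):

    # Hypothetical usage sketch; names marked as placeholders are not from the package.
    from lollms_client.llm_bindings.openwebui import OpenWebUIBinding

    binding = OpenWebUIBinding(
        host_address="http://localhost:8080",        # OpenWebUI server URL
        model_name="llama3:8b-instruct",             # placeholder model name
        service_key="my-openwebui-api-key",          # or set the OPENWEBUI_API_KEY env var
        cap_image_size=True,                         # new in 1.7.x: downsize large images
        image_downsizing_max_dimension=2048,         # new: longest side kept after downsizing
        allow_non_standard_parameters=False,         # new: gates top_k passthrough
    )

    print(binding.list_models())                     # renamed from listModels()
    answer = binding.generate_text(
        "Describe this picture.",
        images=["photo.jpg"],                        # placeholder path; resized if cap_image_size is on
        stream=False,
    )
    print(answer)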
@@ -1,23 +1,22 @@
- import requests
  import json
  import base64
  import os
  import mimetypes
- import math
+ import io
  from typing import Optional, Callable, List, Union, Dict

  import httpx
  import tiktoken
  import pipmaster as pm
+ from PIL import Image

  from lollms_client.lollms_llm_binding import LollmsLLMBinding
- from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+ from lollms_client.lollms_types import MSG_TYPE
  from lollms_client.lollms_discussion import LollmsDiscussion
- from lollms_client.lollms_utilities import encode_image
  from ascii_colors import ASCIIColors, trace_exception

  # Ensure required packages are installed
- pm.ensure_packages(["httpx", "tiktoken"])
+ pm.ensure_packages(["httpx", "tiktoken", "Pillow"])

  BindingName = "OpenWebUIBinding"

@@ -26,278 +25,441 @@ def _read_file_as_base64(path):
      with open(path, "rb") as f:
          return base64.b64encode(f.read()).decode("utf-8")

+
  def _extract_markdown_path(s):
      s = s.strip()
      if s.startswith("[") and s.endswith(")"):
          lb, rb = s.find("["), s.find("]")
          if lb != -1 and rb != -1 and rb > lb:
-             return s[lb+1:rb].strip()
+             return s[lb + 1 : rb].strip()
      return s

+
  def _guess_mime_from_name(name, default="image/jpeg"):
      mime, _ = mimetypes.guess_type(name)
      return mime or default

+
  def _to_data_url(b64_str, mime):
      return f"data:{mime};base64,{b64_str}"

- def normalize_image_input(img, default_mime="image/jpeg"):
-     """
-     Returns an OpenAI API-ready content block for an image.
-     Accepts various input formats and converts them to a data URL.
-     """
-     if isinstance(img, str):
-         # Handle path-like strings or raw base64
-         s = _extract_markdown_path(img)
-         if os.path.exists(s):
-             b64 = _read_file_as_base64(s)
-             mime = _guess_mime_from_name(s, default_mime)
-             url = _to_data_url(b64, mime)
-         else: # Assume it's a base64 string
-             url = _to_data_url(s, default_mime)
+
+ def normalize_image_input(
+     img_path: str,
+     cap_size: bool = False,
+     max_dim: int = 2048,
+     default_mime="image/jpeg"
+ ) -> dict:
+     if not isinstance(img_path, str):
+         raise ValueError("Unsupported image input type for OpenWebUI")
+
+     s = _extract_markdown_path(img_path)
+     if not os.path.exists(s):
+         url = _to_data_url(s, default_mime)
          return {"type": "image_url", "image_url": {"url": url}}

-     raise ValueError("Unsupported image input type for OpenWebUI")
+     if cap_size:
+         with Image.open(s) as img_obj:
+             width, height = img_obj.size
+             if width > max_dim or height > max_dim:
+                 ratio = max_dim / max(width, height)
+                 new_width = int(width * ratio)
+                 new_height = int(height * ratio)
+
+                 ASCIIColors.info(f"Downsizing image from {width}x{height} to {new_width}x{new_height}")
+                 resized_img = img_obj.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+                 buffer = io.BytesIO()
+                 if resized_img.mode in ('RGBA', 'P'):
+                     resized_img = resized_img.convert('RGB')
+                 resized_img.save(buffer, format="JPEG")
+                 img_bytes = buffer.getvalue()
+
+                 b64 = base64.b64encode(img_bytes).decode("utf-8")
+                 mime = "image/jpeg"
+             else:
+                 b64 = _read_file_as_base64(s)
+                 mime = _guess_mime_from_name(s, default_mime)
+     else:
+         b64 = _read_file_as_base64(s)
+         mime = _guess_mime_from_name(s, default_mime)
+
+     url = _to_data_url(b64, mime)
+     return {"type": "image_url", "image_url": {"url": url}}


  class OpenWebUIBinding(LollmsLLMBinding):
-     """OpenWebUI-specific binding implementation"""
-
      def __init__(self, **kwargs):
-         """
-         Initialize the OpenWebUI binding.
-
-         Args:
-             host_address (str): The URL of the OpenWebUI server (e.g., "http://localhost:8080").
-             model_name (str): Name of the model to use.
-             service_key (str): Authentication token for the service.
-             verify_ssl_certificate (bool): Whether to verify SSL certificates.
-         """
          super().__init__(BindingName, **kwargs)
          self.host_address = kwargs.get("host_address")
          self.model_name = kwargs.get("model_name")
          self.service_key = kwargs.get("service_key", os.getenv("OPENWEBUI_API_KEY"))
          self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
-
+         self.allow_non_standard_parameters = kwargs.get("allow_non_standard_parameters", False)
+         self.cap_image_size = kwargs.get("cap_image_size", True)
+         self.image_downsizing_max_dimension = kwargs.get("image_downsizing_max_dimension", 2048)
+
          if not self.host_address:
              raise ValueError("OpenWebUI host address is required.")
-         if not self.service_key:
-             ASCIIColors.warning("No service key provided for OpenWebUI. Requests may fail.")

-         headers = {
-             "Authorization": f"Bearer {self.service_key}",
-             "Content-Type": "application/json"
-         }
+         headers = {"Content-Type": "application/json"}
+         if self.service_key:
+             headers["Authorization"] = f"Bearer {self.service_key}"

          self.client = httpx.Client(
              base_url=self.host_address,
              headers=headers,
              verify=self.verify_ssl_certificate,
-             timeout=None
+             timeout=None,
          )

      def _build_request_params(self, messages: list, **kwargs) -> dict:
-         """Builds the request parameters for the OpenWebUI API."""
          params = {
              "model": kwargs.get("model", self.model_name),
              "messages": messages,
              "stream": kwargs.get("stream", True),
          }
-
-         # Map Lollms parameters to OpenAI-compatible parameters
+
          if "n_predict" in kwargs and kwargs["n_predict"] is not None:
              params["max_tokens"] = kwargs["n_predict"]
          if "temperature" in kwargs and kwargs["temperature"] is not None:
              params["temperature"] = kwargs["temperature"]
          if "top_p" in kwargs and kwargs["top_p"] is not None:
              params["top_p"] = kwargs["top_p"]
-         if "top_k" in kwargs and kwargs["top_k"] is not None:
-             # Note: top_k is not standard in OpenAI API, but some backends might support it.
-             # We include it here for potential compatibility.
-             params["top_k"] = kwargs["top_k"]
+
+         if self.allow_non_standard_parameters:
+             if "top_k" in kwargs and kwargs["top_k"] is not None:
+                 params["top_k"] = kwargs["top_k"]
+
          if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
              params["frequency_penalty"] = kwargs["repeat_penalty"]
          if "seed" in kwargs and kwargs["seed"] is not None:
              params["seed"] = kwargs["seed"]
-
+
          return params

-     def generate_text(self,
-                       prompt: str,
-                       images: Optional[List[str]] = None,
-                       system_prompt: str = "",
-                       n_predict: Optional[int] = None,
-                       stream: Optional[bool] = None,
-                       temperature: float = 0.7,
-                       top_k: int = 40,
-                       top_p: float = 0.9,
-                       repeat_penalty: float = 1.1,
-                       streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
-                       **kwargs
-                       ) -> Union[str, dict]:
+     def _process_request(
+         self,
+         params: dict,
+         stream: Optional[bool],
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+     ) -> Union[str, dict]:
+         output = ""
+         try:
+             if stream:
+                 with self.client.stream("POST", "/api/chat/completions", json=params) as response:
+                     response.raise_for_status()
+
+                     for line in response.iter_lines():
+                         if not line:
+                             continue
+
+                         data_str = None
+                         if isinstance(line, bytes):
+                             if line.startswith(b"data:"):
+                                 data_str = line[len(b"data:"):].strip().decode("utf-8")
+                         elif isinstance(line, str):
+                             if line.startswith("data:"):
+                                 data_str = line[len("data:"):].strip()
+
+                         if data_str is None:
+                             continue
+
+                         if data_str == "[DONE]":
+                             break
+                         try:
+                             chunk = json.loads(data_str)
+                             if chunk.get("choices"):
+                                 delta = chunk["choices"][0].get("delta", {})
+                                 word = delta.get("content", "")
+                                 if word and streaming_callback:
+                                     if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                         break
+                                 output += word
+                         except json.JSONDecodeError:
+                             continue
+             else:
+                 response = self.client.post("/api/chat/completions", json=params)
+                 response.raise_for_status()
+                 data = response.json()
+                 output = data.get("choices", [{}])[0].get("message", {}).get("content", "")
+                 if streaming_callback:
+                     streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)
+
+         except httpx.HTTPStatusError as e:
+             try:
+                 e.response.read()
+                 response_text = e.response.text
+             except Exception:
+                 response_text = "(Could not read error response body)"
+             err_msg = f"API Error: {e.response.status_code} - {response_text}"
+             trace_exception(e)
+             if streaming_callback:
+                 streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+             return {"status": "error", "message": err_msg}
+         except Exception as e:
+             err_msg = f"An unexpected error occurred with the OpenWebUI API: {e}"
+             trace_exception(e)
+             if streaming_callback:
+                 streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+             return {"status": "error", "message": err_msg}
+
+         return output

+     def generate_text(
+         self,
+         prompt: str,
+         images: Optional[List[str]] = None,
+         system_prompt: str = "",
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
          messages = []
          if system_prompt:
              messages.append({"role": "system", "content": system_prompt})

-         user_content = [{"type": "text", "text": prompt}]
+         user_message = {"role": "user", "content": prompt}
          if images:
-             for img in images:
-                 user_content.append(normalize_image_input(img))
+             b64_images = []
+             for img_path in images:
+                 normalized = normalize_image_input(
+                     img_path,
+                     cap_size=self.cap_image_size,
+                     max_dim=self.image_downsizing_max_dimension
+                 )
+                 data_url = normalized["image_url"]["url"]
+                 if "base64," in data_url:
+                     b64_images.append(data_url.split("base64,")[1])
+             if b64_images:
+                 user_message["images"] = b64_images

-         messages.append({"role": "user", "content": user_content})
+         messages.append(user_message)

          params = self._build_request_params(
-             messages=messages,
-             n_predict=n_predict,
-             stream=stream,
-             temperature=temperature,
-             top_k=top_k,
-             top_p=top_p,
-             repeat_penalty=repeat_penalty,
-             **kwargs
+             messages=messages, n_predict=n_predict, stream=stream,
+             temperature=temperature, top_k=top_k, top_p=top_p,
+             repeat_penalty=repeat_penalty, **kwargs,
          )
-
          return self._process_request(params, stream, streaming_callback)

-
-     def generate_from_messages(self,
-                                messages: List[Dict],
-                                n_predict: Optional[int] = None,
-                                stream: Optional[bool] = None,
-                                temperature: Optional[float] = None,
-                                top_k: Optional[int] = None,
-                                top_p: Optional[float] = None,
-                                repeat_penalty: Optional[float] = None,
-                                streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
-                                **kwargs
-                                ) -> Union[str, dict]:
-
+     def chat(
+         self,
+         discussion: LollmsDiscussion,
+         branch_tip_id: Optional[str] = None,
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         messages = discussion.export("ollama_chat", branch_tip_id)
          params = self._build_request_params(
-             messages=messages,
-             n_predict=n_predict,
-             stream=stream,
-             temperature=temperature,
-             top_k=top_k,
-             top_p=top_p,
-             repeat_penalty=repeat_penalty,
-             **kwargs
+             messages=messages, n_predict=n_predict, stream=stream,
+             temperature=temperature, top_k=top_k, top_p=top_p,
+             repeat_penalty=repeat_penalty, **kwargs,
          )
-
          return self._process_request(params, stream, streaming_callback)

-     def _process_request(self, params, stream, streaming_callback):
-         """Helper to process streaming or non-streaming API calls."""
-         output = ""
-         try:
-             if stream:
-                 with self.client.stream("POST", "/api/chat/completions", json=params) as response:
-                     if response.status_code != 200:
-                         error_content = response.read().decode('utf-8')
-                         raise Exception(f"API Error: {response.status_code} - {error_content}")
-
-                     for line in response.iter_lines():
-                         if line.startswith("data:"):
-                             data_str = line[len("data:"):].strip()
-                             if data_str == "[DONE]":
-                                 break
-                             try:
-                                 chunk = json.loads(data_str)
-                                 if chunk["choices"]:
-                                     delta = chunk["choices"][0].get("delta", {})
-                                     word = delta.get("content", "")
-                                     if word:
-                                         if streaming_callback:
-                                             if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
-                                                 break
-                                         output += word
-                             except json.JSONDecodeError:
-                                 continue # Ignore malformed SSE lines
-             else:
-                 response = self.client.post("/api/chat/completions", json=params)
-                 if response.status_code != 200:
-                     raise Exception(f"API Error: {response.status_code} - {response.text}")
+     def generate_from_messages(
+         self,
+         messages: List[Dict],
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: Optional[float] = None,
+         top_k: Optional[int] = None,
+         top_p: Optional[float] = None,
+         repeat_penalty: Optional[float] = None,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         # Convert from OpenAI vision format to Ollama vision format
+         ollama_messages = []
+         for msg in messages:
+             content = msg.get("content")
+             role = msg.get("role")
+
+             if isinstance(content, list):
+                 text_parts = []
+                 image_parts = []
+                 for part in content:
+                     if part.get("type") == "text":
+                         text_parts.append(part.get("text", ""))
+                     elif part.get("type") == "image_url":
+                         url = part.get("image_url", {}).get("url", "")
+                         if "base64," in url:
+                             image_parts.append(url.split("base64,")[1])

-                 data = response.json()
-                 output = data["choices"][0]["message"]["content"]
-                 if streaming_callback:
-                     streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)
+                 new_msg = {"role": role, "content": "\n".join(text_parts)}
+                 if image_parts:
+                     new_msg["images"] = image_parts
+                 ollama_messages.append(new_msg)
+             else:
+                 ollama_messages.append(msg)

-         except Exception as e:
-             trace_exception(e)
-             err_msg = f"An error occurred with the OpenWebUI API: {e}"
-             if streaming_callback:
-                 streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
-             return {"status": "error", "message": err_msg}
+         params = self._build_request_params(
+             messages=ollama_messages, n_predict=n_predict, stream=stream,
+             temperature=temperature, top_k=top_k, top_p=top_p,
+             repeat_penalty=repeat_penalty, **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)

-         return output

-     def listModels(self) -> List[Dict]:
+     def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+         """
+         Retrieves the context size for an Ollama model.
+
+         The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+         otherwise it falls back to the model's default context length from its architecture details.
+         As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+         """
+         if model_name is None:
+             model_name = self.model_name
+         if not model_name:
+             ASCIIColors.warning("Model name not specified and no default model set.")
+             return None
+
+         # Failsafe: Hardcoded context sizes for popular Ollama models
+         known_contexts = {
+             'llama2': 4096,  # Llama 2 default
+             'llama3': 8192,  # Llama 3 default
+             'llama3.1': 131072,  # Llama 3.1 extended context
+             'llama3.2': 131072,  # Llama 3.2 extended context
+             'llama3.3': 131072,  # Assuming similar to 3.1/3.2
+             'codestral': 256000,  # Codestral
+             'mistralai-medium': 128000,  # Mistral medium
+             'mistralai-mini': 128000,  # Mistral medium
+             'mistral': 32768,  # Mistral 7B v0.2+ default
+             'mixtral': 32768,  # Mixtral 8x7B default
+             'mixtral8x22b': 65536,  # Mixtral 8x22B default
+             'gemma': 8192,  # Gemma default
+             'gemma2': 8192,  # Gemma 2 default
+             'gemma3': 131072,  # Gemma 3 with 128K context
+             'phi': 2048,  # Phi default (older)
+             'phi2': 2048,  # Phi-2 default
+             'phi3': 131072,  # Phi-3 variants often use 128K (mini/medium extended)
+             'qwen': 8192,  # Qwen default
+             'qwen2': 32768,  # Qwen2 default for 7B
+             'qwen2.5': 131072,  # Qwen2.5 with 128K
+             'codellama': 16384,  # CodeLlama extended
+             'codegemma': 8192,  # CodeGemma default
+             'deepseek-coder': 16384,  # DeepSeek-Coder V1 default
+             'deepseek-coder-v2': 131072,  # DeepSeek-Coder V2 with 128K
+             'deepseek-llm': 4096,  # DeepSeek-LLM default
+             'deepseek-v2': 131072,  # DeepSeek-V2 with 128K
+             'yi': 4096,  # Yi base default
+             'yi1.5': 32768,  # Yi-1.5 with 32K
+             'command-r': 131072,  # Command-R with 128K
+             'vicuna': 2048,  # Vicuna default (up to 16K in some variants)
+             'wizardlm': 16384,  # WizardLM default
+             'wizardlm2': 32768,  # WizardLM2 (Mistral-based)
+             'zephyr': 65536,  # Zephyr beta (Mistral-based extended)
+             'falcon': 2048,  # Falcon default
+             'starcoder': 8192,  # StarCoder default
+             'stablelm': 4096,  # StableLM default
+             'orca': 4096,  # Orca default
+             'orca2': 4096,  # Orca 2 default
+             'dolphin': 32768,  # Dolphin (often Mistral-based)
+             'openhermes': 8192,  # OpenHermes default
+         }
+
+         # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+         base_name = model_name.split(':')[0].lower().strip()
+
+         if base_name in known_contexts:
+             ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+             return known_contexts[base_name]
+
+         ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+         return None
+
+     def list_models(self) -> List[Dict]:
          models_info = []
          try:
-             response = self.client.get("/api/models")
+             response = self.client.get("/api/v1/models")
+             if response.status_code == 403 and "API key is not enabled" in response.text:
+                 temp_client = httpx.Client(
+                     base_url=self.host_address,
+                     headers={"Content-Type": "application/json"},
+                     verify=self.verify_ssl_certificate, timeout=None,
+                 )
+                 response = temp_client.get("/api/v1/models")
+                 temp_client.close()
+
              response.raise_for_status()
              models_data = response.json().get("data", [])
-
             for model in models_data:
                  models_info.append({
                      "model_name": model.get("id", "N/A"),
                      "owned_by": model.get("details", {}).get("family", "N/A"),
                      "created": model.get("modified_at", "N/A"),
-                     # Assuming context length might be in details, though not guaranteed
-                     "context_length": model.get("details", {}).get("parameter_size", "unknown"),
+                     "context_length": model.get("details", {}).get("parameter_size", "unknown"),
                  })
          except Exception as e:
              ASCIIColors.error(f"Failed to list models from OpenWebUI: {e}")
+             trace_exception(e)
          return models_info
-
-     def _get_encoding(self, model_name: str | None = None):
-         """Uses tiktoken as a general-purpose tokenizer."""
+
+     def _get_encoding(self, model_name: str = None):
          try:
              return tiktoken.encoding_for_model(model_name or self.model_name)
          except KeyError:
              return tiktoken.get_encoding("cl100k_base")

      def tokenize(self, text: str) -> list[int]:
-         encoding = self._get_encoding()
-         return encoding.encode(text)
+         return self._get_encoding().encode(text)

      def detokenize(self, tokens: list[int]) -> str:
-         encoding = self._get_encoding()
-         return encoding.decode(tokens)
-
-     def count_tokens(self, text: str) -> int:
-         return len(self.tokenize(text))
-
-     def get_input_tokens_price(self, model_name: str | None = None) -> float:
-         return 0.0
+         return self._get_encoding().decode(tokens)

-     def get_output_tokens_price(self, model_name: str | None = None) -> float:
-         return 0.0
+     def count_tokens(self, text: str) -> int:
+         return len(self.tokenize(text))

-     def embed(self, text: str | list[str], **kwargs) -> list:
-         """Get embeddings using Ollama's passthrough endpoint."""
+     def embed(self, text: Union[str, List[str]], **kwargs) -> List:
          embedding_model = kwargs.get("model", self.model_name)
-         is_single_input = isinstance(text, str)
-         input_texts = [text] if is_single_input else text
+         single_input = isinstance(text, str)
+         inputs = [text] if single_input else list(text)
          embeddings = []
-
          try:
-             for t in input_texts:
+             for t in inputs:
                  payload = {"model": embedding_model, "prompt": t}
                  response = self.client.post("/ollama/api/embeddings", json=payload)
                  response.raise_for_status()
-                 embedding_data = response.json().get("embedding")
-                 if embedding_data:
-                     embeddings.append(embedding_data)
-
-             return embeddings[0] if is_single_input and embeddings else embeddings
-
+                 data = response.json()
+                 vec = data.get("embedding")
+                 if vec is not None:
+                     embeddings.append(vec)
+             return embeddings[0] if single_input and embeddings else embeddings
          except Exception as e:
              ASCIIColors.error(f"Failed to generate embeddings using model '{embedding_model}': {e}")
              trace_exception(e)
              return []
+
+     def get_model_info(self) -> dict:
+         return {
+             "name": self.binding_name,
+             "version": "1.5",
+             "host_address": self.host_address,
+             "model_name": self.model_name,
+             "supports_structured_output": False,
+             "supports_vision": True,
+         }

      def load_model(self, model_name: str) -> bool:
          self.model_name = model_name
-         return True
+         ASCIIColors.info(f"OpenWebUI model set to: {model_name}")
+         return True
+
+     def ps(self):
+         return []
+
+
+ OpenWebUIBinding.__abstractmethods__ = set()
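Note the message-format change above: generate_text and generate_from_messages now build Ollama-style messages (a plain content string plus an images list of raw base64 payloads) instead of OpenAI-style content parts before posting to /api/chat/completions. An illustrative before/after with placeholder data, not taken from the package:

    # Illustrative only: the shape of the conversion performed by generate_from_messages above.
    openai_style_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,<b64>"}},
        ],
    }
    # ...is converted to:
    ollama_style_message = {
        "role": "user",
        "content": "What is in this image?",
        "images": ["<b64>"],
    }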
@@ -224,7 +224,7 @@ class PerplexityBinding(LollmsLLMBinding):
              "supports_structured_output": False
          }

-     def listModels(self) -> List[Dict[str, str]]:
+     def list_models(self) -> List[Dict[str, str]]:
          """
          Lists available models. Perplexity API does not have a models endpoint,
          so a hardcoded list is returned.
@@ -255,7 +255,7 @@ if __name__ == '__main__':

      # --- List Models ---
      ASCIIColors.cyan("\n--- Listing Models (static list) ---")
-     models = binding.listModels()
+     models = binding.list_models()
      if models:
          ASCIIColors.green(f"Found {len(models)} models.")
          for m in models: