lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +151 -32
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
- lollms_client/llm_bindings/ollama/__init__.py +309 -93
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +148 -29
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +465 -0
- lollms_client/llm_bindings/perplexity/__init__.py +326 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +516 -1890
- lollms_client/lollms_discussion.py +55 -18
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_personality.py +5 -2
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +115 -0
- lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
- lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client/ttm_bindings/bark/__init__.py +0 -339
- lollms_client-1.4.1.dist-info/RECORD +0 -78
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/openllm/__init__.py
@@ -1,550 +1,406 @@
-
-import requests # May not be strictly needed if openllm client handles all
+import requests
 import json
-
-
-
+import base64
+import os
+import mimetypes
+import math
 from typing import Optional, Callable, List, Union, Dict

-
+import httpx
+import tiktoken
 import pipmaster as pm

-
-
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+from lollms_client.lollms_discussion import LollmsDiscussion
+from lollms_client.lollms_utilities import encode_image
+from ascii_colors import ASCIIColors, trace_exception

-
-
+# Ensure required packages are installed
+pm.ensure_packages(["httpx", "tiktoken"])

 BindingName = "OpenLLMBinding"

-
-
-
-
-
-
-)
+
+def _read_file_as_base64(path):
+    with open(path, "rb") as f:
+        return base64.b64encode(f.read()).decode("utf-8")
+
+
+def _extract_markdown_path(s):
+    s = s.strip()
+    if s.startswith("[") and s.endswith(")"):
+        lb, rb = s.find("["), s.find("]")
+        if lb != -1 and rb != -1 and rb > lb:
+            return s[lb + 1 : rb].strip()
+    return s
+
+
+def _guess_mime_from_name(name, default="image/jpeg"):
+    mime, _ = mimetypes.guess_type(name)
+    return mime or default
+
+
+def _to_data_url(b64_str, mime):
+    return f"data:{mime};base64,{b64_str}"
+
+
+def normalize_image_input(img, default_mime="image/jpeg"):
     """
-
-
-the length of 'prompt_token_ids' from the response.
+    Returns an OpenAI API-compatible content block for an image.
+    Accepts various input formats and converts them to a data URL.
     """
-
-#
-
-
-
-
-
-
-
-
-
-# This needs model-specific knowledge or further investigation.
-# For llama3 with ollama, it was prompt_eval_count - 5 (system, user, content etc)
-# For OpenLLM, it's harder to generalize the "overhead".
-# Let's assume prompt_token_ids is the count of tokens for the user's text.
-return len(response.prompt_token_ids)
-else:
-# Fallback if prompt_token_ids is not available or empty
-ASCIIColors.warning("prompt_token_ids not found in OpenLLM response, using tiktoken for count_tokens.")
-return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
-except Exception as e:
-ASCIIColors.warning(f"Failed to count tokens via OpenLLM API, using tiktoken fallback: {e}")
-return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+    if isinstance(img, str):
+        # Handle path-like strings or raw base64
+        s = _extract_markdown_path(img)
+        if os.path.exists(s):
+            b64 = _read_file_as_base64(s)
+            mime = _guess_mime_from_name(s, default_mime)
+            url = _to_data_url(b64, mime)
+        else:  # Assume it's a raw base64 string
+            url = _to_data_url(s, default_mime)
+        return {"type": "image_url", "image_url": {"url": url}}
+
+    raise ValueError("Unsupported image input type for OpenLLM")


 class OpenLLMBinding(LollmsLLMBinding):
-"""OpenLLM
-
-
-
-
-
-):
-""" Initialize the OpenLLM binding.
+    """OpenLLM-specific binding implementation"""
+
+    def __init__(self, **kwargs):
+        """
+        Initialize the OpenLLM binding.
+
         Args:
-host_address (str):
-model_name (str):
-service_key (
-verify_ssl_certificate (bool): Whether to verify SSL certificates
-timeout (int): Timeout for client requests in seconds (default: 120).
+            host_address (str): URL of the OpenLLM server (e.g. ``http://localhost:3000``).
+            model_name (str): Name of the model to use.
+            service_key (str): Authentication token for the service (optional).
+            verify_ssl_certificate (bool): Whether to verify SSL certificates.
         """
-host_address = kwargs.get("host_address")
-_host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
         super().__init__(BindingName, **kwargs)
-self.host_address =
-self.model_name = kwargs.get("model_name")
-self.
-self.
-
-if
-raise
-
-
-
-
-
+        self.host_address = kwargs.get("host_address")
+        self.model_name = kwargs.get("model_name")
+        self.service_key = kwargs.get("service_key", os.getenv("OPENLLM_API_KEY"))
+        self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
+
+        if not self.host_address:
+            raise ValueError("OpenLLM host address is required.")
+
+        # Build headers - only include Authorization if a key is actually provided
+        headers = {"Content-Type": "application/json"}
+        if self.service_key:
+            headers["Authorization"] = f"Bearer {self.service_key}"
+        else:
+            ASCIIColors.warning(
+                "No service key provided for OpenLLM. Requests will be made without Authorization header."
             )
-# Perform a quick health check or metadata fetch to confirm connection
-if not self._verify_connection():
-raise ConnectionError(f"Failed to connect or verify OpenLLM server at {self.host_address}")
-
-# Try to fetch model_name if not provided
-if not self.model_name:
-metadata = self._get_model_metadata_from_server()
-if metadata and 'model_id' in metadata:
-self.model_name = metadata['model_id']
-else:
-ASCIIColors.warning("Could not automatically determine model name from OpenLLM server.")

-
-
-
-
+        # Append /v1 to the base URL for OpenAI compatibility
+        base_url = f"{self.host_address.rstrip('/')}/v1"
+
+        self.client = httpx.Client(
+            base_url=base_url,
+            headers=headers,
+            verify=self.verify_ssl_certificate,
+            timeout=None,
+        )
+
+    # --------------------------------------------------------------------- #
+    # Helper methods
+    # --------------------------------------------------------------------- #
+    def _build_request_params(self, messages: list, **kwargs) -> dict:
+        """Construct the JSON payload expected by the OpenLLM /chat/completions endpoint."""
+        params = {
+            "model": kwargs.get("model", self.model_name),
+            "messages": messages,
+            "stream": kwargs.get("stream", True),
+        }

-
-if not
-
+        # Map Lollms parameters to OpenAI-compatible fields
+        if "n_predict" in kwargs and kwargs["n_predict"] is not None:
+            params["max_tokens"] = kwargs["n_predict"]
+        if "temperature" in kwargs and kwargs["temperature"] is not None:
+            params["temperature"] = kwargs["temperature"]
+        if "top_p" in kwargs and kwargs["top_p"] is not None:
+            params["top_p"] = kwargs["top_p"]
+        if "top_k" in kwargs and kwargs["top_k"] is not None:
+            params["top_k"] = kwargs["top_k"]
+        if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
+            params["frequency_penalty"] = kwargs["repeat_penalty"]
+        if "seed" in kwargs and kwargs["seed"] is not None:
+            params["seed"] = kwargs["seed"]
+
+        return params
+
+    def _process_request(
+        self,
+        params: dict,
+        stream: Optional[bool],
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+    ) -> Union[str, dict]:
+        """Execute the request - handling both streaming and non-streaming modes."""
+        output = ""
         try:
-
-
-
-
+            if stream:
+                with self.client.stream(
+                    "POST", "/chat/completions", json=params
+                ) as response:
+                    if response.status_code != 200:
+                        err = response.read().decode("utf-8")
+                        raise Exception(
+                            f"API Error: {response.status_code} - {err}"
+                        )
+
+                    for line in response.iter_lines():
+                        if not line:
+                            continue
+                        if line.startswith("data:"):
+                            data_str = line[len("data:") :].strip()
+                            if data_str == "[DONE]":
+                                break
+                            try:
+                                chunk = json.loads(data_str)
+                                if chunk.get("choices"):
+                                    delta = chunk["choices"][0].get("delta", {})
+                                    word = delta.get("content", "")
+                                    if word:
+                                        if streaming_callback:
+                                            if not streaming_callback(
+                                                word, MSG_TYPE.MSG_TYPE_CHUNK
+                                            ):
+                                                break
+                                        output += word
+                            except json.JSONDecodeError:
+                                continue
+            else:
+                response = self.client.post("/chat/completions", json=params)
+                if response.status_code != 200:
+                    raise Exception(
+                        f"API Error: {response.status_code} - {response.text}"
+                    )
+                data = response.json()
+                output = data["choices"][0]["message"]["content"]
+                if streaming_callback:
+                    streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)

-def _get_model_metadata_from_server(self) -> Optional[Dict]:
-if not self.openllm_client:
-return None
-try:
-# metadata() returns a GenerationOutput object which contains model_name, backend etc.
-meta_output = self.openllm_client.metadata()
-# The actual LLMConfig and model details are in meta_output.configuration (a string JSON)
-# and meta_output.model_name, meta_output.backend etc.
-# For simplicity, let's try to parse configuration or use model_name
-config_dict = {}
-if meta_output.configuration:
-try:
-config_dict = json.loads(meta_output.configuration)
-except json.JSONDecodeError:
-ASCIIColors.warning("Failed to parse model configuration from OpenLLM metadata.")
-
-return {
-"model_id": config_dict.get("model_id", meta_output.model_name), # model_id from config is better
-"model_name": meta_output.model_name, # As reported by client.metadata()
-"backend": meta_output.backend,
-"timeout": meta_output.timeout,
-"configuration": config_dict
-}
         except Exception as e:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-"top_k": top_k,
-"top_p": top_p,
-"repetition_penalty": repeat_penalty,
-}
-if n_predict is not None: config_params['max_new_tokens'] = n_predict
-if seed is not None: config_params['seed'] = seed # seed might not be supported by all backends/models
-
-llm_config = openllm.LLMConfig(**config_params).model_dump(flatten=True, omit_default=True)
-
-# Prepend system prompt if provided
-full_prompt = prompt
-if system_prompt and system_prompt.strip():
-full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" # Common instruct format
-
-# Handle images: This is highly model-dependent for OpenLLM.
-# For LLaVA-like models, images are base64 encoded and put in the prompt.
-# This is a simplified approach. A robust solution needs model-specific prompt templating.
+            trace_exception(e)
+            err_msg = f"An error occurred with the OpenLLM API: {e}"
+            if streaming_callback:
+                streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": err_msg}
+
+        return output
+
+    # --------------------------------------------------------------------- #
+    # Public API required by LollmsLLMBinding
+    # --------------------------------------------------------------------- #
+    def generate_text(
+        self,
+        prompt: str,
+        images: Optional[List[str]] = None,
+        system_prompt: str = "",
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: float = 0.7,
+        top_k: int = 40,
+        top_p: float = 0.9,
+        repeat_penalty: float = 1.1,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        **kwargs,
+    ) -> Union[str, dict]:
+        """Generate text (or multimodal output) via OpenLLM."""
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+
+        user_content = [{"type": "text", "text": prompt}]
         if images:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            for img in images:
+                user_content.append(normalize_image_input(img))
+
+        messages.append({"role": "user", "content": user_content})
+
+        params = self._build_request_params(
+            messages=messages,
+            n_predict=n_predict,
+            stream=stream,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repeat_penalty=repeat_penalty,
+            **kwargs,
+        )
+        return self._process_request(params, stream, streaming_callback)
+
+    def generate_from_messages(
+        self,
+        messages: List[Dict],
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: Optional[float] = None,
+        top_k: Optional[int] = None,
+        top_p: Optional[float] = None,
+        repeat_penalty: Optional[float] = None,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        **kwargs,
+    ) -> Union[str, dict]:
+        """Generate from a pre-formatted list of OpenAI-compatible messages."""
+        params = self._build_request_params(
+            messages=messages,
+            n_predict=n_predict,
+            stream=stream,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repeat_penalty=repeat_penalty,
+            **kwargs,
+        )
+        return self._process_request(params, stream, streaming_callback)
+
+    def chat(
+        self,
+        discussion: LollmsDiscussion,
+        branch_tip_id: Optional[str] = None,
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: float = 0.7,
+        top_k: int = 40,
+        top_p: float = 0.9,
+        repeat_penalty: float = 1.1,
+        repeat_last_n: int = 64,
+        seed: Optional[int] = None,
+        n_threads: Optional[int] = None,
+        ctx_size: Optional[int] = None,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+        think: Optional[bool] = False,
+        reasoning_effort: Optional[bool] = "low",
+        reasoning_summary: Optional[bool] = "auto",
+        **kwargs,
+    ) -> Union[str, dict]:
+        """
+        Conduct a chat session using a :class:`LollmsDiscussion` object.
+        The discussion is exported in an OpenAI-compatible format and then
+        passed to :meth:`_process_request`.
+        """
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        params = self._build_request_params(
+            messages=messages,
+            n_predict=n_predict,
+            stream=stream,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repeat_penalty=repeat_penalty,
+            **kwargs,
+        )
+        return self._process_request(params, stream, streaming_callback)
+
+    def list_models(self) -> List[Dict]:
+        """Return a list of models known to the OpenLLM server."""
+        models_info = []
         try:
-
-
-
-
-
+            response = self.client.get("/models")
+
+            if response.status_code != 200:
+                ASCIIColors.error(
+                    f"OpenLLM /v1/models returned status {response.status_code}. "
+                    f"Response body: {response.text}"
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                try:
+                    response.raise_for_status()
+                except Exception as e:
+                    trace_exception(e)
+                    return models_info  # Empty list due to error
+
+            models_data = response.json().get("data", [])
+            for model in models_data:
+                models_info.append(
+                    {
+                        "model_name": model.get("id", "N/A"),
+                        "owned_by": model.get("owned_by", "N/A"),
+                        "created": model.get("created", "N/A"),
+                        "context_length": "unknown",  # Not a standard field in OpenAI spec
+                    }
                 )
-
-
-
-
-
-
-
-
-
-
-if hasattr(e, '__cause__') and isinstance(e.__cause__, requests.exceptions.HTTPError):
-error_message += f" - HTTP Status: {e.__cause__.response.status_code}, Response: {e.__cause__.response.text}"
-elif hasattr(e, 'response') and hasattr(e.response, 'status_code'): # For httpx.HTTPStatusError
-error_message += f" - HTTP Status: {e.response.status_code}, Response: {e.response.text}"
-
-return {"status": False, "error": error_message}
-except Exception as ex:
-error_message = f"An unexpected error occurred: {str(ex)}"
-trace_exception(ex)
-return {"status": False, "error": error_message}
-
-def tokenize(self, text: str) -> list:
-"""Tokenize text using tiktoken as a fallback."""
-# OpenLLM client doesn't provide a direct tokenization API.
-# For accurate tokenization, it would depend on the specific model served.
-# Using tiktoken as a general approximation.
-try:
-# Try to use a tokenizer related to the model if known, else default
-if "llama" in self.model_name.lower(): # Crude check
-enc = tiktoken.encoding_for_model("text-davinci-003") # Llama tokenizers are different but this is a proxy
-elif "gpt" in self.model_name.lower(): # e.g. gpt2 served by OpenLLM
-enc = tiktoken.get_encoding("gpt2")
-else:
-enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo") # Fallback
-return enc.encode(text)
-except Exception:
-# Further fallback
-return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
-
-def detokenize(self, tokens: list) -> str:
-"""Detokenize tokens using tiktoken as a fallback."""
+        except Exception as e:
+            ASCIIColors.error(
+                f"Failed to list models from OpenLLM: {e.__class__.__name__}: {e}"
+            )
+            trace_exception(e)
+        return models_info
+
+
+    def _get_encoding(self, model_name: str | None = None):
+        """Fallback to tiktoken for generic tokenisation."""
         try:
-
-
-
-
-
-
-return enc.decode(tokens)
-except Exception:
-return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
-
-def count_tokens(self, text: str) -> int:
-"""Count tokens using the OpenLLM server if possible, else tiktoken."""
-if not self.openllm_client:
-ASCIIColors.warning("OpenLLM client not initialized. Using tiktoken for count_tokens.")
-return len(self.tokenize(text)) # Fallback to tiktoken via self.tokenize
-
-# Try the API call method for better accuracy for the specific model
-# return count_tokens_openllm(text, self.openllm_client, self.timeout)
-# The API call above can be slow. For faster, but less model-specific count:
-return len(self.tokenize(text))
+            return tiktoken.encoding_for_model(model_name or self.model_name)
+        except KeyError:
+            return tiktoken.get_encoding("cl100k_base")
+
+    def tokenize(self, text: str) -> list[int]:
+        return self._get_encoding().encode(text)

+    def detokenize(self, tokens: list[int]) -> str:
+        return self._get_encoding().decode(tokens)

-def
-
-if not self.openllm_client:
-raise Exception("OpenLLM client not initialized.")
+    def count_tokens(self, text: str) -> int:
+        return len(self.tokenize(text))

-
-
-
-
-
+    def embed(self, text: str | List[str], **kwargs) -> List:
+        """
+        Obtain embeddings via the OpenLLM ``/embeddings`` endpoint.
+        If a single string is supplied, a single embedding vector is returned;
+        otherwise a list of vectors is returned.
+        """
+        embedding_model = kwargs.get("model", self.model_name)
+        single_input = isinstance(text, str)
+        inputs = [text] if single_input else list(text)

         try:
-
-response = self.
-
-
-
-)
-
-if
-return
+            payload = {"model": embedding_model, "input": inputs}
+            response = self.client.post("/embeddings", json=payload)
+            response.raise_for_status()
+            data = response.json()
+
+            embeddings = [item["embedding"] for item in data.get("data", [])]
+
+            if single_input and embeddings:
+                return embeddings[0]
             else:
-
-
-
-ASCIIColors.error(
-
-
-trace_exception(
-
-
-def get_model_info(self) -> dict:
-"""Return information about the current OpenLLM model setup."""
-server_metadata = self._get_model_metadata_from_server()
-model_id_from_server = "unknown"
-if server_metadata and 'model_id' in server_metadata:
-model_id_from_server = server_metadata['model_id']
-
-# Try to determine vision support based on model name (very basic)
-supports_vision = False
-if self.model_name and any(vm_name in self.model_name.lower() for vm_name in ["llava", "bakllava", "vision"]):
-supports_vision = True
+                return embeddings
+
+        except Exception as e:
+            ASCIIColors.error(
+                f"Failed to generate embeddings using model '{embedding_model}': {e}"
+            )
+            trace_exception(e)
+            return []

+    def get_model_info(self) -> dict:
+        """Return basic information about the current binding configuration."""
         return {
             "name": self.binding_name,
-"version":
+            "version": pm.get_installed_version("openllm")
+            if pm.is_installed("openllm")
+            else "unknown",
             "host_address": self.host_address,
-"model_name": self.model_name
-"supports_structured_output": False,
-"supports_vision":
+            "model_name": self.model_name,
+            "supports_structured_output": False,
+            "supports_vision": True,  # Assuming vision support based on original code
         }

-def listModels(self) -> List[Dict[str, str]]:
-"""
-Lists the model currently served by the connected OpenLLM instance.
-OpenLLM client connects to one model server at a time.
-"""
-if not self.openllm_client:
-ASCIIColors.error("OpenLLM client not initialized. Cannot list models.")
-return []
-
-metadata = self._get_model_metadata_from_server()
-if metadata:
-return [{
-'model_name': metadata.get('model_id', metadata.get('model_name', 'Unknown Model')), # Prefer model_id
-'owned_by': metadata.get('backend', 'OpenLLM'), # Using backend as a proxy for owner/type
-# OpenLLM metadata doesn't typically include a creation/modification date for the model files themselves.
-'created_datetime': None
-}]
-return []
-
     def load_model(self, model_name: str) -> bool:
-"""
-For OpenLLM, this primarily sets the model_name for reference, as the
-model is already loaded by the server the client connects to.
-Optionally, it could re-initialize the client if host_address also changes,
-or verify the existing connection serves this model.
-Args:
-model_name (str): Name of the model (e.g., 'mistralai/Mistral-7B-Instruct-v0.1').
-This should match what the server at self.host_address is running.
-Returns:
-bool: True if model name is set and connection seems okay.
-"""
+        """Select a model for subsequent calls."""
         self.model_name = model_name
-ASCIIColors.info(f"OpenLLM
-
-
-# Optionally, verify the connected server's model matches
-server_meta = self._get_model_metadata_from_server()
-if server_meta:
-current_server_model_id = server_meta.get('model_id', server_meta.get('model_name'))
-if current_server_model_id and model_name not in current_server_model_id : # Check if model_name is substring of actual ID
-ASCIIColors.warning(f"Warning: Requested model '{model_name}' may not match model '{current_server_model_id}' served at {self.host_address}.")
-else:
-ASCIIColors.green(f"Connected OpenLLM server model appears to be '{current_server_model_id}'.")
-
-return self._verify_connection()
-
-
-if __name__ == '__main__':
-global full_streamed_text
-ASCIIColors.yellow("Testing OpenLLMBinding...")
-
-# --- Configuration ---
-# Ensure an OpenLLM server is running. Example:
-# `openllm start mistralai/Mistral-7B-Instruct-v0.1`
-# or for embeddings: `openllm start baai/bge-small-en-v1.5`
-# or for vision (if you have a LLaVA model compatible with OpenLLM):
-# `openllm start llava-hf/llava-1.5-7b-hf` (You might need to convert/setup some vision models for OpenLLM)
-
-openllm_host = "http://localhost:3000"
-# This should match the model_id you started OpenLLM with
-test_model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Example, change if your server runs a different model
-# test_model_name = "facebook/opt-125m" # A smaller model for quicker tests if available
-
-# For embedding test, you'd point to an OpenLLM server running an embedding model
-# openllm_embedding_host = "http://localhost:3001" # If running embedding model on different port
-# test_embedding_model_name = "baai/bge-small-en-v1.5"
-
-# For vision, if you have a LLaVA model running with OpenLLM
-# openllm_vision_host = "http://localhost:3002"
-# test_vision_model_name = "llava-hf/llava-1.5-7b-hf" # Example
-
-try:
-ASCIIColors.cyan("\n--- Initializing Binding for Text Generation ---")
-# Initialize with the host where your text generation model is running
-binding = OpenLLMBinding(host_address=openllm_host, model_name=test_model_name)
-ASCIIColors.green(f"Binding initialized successfully. Connected to model: {binding.model_name}")
-ASCIIColors.info(f"Using OpenLLM client version: {openllm.__version__ if openllm else 'N/A'}")
-
-ASCIIColors.cyan("\n--- Listing Model (should be the one connected) ---")
-models = binding.listModels()
-if models:
-ASCIIColors.green(f"Connected model info:")
-for m in models:
-print(m)
-else:
-ASCIIColors.warning("Failed to list model from server. Ensure OpenLLM server is running.")
-
-ASCIIColors.cyan(f"\n--- Setting model to (for info): {test_model_name} ---")
-binding.load_model(test_model_name) # This confirms the model name and checks connection
-
-ASCIIColors.cyan("\n--- Counting Tokens (using tiktoken fallback or API) ---")
-sample_text = "Hello, OpenLLM world! This is a test."
-token_count = binding.count_tokens(sample_text)
-ASCIIColors.green(f"Token count for '{sample_text}': {token_count} (may use tiktoken approximation)")
-
-ASCIIColors.cyan("\n--- Tokenize/Detokenize (using tiktoken fallback) ---")
-tokens = binding.tokenize(sample_text)
-ASCIIColors.green(f"Tokens (tiktoken): {tokens[:10]}...")
-detokenized_text = binding.detokenize(tokens)
-ASCIIColors.green(f"Detokenized text (tiktoken): {detokenized_text}")
-
-ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
-prompt_text = "Why is the sky blue?"
-system_prompt_text = "You are a helpful AI assistant providing concise answers."
-ASCIIColors.info(f"System Prompt: {system_prompt_text}")
-ASCIIColors.info(f"User Prompt: {prompt_text}")
-generated_text = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=50, stream=False)
-if isinstance(generated_text, str):
-ASCIIColors.green(f"Generated text: {generated_text}")
-else:
-ASCIIColors.error(f"Generation failed: {generated_text}")
-
-ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
-full_streamed_text = ""
-def stream_callback(chunk: str, msg_type: int):
-global full_streamed_text
-print(f"{ASCIIColors.GREEN}{chunk}{ASCIIColors.RESET}", end="", flush=True)
-full_streamed_text += chunk
-return True
-
-ASCIIColors.info(f"Prompt: {prompt_text}")
-result = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
-print("\n--- End of Stream ---")
-if isinstance(result, str):
-ASCIIColors.green(f"Full streamed text: {result}")
-else:
-ASCIIColors.error(f"Streaming generation failed: {result}")
-
-# --- Embeddings Test ---
-# You need to run an OpenLLM server with an embedding model for this.
-# Example: `openllm start baai/bge-small-en-v1.5 --port 3001`
-# Then change openllm_host to "http://localhost:3001" for this section.
-ASCIIColors.cyan("\n--- Embeddings Test ---")
-ASCIIColors.magenta("INFO: This test requires an OpenLLM server running an EMBEDDING model (e.g., bge, E5).")
-ASCIIColors.magenta(f" If your server at {openllm_host} is a text generation model, this might fail.")
-embedding_text = "Lollms is a cool project using OpenLLM."
-try:
-# If your main binding is for text-gen, you might need a separate binding instance
-# for an embedding model if it's on a different host/port.
-# For this example, we'll try with the current binding.
-# If it fails, it means the model at openllm_host doesn't support /v1/embeddings
-embedding_vector = binding.embed(embedding_text)
-ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
-ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
-except Exception as e:
-ASCIIColors.warning(f"Could not get embedding with model '{binding.model_name}' at '{binding.host_address}': {e}")
-ASCIIColors.warning("Ensure the OpenLLM server is running an embedding-capable model and supports the /v1/embeddings endpoint.")
+        ASCIIColors.info(f"OpenLLM model set to: {model_name}")
+        return True

-
-
-
-ASCIIColors.magenta(f" And the model needs to accept images as base64 in prompt. This is a basic test.")
-
-dummy_image_path = "dummy_test_image_openllm.png"
-try:
-from PIL import Image, ImageDraw
-img = Image.new('RGB', (200, 50), color = ('blue'))
-d = ImageDraw.Draw(img)
-d.text((10,10), "OpenLLM Test", fill=('white'))
-img.save(dummy_image_path)
-ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
-
-# Assuming your 'binding' is connected to a vision model server.
-# If not, you'd initialize a new binding pointing to your vision model server.
-# e.g., vision_binding = OpenLLMBinding(host_address=openllm_vision_host, model_name=test_vision_model_name)
-
-# Check if current model_name hints at vision
-if "llava" not in binding.model_name.lower() and "vision" not in binding.model_name.lower() :
-ASCIIColors.warning(f"Current model '{binding.model_name}' might not be a vision model. Vision test may not be meaningful.")
+    def ps(self):
+        """Placeholder - OpenLLM does not expose a process-list endpoint."""
+        return []

-
-
-
-vision_response = binding.generate_text(
-prompt=vision_prompt,
-images=[dummy_image_path], # The binding will attempt to base64 encode this
-n_predict=50,
-stream=False
-)
-if isinstance(vision_response, str):
-ASCIIColors.green(f"Vision model response: {vision_response}")
-else:
-ASCIIColors.error(f"Vision generation failed: {vision_response}")
-except ImportError:
-ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
-except Exception as e:
-ASCIIColors.error(f"Error during vision test: {e}")
-trace_exception(e)
-finally:
-import os
-if os.path.exists(dummy_image_path):
-os.remove(dummy_image_path)
-
-except ConnectionRefusedError:
-ASCIIColors.error(f"Connection to OpenLLM server at {openllm_host} refused. Is OpenLLM server running?")
-ASCIIColors.error("Example: `openllm start mistralai/Mistral-7B-Instruct-v0.1`")
-except openllm.exceptions.OpenLLMException as e:
-ASCIIColors.error(f"OpenLLM specific error: {e}")
-trace_exception(e)
-except Exception as e:
-ASCIIColors.error(f"An error occurred during testing: {e}")
-trace_exception(e)
-
-ASCIIColors.yellow("\nOpenLLMBinding test finished.")
+
+# Ensure the class is treated as concrete (no remaining abstract methods)
+OpenLLMBinding.__abstractmethods__ = set()