lollms-client: lollms_client-0.14.1-py3-none-any.whl → lollms_client-0.15.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- examples/simple_text_gen_with_image_test.py +21 -9
- examples/text_gen.py +3 -1
- examples/text_gen_system_prompt.py +2 -1
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +1041 -0
- lollms_client/llm_bindings/ollama/__init__.py +3 -3
- lollms_client/llm_bindings/openllm/__init__.py +547 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +591 -0
- lollms_client/llm_bindings/transformers/__init__.py +660 -251
- lollms_client/lollms_core.py +5 -3
- lollms_client/lollms_llm_binding.py +1 -5
- {lollms_client-0.14.1.dist-info → lollms_client-0.15.1.dist-info}/METADATA +1 -1
- {lollms_client-0.14.1.dist-info → lollms_client-0.15.1.dist-info}/RECORD +16 -13
- {lollms_client-0.14.1.dist-info → lollms_client-0.15.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.14.1.dist-info → lollms_client-0.15.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.14.1.dist-info → lollms_client-0.15.1.dist-info}/top_level.txt +0 -0
@@ -68,8 +68,8 @@ class OllamaBinding(LollmsLLMBinding):
                  host_address: str = None,
                  model_name: str = "",
                  service_key: str = None,
-
-
+                 verify_ssl_certificate: bool = True,
+                 **kwargs
                  ):
         """
         Initialize the Ollama binding.
@@ -89,7 +89,7 @@ class OllamaBinding(LollmsLLMBinding):
         self.model_name=model_name
         self.service_key=service_key
         self.verify_ssl_certificate=verify_ssl_certificate
-        self.default_completion_format=default_completion_format
+        self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)

         if ollama is None:
             raise ImportError("Ollama library is not installed. Please run 'pip install ollama'.")
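For reference, a minimal usage sketch of what this hunk changes: default_completion_format is no longer a named constructor parameter but is read from **kwargs, falling back to ELF_COMPLETION_FORMAT.Chat. The host address and model name below are illustrative assumptions, not values taken from this diff.

# Sketch only; requires the ollama package and a reachable Ollama server.
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
from lollms_client.llm_bindings.ollama import OllamaBinding

# Omitting the keyword now falls back to Chat (behavior shown in the hunk above):
binding = OllamaBinding(host_address="http://localhost:11434", model_name="llama3")
assert binding.default_completion_format == ELF_COMPLETION_FORMAT.Chat

# Passing it explicitly as a keyword still works; it is routed through **kwargs:
binding = OllamaBinding(host_address="http://localhost:11434",
                        model_name="llama3",
                        default_completion_format=ELF_COMPLETION_FORMAT.Chat)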
@@ -0,0 +1,547 @@
+# bindings/openllm/binding.py
+import requests # May not be strictly needed if openllm client handles all
+import json
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+from lollms_client.lollms_utilities import encode_image # Keep for potential image handling
+from typing import Optional, Callable, List, Union, Dict
+
+from ascii_colors import ASCIIColors, trace_exception
+import pipmaster as pm
+
+# Ensure openllm, pillow (for dummy image), and tiktoken are installed
+pm.ensure_packages(["openllm", "pillow", "tiktoken"])
+
+import openllm
+import tiktoken # For fallback tokenization
+
+BindingName = "OpenLLMBinding"
+
+# Helper function to count tokens by making a minimal API call
+# This is more accurate for the specific model than a generic tokenizer
+def count_tokens_openllm(
+    text_to_tokenize: str,
+    openllm_client: openllm.client.HTTPClient,
+    timeout: int = 60,
+) -> int:
+    """
+    Counts the number of tokens in a given text for the connected OpenLLM model
+    by making a minimal request to the /v1/generate endpoint and extracting
+    the length of 'prompt_token_ids' from the response.
+    """
+    try:
+        # Make a generation request asking for 0 or 1 new token
+        # Some models might require at least 1 max_new_tokens
+        llm_config = openllm.LLMConfig(max_new_tokens=1).model_dump(flatten=True, omit_default=True)
+        response = openllm_client.generate(prompt=text_to_tokenize, llm_config=llm_config, timeout=timeout)
+
+        if response.prompt_token_ids is not None and len(response.prompt_token_ids) > 0:
+            # The prompt_token_ids from OpenLLM often include special tokens (e.g., BOS)
+            # depending on the model's tokenizer configuration.
+            # For consistency with typical "user text token count", we might need to adjust.
+            # However, for now, let's return the raw count from the model.
+            # A simple heuristic might be to subtract 1 for a BOS token if always present.
+            # This needs model-specific knowledge or further investigation.
+            # For llama3 with ollama, it was prompt_eval_count - 5 (system, user, content etc)
+            # For OpenLLM, it's harder to generalize the "overhead".
+            # Let's assume prompt_token_ids is the count of tokens for the user's text.
+            return len(response.prompt_token_ids)
+        else:
+            # Fallback if prompt_token_ids is not available or empty
+            ASCIIColors.warning("prompt_token_ids not found in OpenLLM response, using tiktoken for count_tokens.")
+            return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+    except Exception as e:
+        ASCIIColors.warning(f"Failed to count tokens via OpenLLM API, using tiktoken fallback: {e}")
+        return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+
+
+class OpenLLMBinding(LollmsLLMBinding):
+    """OpenLLM-specific binding implementation using the openllm-python client."""
+
+    DEFAULT_HOST_ADDRESS = "http://localhost:3000" # Default OpenLLM server address
+
+    def __init__(self,
+                 host_address: str = None,
+                 model_name: str = "", # Informational, as client connects to specific model server
+                 # service_key and verify_ssl_certificate are not directly used by openllm.client.HTTPClient constructor
+                 # but kept for potential future extensions or custom client logic.
+                 service_key: Optional[str] = None,
+                 verify_ssl_certificate: bool = True,
+                 timeout: int = 120, # Timeout for client requests
+                 **kwargs
+                 ):
+        _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
+        super().__init__(
+            binding_name=BindingName,
+        )
+        self.host_address = _host_address
+        self.model_name = model_name # Can be set by load_model or from config
+        self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
+        self.timeout = timeout
+
+        if openllm is None or openllm.client is None:
+            raise ImportError("OpenLLM library is not installed or client module not found. Please run 'pip install openllm'.")
+
+        try:
+            self.openllm_client = openllm.client.HTTPClient(
+                address=self.host_address,
+                timeout=self.timeout
+            )
+            # Perform a quick health check or metadata fetch to confirm connection
+            if not self._verify_connection():
+                raise ConnectionError(f"Failed to connect or verify OpenLLM server at {self.host_address}")
+
+            # Try to fetch model_name if not provided
+            if not self.model_name:
+                metadata = self._get_model_metadata_from_server()
+                if metadata and 'model_id' in metadata:
+                    self.model_name = metadata['model_id']
+                else:
+                    ASCIIColors.warning("Could not automatically determine model name from OpenLLM server.")
+
+        except Exception as e:
+            ASCIIColors.error(f"Failed to initialize OpenLLM client: {e}")
+            self.openllm_client = None
+            raise ConnectionError(f"Could not connect or initialize OpenLLM client at {self.host_address}: {e}") from e
+
+    def _verify_connection(self) -> bool:
+        if not self.openllm_client:
+            return False
+        try:
+            return self.openllm_client.health() # health() returns True if healthy, raises error otherwise
+        except Exception as e:
+            ASCIIColors.warning(f"OpenLLM server health check failed for {self.host_address}: {e}")
+            return False
+
+    def _get_model_metadata_from_server(self) -> Optional[Dict]:
+        if not self.openllm_client:
+            return None
+        try:
+            # metadata() returns a GenerationOutput object which contains model_name, backend etc.
+            meta_output = self.openllm_client.metadata()
+            # The actual LLMConfig and model details are in meta_output.configuration (a string JSON)
+            # and meta_output.model_name, meta_output.backend etc.
+            # For simplicity, let's try to parse configuration or use model_name
+            config_dict = {}
+            if meta_output.configuration:
+                try:
+                    config_dict = json.loads(meta_output.configuration)
+                except json.JSONDecodeError:
+                    ASCIIColors.warning("Failed to parse model configuration from OpenLLM metadata.")
+
+            return {
+                "model_id": config_dict.get("model_id", meta_output.model_name), # model_id from config is better
+                "model_name": meta_output.model_name, # As reported by client.metadata()
+                "backend": meta_output.backend,
+                "timeout": meta_output.timeout,
+                "configuration": config_dict
+            }
+        except Exception as e:
+            ASCIIColors.warning(f"Could not fetch metadata from OpenLLM server: {e}")
+            return None
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None, # List of image file paths
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      # repeat_last_n: int = 64, # OpenLLM's LLMConfig doesn't have direct repeat_last_n
+                      seed: Optional[int] = None,
+                      # n_threads: Optional[int] = None, # Server-side config for OpenLLM
+                      # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      ) -> Union[str, Dict[str, any]]:
+
+        if not self.openllm_client:
+            return {"status": False, "error": "OpenLLM client not initialized."}
+
+        # Construct LLMConfig
+        # Note: Not all Lollms params map directly to OpenLLM's LLMConfig.
+        # We map what's available.
+        config_params = {
+            "temperature": float(temperature),
+            "top_k": top_k,
+            "top_p": top_p,
+            "repetition_penalty": repeat_penalty,
+        }
+        if n_predict is not None: config_params['max_new_tokens'] = n_predict
+        if seed is not None: config_params['seed'] = seed # seed might not be supported by all backends/models
+
+        llm_config = openllm.LLMConfig(**config_params).model_dump(flatten=True, omit_default=True)
+
+        # Prepend system prompt if provided
+        full_prompt = prompt
+        if system_prompt and system_prompt.strip():
+            full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" # Common instruct format
+
+        # Handle images: This is highly model-dependent for OpenLLM.
+        # For LLaVA-like models, images are base64 encoded and put in the prompt.
+        # This is a simplified approach. A robust solution needs model-specific prompt templating.
+        if images:
+            ASCIIColors.warning("Image support in OpenLLMBinding is basic and assumes a LLaVA-like model "
+                                "that accepts base64 image data in the prompt.")
+            image_parts = []
+            for img_path in images:
+                try:
+                    # encode_image from lollms_utilities returns base64 string
+                    base64_image = encode_image(img_path)
+                    # Basic assumption: image can be prepended or appended.
+                    # For LLaVA, it's often "<image>\nUSER: What is this? ASSISTANT:"
+                    # or the raw base64 data might be directly in the prompt.
+                    # This is a placeholder for where more complex prompt construction would go.
+                    # For now, let's just put the base64 string.
+                    image_parts.append(f"[Image data: {base64_image}]") # Simplistic
+                except Exception as e:
+                    ASCIIColors.error(f"Could not encode image {img_path}: {e}")
+
+            if image_parts:
+                full_prompt = "\n".join(image_parts) + "\n" + full_prompt
+
+        full_response_text = ""
+        try:
+            if stream:
+                response_stream = self.openllm_client.generate_stream(
+                    prompt=full_prompt,
+                    llm_config=llm_config,
+                    timeout=self.timeout
+                )
+                for chunk in response_stream:
+                    # chunk is openllm.GenerationChunk
+                    chunk_content = chunk.text
+                    if chunk_content:
+                        full_response_text += chunk_content
+                        if streaming_callback:
+                            if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break # Callback requested stop
+                return full_response_text
+            else: # Not streaming
+                response_output = self.openllm_client.generate(
+                    prompt=full_prompt,
+                    llm_config=llm_config,
+                    timeout=self.timeout
+                )
+                # response_output is openllm.GenerationOutput
+                # It can contain multiple responses if n > 1 (not used here)
+                if response_output.responses:
+                    return response_output.responses[0].text
+                else:
+                    return {"status": False, "error": "OpenLLM returned no response."}
+        except openllm.exceptions.OpenLLMException as e:
+            error_message = f"OpenLLM API Error: {str(e)}"
+            ASCIIColors.error(error_message)
+            # Attempt to get more details if it's an HTTPError from httpx
+            if hasattr(e, '__cause__') and isinstance(e.__cause__, requests.exceptions.HTTPError):
+                error_message += f" - HTTP Status: {e.__cause__.response.status_code}, Response: {e.__cause__.response.text}"
+            elif hasattr(e, 'response') and hasattr(e.response, 'status_code'): # For httpx.HTTPStatusError
+                error_message += f" - HTTP Status: {e.response.status_code}, Response: {e.response.text}"
+
+            return {"status": False, "error": error_message}
+        except Exception as ex:
+            error_message = f"An unexpected error occurred: {str(ex)}"
+            trace_exception(ex)
+            return {"status": False, "error": error_message}
+
+    def tokenize(self, text: str) -> list:
+        """Tokenize text using tiktoken as a fallback."""
+        # OpenLLM client doesn't provide a direct tokenization API.
+        # For accurate tokenization, it would depend on the specific model served.
+        # Using tiktoken as a general approximation.
+        try:
+            # Try to use a tokenizer related to the model if known, else default
+            if "llama" in self.model_name.lower(): # Crude check
+                enc = tiktoken.encoding_for_model("text-davinci-003") # Llama tokenizers are different but this is a proxy
+            elif "gpt" in self.model_name.lower(): # e.g. gpt2 served by OpenLLM
+                enc = tiktoken.get_encoding("gpt2")
+            else:
+                enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo") # Fallback
+            return enc.encode(text)
+        except Exception:
+            # Further fallback
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
+
+    def detokenize(self, tokens: list) -> str:
+        """Detokenize tokens using tiktoken as a fallback."""
+        try:
+            if "llama" in self.model_name.lower():
+                enc = tiktoken.encoding_for_model("text-davinci-003")
+            elif "gpt" in self.model_name.lower():
+                enc = tiktoken.get_encoding("gpt2")
+            else:
+                enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo")
+            return enc.decode(tokens)
+        except Exception:
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens using the OpenLLM server if possible, else tiktoken."""
+        if not self.openllm_client:
+            ASCIIColors.warning("OpenLLM client not initialized. Using tiktoken for count_tokens.")
+            return len(self.tokenize(text)) # Fallback to tiktoken via self.tokenize
+
+        # Try the API call method for better accuracy for the specific model
+        # return count_tokens_openllm(text, self.openllm_client, self.timeout)
+        # The API call above can be slow. For faster, but less model-specific count:
+        return len(self.tokenize(text))
+
+
+    def embed(self, text: str, **kwargs) -> List[float]:
+        """Get embeddings for the input text using OpenLLM API."""
+        if not self.openllm_client:
+            raise Exception("OpenLLM client not initialized.")
+
+        # model_to_use kwarg is less relevant here as client is tied to one model server.
+        # If that server is an embedding model, it will work.
+        # llm_config can be passed via kwargs if needed for embeddings.
+        llm_config_dict = kwargs.get("llm_config", {})
+        llm_config = openllm.LLMConfig(**llm_config_dict).model_dump(flatten=True, omit_default=True) if llm_config_dict else None
+
+        try:
+            # openllm_client.embeddings expects a list of prompts
+            response = self.openllm_client.embeddings(
+                prompts=[text],
+                llm_config=llm_config,
+                timeout=self.timeout
+            )
+            # response is a list of embeddings (list of lists of floats)
+            if response and len(response) > 0:
+                return response[0]
+            else:
+                raise Exception("OpenLLM returned no embeddings.")
+        except openllm.exceptions.OpenLLMException as e:
+            error_message = f"OpenLLM API Embeddings Error: {str(e)}"
+            ASCIIColors.error(error_message)
+            raise Exception(error_message) from e
+        except Exception as ex:
+            trace_exception(ex)
+            raise Exception(f"Embedding failed: {str(ex)}") from ex
+
+    def get_model_info(self) -> dict:
+        """Return information about the current OpenLLM model setup."""
+        server_metadata = self._get_model_metadata_from_server()
+        model_id_from_server = "unknown"
+        if server_metadata and 'model_id' in server_metadata:
+            model_id_from_server = server_metadata['model_id']
+
+        # Try to determine vision support based on model name (very basic)
+        supports_vision = False
+        if self.model_name and any(vm_name in self.model_name.lower() for vm_name in ["llava", "bakllava", "vision"]):
+            supports_vision = True
+
+        return {
+            "name": self.binding_name,
+            "version": openllm.__version__ if openllm else "unknown",
+            "host_address": self.host_address,
+            "model_name": self.model_name or model_id_from_server, # Use self.model_name if set, else from server
+            "supports_structured_output": False, # Generic OpenLLM text generation doesn't guarantee this
+            "supports_vision": supports_vision # Highly dependent on the specific model served
+        }
+
+    def listModels(self) -> List[Dict[str, str]]:
+        """
+        Lists the model currently served by the connected OpenLLM instance.
+        OpenLLM client connects to one model server at a time.
+        """
+        if not self.openllm_client:
+            ASCIIColors.error("OpenLLM client not initialized. Cannot list models.")
+            return []
+
+        metadata = self._get_model_metadata_from_server()
+        if metadata:
+            return [{
+                'model_name': metadata.get('model_id', metadata.get('model_name', 'Unknown Model')), # Prefer model_id
+                'owned_by': metadata.get('backend', 'OpenLLM'), # Using backend as a proxy for owner/type
+                # OpenLLM metadata doesn't typically include a creation/modification date for the model files themselves.
+                'created_datetime': None
+            }]
+        return []
+
+    def load_model(self, model_name: str) -> bool:
+        """
+        For OpenLLM, this primarily sets the model_name for reference, as the
+        model is already loaded by the server the client connects to.
+        Optionally, it could re-initialize the client if host_address also changes,
+        or verify the existing connection serves this model.
+        Args:
+            model_name (str): Name of the model (e.g., 'mistralai/Mistral-7B-Instruct-v0.1').
+                              This should match what the server at self.host_address is running.
+        Returns:
+            bool: True if model name is set and connection seems okay.
+        """
+        self.model_name = model_name
+        ASCIIColors.info(f"OpenLLM binding model_name set to: {model_name}.")
+        ASCIIColors.info(f"Ensure OpenLLM server at {self.host_address} is running this model.")

+        # Optionally, verify the connected server's model matches
+        server_meta = self._get_model_metadata_from_server()
+        if server_meta:
+            current_server_model_id = server_meta.get('model_id', server_meta.get('model_name'))
+            if current_server_model_id and model_name not in current_server_model_id : # Check if model_name is substring of actual ID
+                ASCIIColors.warning(f"Warning: Requested model '{model_name}' may not match model '{current_server_model_id}' served at {self.host_address}.")
+            else:
+                ASCIIColors.green(f"Connected OpenLLM server model appears to be '{current_server_model_id}'.")
+
+        return self._verify_connection()
+
+
+if __name__ == '__main__':
+    global full_streamed_text
+    ASCIIColors.yellow("Testing OpenLLMBinding...")
+
+    # --- Configuration ---
+    # Ensure an OpenLLM server is running. Example:
+    # `openllm start mistralai/Mistral-7B-Instruct-v0.1`
+    # or for embeddings: `openllm start baai/bge-small-en-v1.5`
+    # or for vision (if you have a LLaVA model compatible with OpenLLM):
+    # `openllm start llava-hf/llava-1.5-7b-hf` (You might need to convert/setup some vision models for OpenLLM)
+
+    openllm_host = "http://localhost:3000"
+    # This should match the model_id you started OpenLLM with
+    test_model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Example, change if your server runs a different model
+    # test_model_name = "facebook/opt-125m" # A smaller model for quicker tests if available
+
+    # For embedding test, you'd point to an OpenLLM server running an embedding model
+    # openllm_embedding_host = "http://localhost:3001" # If running embedding model on different port
+    # test_embedding_model_name = "baai/bge-small-en-v1.5"
+
+    # For vision, if you have a LLaVA model running with OpenLLM
+    # openllm_vision_host = "http://localhost:3002"
+    # test_vision_model_name = "llava-hf/llava-1.5-7b-hf" # Example
+
+    try:
+        ASCIIColors.cyan("\n--- Initializing Binding for Text Generation ---")
+        # Initialize with the host where your text generation model is running
+        binding = OpenLLMBinding(host_address=openllm_host, model_name=test_model_name)
+        ASCIIColors.green(f"Binding initialized successfully. Connected to model: {binding.model_name}")
+        ASCIIColors.info(f"Using OpenLLM client version: {openllm.__version__ if openllm else 'N/A'}")
+
+        ASCIIColors.cyan("\n--- Listing Model (should be the one connected) ---")
+        models = binding.listModels()
+        if models:
+            ASCIIColors.green(f"Connected model info:")
+            for m in models:
+                print(m)
+        else:
+            ASCIIColors.warning("Failed to list model from server. Ensure OpenLLM server is running.")
+
+        ASCIIColors.cyan(f"\n--- Setting model to (for info): {test_model_name} ---")
+        binding.load_model(test_model_name) # This confirms the model name and checks connection
+
+        ASCIIColors.cyan("\n--- Counting Tokens (using tiktoken fallback or API) ---")
+        sample_text = "Hello, OpenLLM world! This is a test."
+        token_count = binding.count_tokens(sample_text)
+        ASCIIColors.green(f"Token count for '{sample_text}': {token_count} (may use tiktoken approximation)")
+
+        ASCIIColors.cyan("\n--- Tokenize/Detokenize (using tiktoken fallback) ---")
+        tokens = binding.tokenize(sample_text)
+        ASCIIColors.green(f"Tokens (tiktoken): {tokens[:10]}...")
+        detokenized_text = binding.detokenize(tokens)
+        ASCIIColors.green(f"Detokenized text (tiktoken): {detokenized_text}")
+
+        ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
+        prompt_text = "Why is the sky blue?"
+        system_prompt_text = "You are a helpful AI assistant providing concise answers."
+        ASCIIColors.info(f"System Prompt: {system_prompt_text}")
+        ASCIIColors.info(f"User Prompt: {prompt_text}")
+        generated_text = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=50, stream=False)
+        if isinstance(generated_text, str):
+            ASCIIColors.green(f"Generated text: {generated_text}")
+        else:
+            ASCIIColors.error(f"Generation failed: {generated_text}")
+
+        ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
+        full_streamed_text = ""
+        def stream_callback(chunk: str, msg_type: int):
+            global full_streamed_text
+            print(f"{ASCIIColors.GREEN}{chunk}{ASCIIColors.RESET}", end="", flush=True)
+            full_streamed_text += chunk
+            return True
+
+        ASCIIColors.info(f"Prompt: {prompt_text}")
+        result = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
+        print("\n--- End of Stream ---")
+        if isinstance(result, str):
+            ASCIIColors.green(f"Full streamed text: {result}")
+        else:
+            ASCIIColors.error(f"Streaming generation failed: {result}")
+
+        # --- Embeddings Test ---
+        # You need to run an OpenLLM server with an embedding model for this.
+        # Example: `openllm start baai/bge-small-en-v1.5 --port 3001`
+        # Then change openllm_host to "http://localhost:3001" for this section.
+        ASCIIColors.cyan("\n--- Embeddings Test ---")
+        ASCIIColors.magenta("INFO: This test requires an OpenLLM server running an EMBEDDING model (e.g., bge, E5).")
+        ASCIIColors.magenta(f"      If your server at {openllm_host} is a text generation model, this might fail.")
+        embedding_text = "Lollms is a cool project using OpenLLM."
+        try:
+            # If your main binding is for text-gen, you might need a separate binding instance
+            # for an embedding model if it's on a different host/port.
+            # For this example, we'll try with the current binding.
+            # If it fails, it means the model at openllm_host doesn't support /v1/embeddings
+            embedding_vector = binding.embed(embedding_text)
+            ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
+            ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
+        except Exception as e:
+            ASCIIColors.warning(f"Could not get embedding with model '{binding.model_name}' at '{binding.host_address}': {e}")
+            ASCIIColors.warning("Ensure the OpenLLM server is running an embedding-capable model and supports the /v1/embeddings endpoint.")
+
+        # --- Vision Model Test ---
+        ASCIIColors.cyan("\n--- Vision Model Test (Conceptual) ---")
+        ASCIIColors.magenta("INFO: This test requires an OpenLLM server running a VISION model (e.g., LLaVA).")
+        ASCIIColors.magenta(f"      And the model needs to accept images as base64 in prompt. This is a basic test.")
+
+        dummy_image_path = "dummy_test_image_openllm.png"
+        try:
+            from PIL import Image, ImageDraw
+            img = Image.new('RGB', (200, 50), color = ('blue'))
+            d = ImageDraw.Draw(img)
+            d.text((10,10), "OpenLLM Test", fill=('white'))
+            img.save(dummy_image_path)
+            ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
+
+            # Assuming your 'binding' is connected to a vision model server.
+            # If not, you'd initialize a new binding pointing to your vision model server.
+            # e.g., vision_binding = OpenLLMBinding(host_address=openllm_vision_host, model_name=test_vision_model_name)
+
+            # Check if current model_name hints at vision
+            if "llava" not in binding.model_name.lower() and "vision" not in binding.model_name.lower() :
+                ASCIIColors.warning(f"Current model '{binding.model_name}' might not be a vision model. Vision test may not be meaningful.")
+
+            vision_prompt = "What is written in the image and what color is the background?"
+            ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
+
+            vision_response = binding.generate_text(
+                prompt=vision_prompt,
+                images=[dummy_image_path], # The binding will attempt to base64 encode this
+                n_predict=50,
+                stream=False
+            )
+            if isinstance(vision_response, str):
+                ASCIIColors.green(f"Vision model response: {vision_response}")
+            else:
+                ASCIIColors.error(f"Vision generation failed: {vision_response}")
+        except ImportError:
+            ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
+        except Exception as e:
+            ASCIIColors.error(f"Error during vision test: {e}")
+            trace_exception(e)
+        finally:
+            import os
+            if os.path.exists(dummy_image_path):
+                os.remove(dummy_image_path)
+
+    except ConnectionRefusedError:
+        ASCIIColors.error(f"Connection to OpenLLM server at {openllm_host} refused. Is OpenLLM server running?")
+        ASCIIColors.error("Example: `openllm start mistralai/Mistral-7B-Instruct-v0.1`")
+    except openllm.exceptions.OpenLLMException as e:
+        ASCIIColors.error(f"OpenLLM specific error: {e}")
+        trace_exception(e)
+    except Exception as e:
+        ASCIIColors.error(f"An error occurred during testing: {e}")
+        trace_exception(e)
+
+    ASCIIColors.yellow("\nOpenLLMBinding test finished.")
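As a quick orientation for the new file above, here is a minimal sketch of driving the binding directly against a locally started OpenLLM server. The server command and model id are examples only, and in normal use the binding is constructed by lollms_client's core rather than by hand.

# Sketch only; assumes an OpenLLM server was started first, e.g.:
#   openllm start facebook/opt-125m
from lollms_client.llm_bindings.openllm import OpenLLMBinding

# DEFAULT_HOST_ADDRESS in the file above is http://localhost:3000
binding = OpenLLMBinding(host_address="http://localhost:3000")
print(binding.get_model_info())

reply = binding.generate_text("Summarize what OpenLLM does in one sentence.",
                              n_predict=64, stream=False)
print(reply)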