lollms-client 0.14.1__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.

Potentially problematic release.


This version of lollms-client might be problematic.

@@ -68,8 +68,8 @@ class OllamaBinding(LollmsLLMBinding):
                  host_address: str = None,
                  model_name: str = "",
                  service_key: str = None,
-                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
-                 verify_ssl_certificate: bool = True
+                 verify_ssl_certificate: bool = True,
+                 **kwargs
                  ):
         """
         Initialize the Ollama binding.
@@ -89,7 +89,7 @@ class OllamaBinding(LollmsLLMBinding):
         self.model_name=model_name
         self.service_key=service_key
         self.verify_ssl_certificate=verify_ssl_certificate
-        self.default_completion_format=default_completion_format
+        self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
 
         if ollama is None:
             raise ImportError("Ollama library is not installed. Please run 'pip install ollama'.")
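
This hunk removes default_completion_format from the OllamaBinding constructor signature; the value is now read from **kwargs, with the same default of ELF_COMPLETION_FORMAT.Chat. Callers that passed the argument by keyword keep working, since it is simply absorbed by **kwargs. A minimal sketch of both call styles (the OllamaBinding import path is an assumption, as it is not shown in this diff):

# Hypothetical usage sketch; the import path below is assumed, not taken from this diff.
# from lollms_client.llm_bindings.ollama import OllamaBinding
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT

binding_default = OllamaBinding(model_name="llama3")  # falls back to ELF_COMPLETION_FORMAT.Chat via kwargs.get(...)
binding_explicit = OllamaBinding(
    model_name="llama3",
    default_completion_format=ELF_COMPLETION_FORMAT.Chat,  # still accepted, now routed through **kwargs
)

The next hunk adds a new file, bindings/openllm/binding.py (per its header comment), implementing an OpenLLM binding: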
@@ -0,0 +1,547 @@
+# bindings/openllm/binding.py
+import requests # May not be strictly needed if openllm client handles all
+import json
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+from lollms_client.lollms_utilities import encode_image # Keep for potential image handling
+from typing import Optional, Callable, List, Union, Dict
+
+from ascii_colors import ASCIIColors, trace_exception
+import pipmaster as pm
+
+# Ensure openllm, pillow (for dummy image), and tiktoken are installed
+pm.ensure_packages(["openllm", "pillow", "tiktoken"])
+
+import openllm
+import tiktoken # For fallback tokenization
+
+BindingName = "OpenLLMBinding"
+
+# Helper function to count tokens by making a minimal API call
+# This is more accurate for the specific model than a generic tokenizer
+def count_tokens_openllm(
+    text_to_tokenize: str,
+    openllm_client: openllm.client.HTTPClient,
+    timeout: int = 60,
+) -> int:
+    """
+    Counts the number of tokens in a given text for the connected OpenLLM model
+    by making a minimal request to the /v1/generate endpoint and extracting
+    the length of 'prompt_token_ids' from the response.
+    """
+    try:
+        # Make a generation request asking for 0 or 1 new token
+        # Some models might require at least 1 max_new_tokens
+        llm_config = openllm.LLMConfig(max_new_tokens=1).model_dump(flatten=True, omit_default=True)
+        response = openllm_client.generate(prompt=text_to_tokenize, llm_config=llm_config, timeout=timeout)
+
+        if response.prompt_token_ids is not None and len(response.prompt_token_ids) > 0:
+            # The prompt_token_ids from OpenLLM often include special tokens (e.g., BOS)
+            # depending on the model's tokenizer configuration.
+            # For consistency with typical "user text token count", we might need to adjust.
+            # However, for now, let's return the raw count from the model.
+            # A simple heuristic might be to subtract 1 for a BOS token if always present.
+            # This needs model-specific knowledge or further investigation.
+            # For llama3 with ollama, it was prompt_eval_count - 5 (system, user, content etc)
+            # For OpenLLM, it's harder to generalize the "overhead".
+            # Let's assume prompt_token_ids is the count of tokens for the user's text.
+            return len(response.prompt_token_ids)
+        else:
+            # Fallback if prompt_token_ids is not available or empty
+            ASCIIColors.warning("prompt_token_ids not found in OpenLLM response, using tiktoken for count_tokens.")
+            return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+    except Exception as e:
+        ASCIIColors.warning(f"Failed to count tokens via OpenLLM API, using tiktoken fallback: {e}")
+        return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+
+
+class OpenLLMBinding(LollmsLLMBinding):
+    """OpenLLM-specific binding implementation using the openllm-python client."""
+
+    DEFAULT_HOST_ADDRESS = "http://localhost:3000" # Default OpenLLM server address
+
+    def __init__(self,
+                 host_address: str = None,
+                 model_name: str = "", # Informational, as client connects to specific model server
+                 # service_key and verify_ssl_certificate are not directly used by openllm.client.HTTPClient constructor
+                 # but kept for potential future extensions or custom client logic.
+                 service_key: Optional[str] = None,
+                 verify_ssl_certificate: bool = True,
+                 timeout: int = 120, # Timeout for client requests
+                 **kwargs
+                 ):
+        _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
+        super().__init__(
+            binding_name=BindingName,
+        )
+        self.host_address = _host_address
+        self.model_name = model_name # Can be set by load_model or from config
+        self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
+        self.timeout = timeout
+
+        if openllm is None or openllm.client is None:
+            raise ImportError("OpenLLM library is not installed or client module not found. Please run 'pip install openllm'.")
+
+        try:
+            self.openllm_client = openllm.client.HTTPClient(
+                address=self.host_address,
+                timeout=self.timeout
+            )
+            # Perform a quick health check or metadata fetch to confirm connection
+            if not self._verify_connection():
+                raise ConnectionError(f"Failed to connect or verify OpenLLM server at {self.host_address}")
+
+            # Try to fetch model_name if not provided
+            if not self.model_name:
+                metadata = self._get_model_metadata_from_server()
+                if metadata and 'model_id' in metadata:
+                    self.model_name = metadata['model_id']
+                else:
+                    ASCIIColors.warning("Could not automatically determine model name from OpenLLM server.")
+
+        except Exception as e:
+            ASCIIColors.error(f"Failed to initialize OpenLLM client: {e}")
+            self.openllm_client = None
+            raise ConnectionError(f"Could not connect or initialize OpenLLM client at {self.host_address}: {e}") from e
+
+    def _verify_connection(self) -> bool:
+        if not self.openllm_client:
+            return False
+        try:
+            return self.openllm_client.health() # health() returns True if healthy, raises error otherwise
+        except Exception as e:
+            ASCIIColors.warning(f"OpenLLM server health check failed for {self.host_address}: {e}")
+            return False
+
+    def _get_model_metadata_from_server(self) -> Optional[Dict]:
+        if not self.openllm_client:
+            return None
+        try:
+            # metadata() returns a GenerationOutput object which contains model_name, backend etc.
+            meta_output = self.openllm_client.metadata()
+            # The actual LLMConfig and model details are in meta_output.configuration (a string JSON)
+            # and meta_output.model_name, meta_output.backend etc.
+            # For simplicity, let's try to parse configuration or use model_name
+            config_dict = {}
+            if meta_output.configuration:
+                try:
+                    config_dict = json.loads(meta_output.configuration)
+                except json.JSONDecodeError:
+                    ASCIIColors.warning("Failed to parse model configuration from OpenLLM metadata.")
+
+            return {
+                "model_id": config_dict.get("model_id", meta_output.model_name), # model_id from config is better
+                "model_name": meta_output.model_name, # As reported by client.metadata()
+                "backend": meta_output.backend,
+                "timeout": meta_output.timeout,
+                "configuration": config_dict
+            }
+        except Exception as e:
+            ASCIIColors.warning(f"Could not fetch metadata from OpenLLM server: {e}")
+            return None
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None, # List of image file paths
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      # repeat_last_n: int = 64, # OpenLLM's LLMConfig doesn't have direct repeat_last_n
+                      seed: Optional[int] = None,
+                      # n_threads: Optional[int] = None, # Server-side config for OpenLLM
+                      # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      ) -> Union[str, Dict[str, any]]:
+
+        if not self.openllm_client:
+            return {"status": False, "error": "OpenLLM client not initialized."}
+
+        # Construct LLMConfig
+        # Note: Not all Lollms params map directly to OpenLLM's LLMConfig.
+        # We map what's available.
+        config_params = {
+            "temperature": float(temperature),
+            "top_k": top_k,
+            "top_p": top_p,
+            "repetition_penalty": repeat_penalty,
+        }
+        if n_predict is not None: config_params['max_new_tokens'] = n_predict
+        if seed is not None: config_params['seed'] = seed # seed might not be supported by all backends/models
+
+        llm_config = openllm.LLMConfig(**config_params).model_dump(flatten=True, omit_default=True)
+
+        # Prepend system prompt if provided
+        full_prompt = prompt
+        if system_prompt and system_prompt.strip():
+            full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" # Common instruct format
+
+        # Handle images: This is highly model-dependent for OpenLLM.
+        # For LLaVA-like models, images are base64 encoded and put in the prompt.
+        # This is a simplified approach. A robust solution needs model-specific prompt templating.
+        if images:
+            ASCIIColors.warning("Image support in OpenLLMBinding is basic and assumes a LLaVA-like model "
+                                "that accepts base64 image data in the prompt.")
+            image_parts = []
+            for img_path in images:
+                try:
+                    # encode_image from lollms_utilities returns base64 string
+                    base64_image = encode_image(img_path)
+                    # Basic assumption: image can be prepended or appended.
+                    # For LLaVA, it's often "<image>\nUSER: What is this? ASSISTANT:"
+                    # or the raw base64 data might be directly in the prompt.
+                    # This is a placeholder for where more complex prompt construction would go.
+                    # For now, let's just put the base64 string.
+                    image_parts.append(f"[Image data: {base64_image}]") # Simplistic
+                except Exception as e:
+                    ASCIIColors.error(f"Could not encode image {img_path}: {e}")
+
+            if image_parts:
+                full_prompt = "\n".join(image_parts) + "\n" + full_prompt
+
+        full_response_text = ""
+        try:
+            if stream:
+                response_stream = self.openllm_client.generate_stream(
+                    prompt=full_prompt,
+                    llm_config=llm_config,
+                    timeout=self.timeout
+                )
+                for chunk in response_stream:
+                    # chunk is openllm.GenerationChunk
+                    chunk_content = chunk.text
+                    if chunk_content:
+                        full_response_text += chunk_content
+                        if streaming_callback:
+                            if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break # Callback requested stop
+                return full_response_text
+            else: # Not streaming
+                response_output = self.openllm_client.generate(
+                    prompt=full_prompt,
+                    llm_config=llm_config,
+                    timeout=self.timeout
+                )
+                # response_output is openllm.GenerationOutput
+                # It can contain multiple responses if n > 1 (not used here)
+                if response_output.responses:
+                    return response_output.responses[0].text
+                else:
+                    return {"status": False, "error": "OpenLLM returned no response."}
+        except openllm.exceptions.OpenLLMException as e:
+            error_message = f"OpenLLM API Error: {str(e)}"
+            ASCIIColors.error(error_message)
+            # Attempt to get more details if it's an HTTPError from httpx
+            if hasattr(e, '__cause__') and isinstance(e.__cause__, requests.exceptions.HTTPError):
+                error_message += f" - HTTP Status: {e.__cause__.response.status_code}, Response: {e.__cause__.response.text}"
+            elif hasattr(e, 'response') and hasattr(e.response, 'status_code'): # For httpx.HTTPStatusError
+                error_message += f" - HTTP Status: {e.response.status_code}, Response: {e.response.text}"
+
+            return {"status": False, "error": error_message}
+        except Exception as ex:
+            error_message = f"An unexpected error occurred: {str(ex)}"
+            trace_exception(ex)
+            return {"status": False, "error": error_message}
+
+    def tokenize(self, text: str) -> list:
+        """Tokenize text using tiktoken as a fallback."""
+        # OpenLLM client doesn't provide a direct tokenization API.
+        # For accurate tokenization, it would depend on the specific model served.
+        # Using tiktoken as a general approximation.
+        try:
+            # Try to use a tokenizer related to the model if known, else default
+            if "llama" in self.model_name.lower(): # Crude check
+                enc = tiktoken.encoding_for_model("text-davinci-003") # Llama tokenizers are different but this is a proxy
+            elif "gpt" in self.model_name.lower(): # e.g. gpt2 served by OpenLLM
+                enc = tiktoken.get_encoding("gpt2")
+            else:
+                enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo") # Fallback
+            return enc.encode(text)
+        except Exception:
+            # Further fallback
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
+
+    def detokenize(self, tokens: list) -> str:
+        """Detokenize tokens using tiktoken as a fallback."""
+        try:
+            if "llama" in self.model_name.lower():
+                enc = tiktoken.encoding_for_model("text-davinci-003")
+            elif "gpt" in self.model_name.lower():
+                enc = tiktoken.get_encoding("gpt2")
+            else:
+                enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo")
+            return enc.decode(tokens)
+        except Exception:
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens using the OpenLLM server if possible, else tiktoken."""
+        if not self.openllm_client:
+            ASCIIColors.warning("OpenLLM client not initialized. Using tiktoken for count_tokens.")
+            return len(self.tokenize(text)) # Fallback to tiktoken via self.tokenize
+
+        # Try the API call method for better accuracy for the specific model
+        # return count_tokens_openllm(text, self.openllm_client, self.timeout)
+        # The API call above can be slow. For faster, but less model-specific count:
+        return len(self.tokenize(text))
+
+
+    def embed(self, text: str, **kwargs) -> List[float]:
+        """Get embeddings for the input text using OpenLLM API."""
+        if not self.openllm_client:
+            raise Exception("OpenLLM client not initialized.")
+
+        # model_to_use kwarg is less relevant here as client is tied to one model server.
+        # If that server is an embedding model, it will work.
+        # llm_config can be passed via kwargs if needed for embeddings.
+        llm_config_dict = kwargs.get("llm_config", {})
+        llm_config = openllm.LLMConfig(**llm_config_dict).model_dump(flatten=True, omit_default=True) if llm_config_dict else None
+
+        try:
+            # openllm_client.embeddings expects a list of prompts
+            response = self.openllm_client.embeddings(
+                prompts=[text],
+                llm_config=llm_config,
+                timeout=self.timeout
+            )
+            # response is a list of embeddings (list of lists of floats)
+            if response and len(response) > 0:
+                return response[0]
+            else:
+                raise Exception("OpenLLM returned no embeddings.")
+        except openllm.exceptions.OpenLLMException as e:
+            error_message = f"OpenLLM API Embeddings Error: {str(e)}"
+            ASCIIColors.error(error_message)
+            raise Exception(error_message) from e
+        except Exception as ex:
+            trace_exception(ex)
+            raise Exception(f"Embedding failed: {str(ex)}") from ex
+
+    def get_model_info(self) -> dict:
+        """Return information about the current OpenLLM model setup."""
+        server_metadata = self._get_model_metadata_from_server()
+        model_id_from_server = "unknown"
+        if server_metadata and 'model_id' in server_metadata:
+            model_id_from_server = server_metadata['model_id']
+
+        # Try to determine vision support based on model name (very basic)
+        supports_vision = False
+        if self.model_name and any(vm_name in self.model_name.lower() for vm_name in ["llava", "bakllava", "vision"]):
+            supports_vision = True
+
+        return {
+            "name": self.binding_name,
+            "version": openllm.__version__ if openllm else "unknown",
+            "host_address": self.host_address,
+            "model_name": self.model_name or model_id_from_server, # Use self.model_name if set, else from server
+            "supports_structured_output": False, # Generic OpenLLM text generation doesn't guarantee this
+            "supports_vision": supports_vision # Highly dependent on the specific model served
+        }
+
+    def listModels(self) -> List[Dict[str, str]]:
+        """
+        Lists the model currently served by the connected OpenLLM instance.
+        OpenLLM client connects to one model server at a time.
+        """
+        if not self.openllm_client:
+            ASCIIColors.error("OpenLLM client not initialized. Cannot list models.")
+            return []
+
+        metadata = self._get_model_metadata_from_server()
+        if metadata:
+            return [{
+                'model_name': metadata.get('model_id', metadata.get('model_name', 'Unknown Model')), # Prefer model_id
+                'owned_by': metadata.get('backend', 'OpenLLM'), # Using backend as a proxy for owner/type
+                # OpenLLM metadata doesn't typically include a creation/modification date for the model files themselves.
+                'created_datetime': None
+            }]
+        return []
+
+    def load_model(self, model_name: str) -> bool:
+        """
+        For OpenLLM, this primarily sets the model_name for reference, as the
+        model is already loaded by the server the client connects to.
+        Optionally, it could re-initialize the client if host_address also changes,
+        or verify the existing connection serves this model.
+        Args:
+            model_name (str): Name of the model (e.g., 'mistralai/Mistral-7B-Instruct-v0.1').
+                              This should match what the server at self.host_address is running.
+        Returns:
+            bool: True if model name is set and connection seems okay.
+        """
+        self.model_name = model_name
+        ASCIIColors.info(f"OpenLLM binding model_name set to: {model_name}.")
+        ASCIIColors.info(f"Ensure OpenLLM server at {self.host_address} is running this model.")
+
+        # Optionally, verify the connected server's model matches
+        server_meta = self._get_model_metadata_from_server()
+        if server_meta:
+            current_server_model_id = server_meta.get('model_id', server_meta.get('model_name'))
+            if current_server_model_id and model_name not in current_server_model_id : # Check if model_name is substring of actual ID
+                ASCIIColors.warning(f"Warning: Requested model '{model_name}' may not match model '{current_server_model_id}' served at {self.host_address}.")
+            else:
+                ASCIIColors.green(f"Connected OpenLLM server model appears to be '{current_server_model_id}'.")
+
+        return self._verify_connection()
+
+
+if __name__ == '__main__':
+    global full_streamed_text
+    ASCIIColors.yellow("Testing OpenLLMBinding...")
+
+    # --- Configuration ---
+    # Ensure an OpenLLM server is running. Example:
+    # `openllm start mistralai/Mistral-7B-Instruct-v0.1`
+    # or for embeddings: `openllm start baai/bge-small-en-v1.5`
+    # or for vision (if you have a LLaVA model compatible with OpenLLM):
+    # `openllm start llava-hf/llava-1.5-7b-hf` (You might need to convert/setup some vision models for OpenLLM)
+
+    openllm_host = "http://localhost:3000"
+    # This should match the model_id you started OpenLLM with
+    test_model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Example, change if your server runs a different model
+    # test_model_name = "facebook/opt-125m" # A smaller model for quicker tests if available
+
+    # For embedding test, you'd point to an OpenLLM server running an embedding model
+    # openllm_embedding_host = "http://localhost:3001" # If running embedding model on different port
+    # test_embedding_model_name = "baai/bge-small-en-v1.5"
+
+    # For vision, if you have a LLaVA model running with OpenLLM
+    # openllm_vision_host = "http://localhost:3002"
+    # test_vision_model_name = "llava-hf/llava-1.5-7b-hf" # Example
+
+    try:
+        ASCIIColors.cyan("\n--- Initializing Binding for Text Generation ---")
+        # Initialize with the host where your text generation model is running
+        binding = OpenLLMBinding(host_address=openllm_host, model_name=test_model_name)
+        ASCIIColors.green(f"Binding initialized successfully. Connected to model: {binding.model_name}")
+        ASCIIColors.info(f"Using OpenLLM client version: {openllm.__version__ if openllm else 'N/A'}")
+
+        ASCIIColors.cyan("\n--- Listing Model (should be the one connected) ---")
+        models = binding.listModels()
+        if models:
+            ASCIIColors.green(f"Connected model info:")
+            for m in models:
+                print(m)
+        else:
+            ASCIIColors.warning("Failed to list model from server. Ensure OpenLLM server is running.")
+
+        ASCIIColors.cyan(f"\n--- Setting model to (for info): {test_model_name} ---")
+        binding.load_model(test_model_name) # This confirms the model name and checks connection
+
+        ASCIIColors.cyan("\n--- Counting Tokens (using tiktoken fallback or API) ---")
+        sample_text = "Hello, OpenLLM world! This is a test."
+        token_count = binding.count_tokens(sample_text)
+        ASCIIColors.green(f"Token count for '{sample_text}': {token_count} (may use tiktoken approximation)")
+
+        ASCIIColors.cyan("\n--- Tokenize/Detokenize (using tiktoken fallback) ---")
+        tokens = binding.tokenize(sample_text)
+        ASCIIColors.green(f"Tokens (tiktoken): {tokens[:10]}...")
+        detokenized_text = binding.detokenize(tokens)
+        ASCIIColors.green(f"Detokenized text (tiktoken): {detokenized_text}")
+
+        ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
+        prompt_text = "Why is the sky blue?"
+        system_prompt_text = "You are a helpful AI assistant providing concise answers."
+        ASCIIColors.info(f"System Prompt: {system_prompt_text}")
+        ASCIIColors.info(f"User Prompt: {prompt_text}")
+        generated_text = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=50, stream=False)
+        if isinstance(generated_text, str):
+            ASCIIColors.green(f"Generated text: {generated_text}")
+        else:
+            ASCIIColors.error(f"Generation failed: {generated_text}")
+
+        ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
+        full_streamed_text = ""
+        def stream_callback(chunk: str, msg_type: int):
+            global full_streamed_text
+            print(f"{ASCIIColors.GREEN}{chunk}{ASCIIColors.RESET}", end="", flush=True)
+            full_streamed_text += chunk
+            return True
+
+        ASCIIColors.info(f"Prompt: {prompt_text}")
+        result = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
+        print("\n--- End of Stream ---")
+        if isinstance(result, str):
+            ASCIIColors.green(f"Full streamed text: {result}")
+        else:
+            ASCIIColors.error(f"Streaming generation failed: {result}")
+
+        # --- Embeddings Test ---
+        # You need to run an OpenLLM server with an embedding model for this.
+        # Example: `openllm start baai/bge-small-en-v1.5 --port 3001`
+        # Then change openllm_host to "http://localhost:3001" for this section.
+        ASCIIColors.cyan("\n--- Embeddings Test ---")
+        ASCIIColors.magenta("INFO: This test requires an OpenLLM server running an EMBEDDING model (e.g., bge, E5).")
+        ASCIIColors.magenta(f" If your server at {openllm_host} is a text generation model, this might fail.")
+        embedding_text = "Lollms is a cool project using OpenLLM."
+        try:
+            # If your main binding is for text-gen, you might need a separate binding instance
+            # for an embedding model if it's on a different host/port.
+            # For this example, we'll try with the current binding.
+            # If it fails, it means the model at openllm_host doesn't support /v1/embeddings
+            embedding_vector = binding.embed(embedding_text)
+            ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
+            ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
+        except Exception as e:
+            ASCIIColors.warning(f"Could not get embedding with model '{binding.model_name}' at '{binding.host_address}': {e}")
+            ASCIIColors.warning("Ensure the OpenLLM server is running an embedding-capable model and supports the /v1/embeddings endpoint.")

+        # --- Vision Model Test ---
+        ASCIIColors.cyan("\n--- Vision Model Test (Conceptual) ---")
+        ASCIIColors.magenta("INFO: This test requires an OpenLLM server running a VISION model (e.g., LLaVA).")
+        ASCIIColors.magenta(f" And the model needs to accept images as base64 in prompt. This is a basic test.")
+
+        dummy_image_path = "dummy_test_image_openllm.png"
+        try:
+            from PIL import Image, ImageDraw
+            img = Image.new('RGB', (200, 50), color = ('blue'))
+            d = ImageDraw.Draw(img)
+            d.text((10,10), "OpenLLM Test", fill=('white'))
+            img.save(dummy_image_path)
+            ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
+
+            # Assuming your 'binding' is connected to a vision model server.
+            # If not, you'd initialize a new binding pointing to your vision model server.
+            # e.g., vision_binding = OpenLLMBinding(host_address=openllm_vision_host, model_name=test_vision_model_name)
+
+            # Check if current model_name hints at vision
+            if "llava" not in binding.model_name.lower() and "vision" not in binding.model_name.lower() :
+                ASCIIColors.warning(f"Current model '{binding.model_name}' might not be a vision model. Vision test may not be meaningful.")
+
+            vision_prompt = "What is written in the image and what color is the background?"
+            ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
+
+            vision_response = binding.generate_text(
+                prompt=vision_prompt,
+                images=[dummy_image_path], # The binding will attempt to base64 encode this
+                n_predict=50,
+                stream=False
+            )
+            if isinstance(vision_response, str):
+                ASCIIColors.green(f"Vision model response: {vision_response}")
+            else:
+                ASCIIColors.error(f"Vision generation failed: {vision_response}")
+        except ImportError:
+            ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
+        except Exception as e:
+            ASCIIColors.error(f"Error during vision test: {e}")
+            trace_exception(e)
+        finally:
+            import os
+            if os.path.exists(dummy_image_path):
+                os.remove(dummy_image_path)
+
+    except ConnectionRefusedError:
+        ASCIIColors.error(f"Connection to OpenLLM server at {openllm_host} refused. Is OpenLLM server running?")
+        ASCIIColors.error("Example: `openllm start mistralai/Mistral-7B-Instruct-v0.1`")
+    except openllm.exceptions.OpenLLMException as e:
+        ASCIIColors.error(f"OpenLLM specific error: {e}")
+        trace_exception(e)
+    except Exception as e:
+        ASCIIColors.error(f"An error occurred during testing: {e}")
+        trace_exception(e)
+
+    ASCIIColors.yellow("\nOpenLLMBinding test finished.")
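
The __main__ block above doubles as the binding's self-test. A condensed usage sketch under the same assumptions (an OpenLLM server already started with `openllm start mistralai/Mistral-7B-Instruct-v0.1` and listening on http://localhost:3000; the import path is an assumption, since the diff only identifies the file as bindings/openllm/binding.py):

# Condensed sketch of the self-test above; the import path below is assumed, not taken from this diff.
# from lollms_client.llm_bindings.openllm.binding import OpenLLMBinding

binding = OpenLLMBinding(
    host_address="http://localhost:3000",             # matches DEFAULT_HOST_ADDRESS
    model_name="mistralai/Mistral-7B-Instruct-v0.1",  # should match the model the server is running
)
print(binding.count_tokens("Hello, OpenLLM world!"))   # tiktoken-based approximation by default
reply = binding.generate_text(
    "Why is the sky blue?",
    system_prompt="You are a helpful AI assistant providing concise answers.",
    n_predict=50,
    stream=False,
)
print(reply if isinstance(reply, str) else reply["error"])  # a dict with an "error" key is returned on failure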