lollms-client 1.5.6-py3-none-any.whl → 1.7.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -35
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -15
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llama_cpp_server/__init__.py +605 -0
  10. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  11. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  12. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  13. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  14. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  15. lollms_client/llm_bindings/ollama/__init__.py +345 -89
  16. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  17. lollms_client/llm_bindings/openai/__init__.py +81 -20
  18. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  19. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  20. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  21. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  22. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  23. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  24. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  25. lollms_client/lollms_agentic.py +4 -2
  26. lollms_client/lollms_base_binding.py +61 -0
  27. lollms_client/lollms_core.py +512 -1890
  28. lollms_client/lollms_discussion.py +65 -39
  29. lollms_client/lollms_llm_binding.py +126 -261
  30. lollms_client/lollms_mcp_binding.py +49 -77
  31. lollms_client/lollms_stt_binding.py +99 -52
  32. lollms_client/lollms_tti_binding.py +38 -38
  33. lollms_client/lollms_ttm_binding.py +38 -42
  34. lollms_client/lollms_tts_binding.py +43 -18
  35. lollms_client/lollms_ttv_binding.py +38 -42
  36. lollms_client/lollms_types.py +4 -2
  37. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  38. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  39. lollms_client/tti_bindings/diffusers/__init__.py +464 -803
  40. lollms_client/tti_bindings/diffusers/server/main.py +1062 -0
  41. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  42. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  43. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  44. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  45. lollms_client/tti_bindings/openai/__init__.py +10 -11
  46. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  47. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  48. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  49. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  50. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  51. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  52. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  53. lollms_client/tts_bindings/bark/__init__.py +7 -10
  54. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  55. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  56. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  57. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  58. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/METADATA +113 -5
  59. lollms_client-1.7.13.dist-info/RECORD +90 -0
  60. lollms_client-1.5.6.dist-info/RECORD +0 -87
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/WHEEL +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/licenses/LICENSE +0 -0
  63. {lollms_client-1.5.6.dist-info → lollms_client-1.7.13.dist-info}/top_level.txt +0 -0
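Before relying on any of the modules listed above, it can help to confirm which of the two wheels is actually installed in the environment. A minimal, generic sketch (not part of the package itself), using only the standard library:

from importlib.metadata import version

installed = version("lollms-client")
print("lollms-client", installed)  # expected: 1.7.13 after upgrading from 1.5.6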
lollms_client/llm_bindings/openllm/__init__.py
@@ -1,550 +1,406 @@
- # bindings/openllm/binding.py
- import requests # May not be strictly needed if openllm client handles all
+ import requests
  import json
- from lollms_client.lollms_llm_binding import LollmsLLMBinding
- from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
- from lollms_client.lollms_utilities import encode_image # Keep for potential image handling
+ import base64
+ import os
+ import mimetypes
+ import math
  from typing import Optional, Callable, List, Union, Dict

- from ascii_colors import ASCIIColors, trace_exception
+ import httpx
+ import tiktoken
  import pipmaster as pm

- # Ensure openllm, pillow (for dummy image), and tiktoken are installed
- pm.ensure_packages(["openllm", "pillow", "tiktoken"])
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
+ from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
+ from lollms_client.lollms_discussion import LollmsDiscussion
+ from lollms_client.lollms_utilities import encode_image
+ from ascii_colors import ASCIIColors, trace_exception

- import openllm
- import tiktoken # For fallback tokenization
+ # Ensure required packages are installed
+ pm.ensure_packages(["httpx", "tiktoken"])

  BindingName = "OpenLLMBinding"

- # Helper function to count tokens by making a minimal API call
- # This is more accurate for the specific model than a generic tokenizer
- def count_tokens_openllm(
-     text_to_tokenize: str,
-     openllm_client: openllm.client.HTTPClient,
-     timeout: int = 60,
- ) -> int:
+
+ def _read_file_as_base64(path):
+     with open(path, "rb") as f:
+         return base64.b64encode(f.read()).decode("utf-8")
+
+
+ def _extract_markdown_path(s):
+     s = s.strip()
+     if s.startswith("[") and s.endswith(")"):
+         lb, rb = s.find("["), s.find("]")
+         if lb != -1 and rb != -1 and rb > lb:
+             return s[lb + 1 : rb].strip()
+     return s
+
+
+ def _guess_mime_from_name(name, default="image/jpeg"):
+     mime, _ = mimetypes.guess_type(name)
+     return mime or default
+
+
+ def _to_data_url(b64_str, mime):
+     return f"data:{mime};base64,{b64_str}"
+
+
+ def normalize_image_input(img, default_mime="image/jpeg"):
      """
-     Counts the number of tokens in a given text for the connected OpenLLM model
-     by making a minimal request to the /v1/generate endpoint and extracting
-     the length of 'prompt_token_ids' from the response.
+     Returns an OpenAI API‑compatible content block for an image.
+     Accepts various input formats and converts them to a data URL.
      """
-     try:
-         # Make a generation request asking for 0 or 1 new token
-         # Some models might require at least 1 max_new_tokens
-         llm_config = openllm.LLMConfig(max_new_tokens=1).model_dump(flatten=True, omit_default=True)
-         response = openllm_client.generate(prompt=text_to_tokenize, llm_config=llm_config, timeout=timeout)
-
-         if response.prompt_token_ids is not None and len(response.prompt_token_ids) > 0:
-             # The prompt_token_ids from OpenLLM often include special tokens (e.g., BOS)
-             # depending on the model's tokenizer configuration.
-             # For consistency with typical "user text token count", we might need to adjust.
-             # However, for now, let's return the raw count from the model.
-             # A simple heuristic might be to subtract 1 for a BOS token if always present.
-             # This needs model-specific knowledge or further investigation.
-             # For llama3 with ollama, it was prompt_eval_count - 5 (system, user, content etc)
-             # For OpenLLM, it's harder to generalize the "overhead".
-             # Let's assume prompt_token_ids is the count of tokens for the user's text.
-             return len(response.prompt_token_ids)
-         else:
-             # Fallback if prompt_token_ids is not available or empty
-             ASCIIColors.warning("prompt_token_ids not found in OpenLLM response, using tiktoken for count_tokens.")
-             return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
-     except Exception as e:
-         ASCIIColors.warning(f"Failed to count tokens via OpenLLM API, using tiktoken fallback: {e}")
-         return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text_to_tokenize))
+     if isinstance(img, str):
+         # Handle path‑like strings or raw base64
+         s = _extract_markdown_path(img)
+         if os.path.exists(s):
+             b64 = _read_file_as_base64(s)
+             mime = _guess_mime_from_name(s, default_mime)
+             url = _to_data_url(b64, mime)
+         else: # Assume it's a raw base64 string
+             url = _to_data_url(s, default_mime)
+         return {"type": "image_url", "image_url": {"url": url}}
+
+     raise ValueError("Unsupported image input type for OpenLLM")


  class OpenLLMBinding(LollmsLLMBinding):
-     """OpenLLM-specific binding implementation using the openllm-python client."""
-
-     DEFAULT_HOST_ADDRESS = "http://localhost:3000" # Default OpenLLM server address
-
-     def __init__(self,
-                  **kwargs
-                  ):
-         """ Initialize the OpenLLM binding.
+     """OpenLLM-specific binding implementation"""
+
+     def __init__(self, **kwargs):
+         """
+         Initialize the OpenLLM binding.
+
          Args:
-             host_address (str): The address of the OpenLLM server (default: http://localhost:3000).
-             model_name (str): The name of the model to connect to. This is primarily for informational purposes.
-             service_key (Optional[str]): Optional service key for authentication, not used by openllm client.
-             verify_ssl_certificate (bool): Whether to verify SSL certificates (default: True).
-             timeout (int): Timeout for client requests in seconds (default: 120).
+             host_address (str): URL of the OpenLLM server (e.g. ``http://localhost:3000``).
+             model_name (str): Name of the model to use.
+             service_key (str): Authentication token for the service (optional).
+             verify_ssl_certificate (bool): Whether to verify SSL certificates.
          """
-         host_address = kwargs.get("host_address")
-         _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
          super().__init__(BindingName, **kwargs)
-         self.host_address = _host_address
-         self.model_name = kwargs.get("model_name") # Can be set by load_model or from config
-         self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
-         self.timeout = kwargs.get("timeout")
-
-         if openllm is None or openllm.client is None:
-             raise ImportError("OpenLLM library is not installed or client module not found. Please run 'pip install openllm'.")
-
-         try:
-             self.openllm_client = openllm.client.HTTPClient(
-                 address=self.host_address,
-                 timeout=self.timeout
+         self.host_address = kwargs.get("host_address")
+         self.model_name = kwargs.get("model_name")
+         self.service_key = kwargs.get("service_key", os.getenv("OPENLLM_API_KEY"))
+         self.verify_ssl_certificate = kwargs.get("verify_ssl_certificate", True)
+
+         if not self.host_address:
+             raise ValueError("OpenLLM host address is required.")
+
+         # Build headers – only include Authorization if a key is actually provided
+         headers = {"Content-Type": "application/json"}
+         if self.service_key:
+             headers["Authorization"] = f"Bearer {self.service_key}"
+         else:
+             ASCIIColors.warning(
+                 "No service key provided for OpenLLM. Requests will be made without Authorization header."
              )
-             # Perform a quick health check or metadata fetch to confirm connection
-             if not self._verify_connection():
-                 raise ConnectionError(f"Failed to connect or verify OpenLLM server at {self.host_address}")
-
-             # Try to fetch model_name if not provided
-             if not self.model_name:
-                 metadata = self._get_model_metadata_from_server()
-                 if metadata and 'model_id' in metadata:
-                     self.model_name = metadata['model_id']
-                 else:
-                     ASCIIColors.warning("Could not automatically determine model name from OpenLLM server.")

-         except Exception as e:
-             ASCIIColors.error(f"Failed to initialize OpenLLM client: {e}")
-             self.openllm_client = None
-             raise ConnectionError(f"Could not connect or initialize OpenLLM client at {self.host_address}: {e}") from e
+         # Append /v1 to the base URL for OpenAI compatibility
+         base_url = f"{self.host_address.rstrip('/')}/v1"
+
+         self.client = httpx.Client(
+             base_url=base_url,
+             headers=headers,
+             verify=self.verify_ssl_certificate,
+             timeout=None,
+         )
+
+     # --------------------------------------------------------------------- #
+     # Helper methods
+     # --------------------------------------------------------------------- #
+     def _build_request_params(self, messages: list, **kwargs) -> dict:
+         """Construct the JSON payload expected by the OpenLLM /chat/completions endpoint."""
+         params = {
+             "model": kwargs.get("model", self.model_name),
+             "messages": messages,
+             "stream": kwargs.get("stream", True),
+         }

-     def _verify_connection(self) -> bool:
-         if not self.openllm_client:
-             return False
+         # Map Lollms parameters to OpenAI‑compatible fields
+         if "n_predict" in kwargs and kwargs["n_predict"] is not None:
+             params["max_tokens"] = kwargs["n_predict"]
+         if "temperature" in kwargs and kwargs["temperature"] is not None:
+             params["temperature"] = kwargs["temperature"]
+         if "top_p" in kwargs and kwargs["top_p"] is not None:
+             params["top_p"] = kwargs["top_p"]
+         if "top_k" in kwargs and kwargs["top_k"] is not None:
+             params["top_k"] = kwargs["top_k"]
+         if "repeat_penalty" in kwargs and kwargs["repeat_penalty"] is not None:
+             params["frequency_penalty"] = kwargs["repeat_penalty"]
+         if "seed" in kwargs and kwargs["seed"] is not None:
+             params["seed"] = kwargs["seed"]
+
+         return params
+
+     def _process_request(
+         self,
+         params: dict,
+         stream: Optional[bool],
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]],
+     ) -> Union[str, dict]:
+         """Execute the request – handling both streaming and non‑streaming modes."""
+         output = ""
          try:
-             return self.openllm_client.health() # health() returns True if healthy, raises error otherwise
-         except Exception as e:
-             ASCIIColors.warning(f"OpenLLM server health check failed for {self.host_address}: {e}")
-             return False
+             if stream:
+                 with self.client.stream(
+                     "POST", "/chat/completions", json=params
+                 ) as response:
+                     if response.status_code != 200:
+                         err = response.read().decode("utf-8")
+                         raise Exception(
+                             f"API Error: {response.status_code} - {err}"
+                         )
+
+                     for line in response.iter_lines():
+                         if not line:
+                             continue
+                         if line.startswith("data:"):
+                             data_str = line[len("data:") :].strip()
+                             if data_str == "[DONE]":
+                                 break
+                             try:
+                                 chunk = json.loads(data_str)
+                                 if chunk.get("choices"):
+                                     delta = chunk["choices"][0].get("delta", {})
+                                     word = delta.get("content", "")
+                                     if word:
+                                         if streaming_callback:
+                                             if not streaming_callback(
+                                                 word, MSG_TYPE.MSG_TYPE_CHUNK
+                                             ):
+                                                 break
+                                         output += word
+                             except json.JSONDecodeError:
+                                 continue
+             else:
+                 response = self.client.post("/chat/completions", json=params)
+                 if response.status_code != 200:
+                     raise Exception(
+                         f"API Error: {response.status_code} - {response.text}"
+                     )
+                 data = response.json()
+                 output = data["choices"][0]["message"]["content"]
+                 if streaming_callback:
+                     streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)

-     def _get_model_metadata_from_server(self) -> Optional[Dict]:
-         if not self.openllm_client:
-             return None
-         try:
-             # metadata() returns a GenerationOutput object which contains model_name, backend etc.
-             meta_output = self.openllm_client.metadata()
-             # The actual LLMConfig and model details are in meta_output.configuration (a string JSON)
-             # and meta_output.model_name, meta_output.backend etc.
-             # For simplicity, let's try to parse configuration or use model_name
-             config_dict = {}
-             if meta_output.configuration:
-                 try:
-                     config_dict = json.loads(meta_output.configuration)
-                 except json.JSONDecodeError:
-                     ASCIIColors.warning("Failed to parse model configuration from OpenLLM metadata.")
-
-             return {
-                 "model_id": config_dict.get("model_id", meta_output.model_name), # model_id from config is better
-                 "model_name": meta_output.model_name, # As reported by client.metadata()
-                 "backend": meta_output.backend,
-                 "timeout": meta_output.timeout,
-                 "configuration": config_dict
-             }
          except Exception as e:
-             ASCIIColors.warning(f"Could not fetch metadata from OpenLLM server: {e}")
-             return None
-
-     def generate_text(self,
-                       prompt: str,
-                       images: Optional[List[str]] = None, # List of image file paths
-                       system_prompt: str = "",
-                       n_predict: Optional[int] = None,
-                       stream: bool = False,
-                       temperature: float = 0.7,
-                       top_k: int = 40,
-                       top_p: float = 0.9,
-                       repeat_penalty: float = 1.1,
-                       # repeat_last_n: int = 64, # OpenLLM's LLMConfig doesn't have direct repeat_last_n
-                       seed: Optional[int] = None,
-                       # n_threads: Optional[int] = None, # Server-side config for OpenLLM
-                       # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
-                       split:Optional[bool]=False, # put to true if the prompt is a discussion
-                       user_keyword:Optional[str]="!@>user:",
-                       ai_keyword:Optional[str]="!@>assistant:",
-                       ) -> Union[str, Dict[str, any]]:
-
-         if not self.openllm_client:
-             return {"status": False, "error": "OpenLLM client not initialized."}
-
-         # Construct LLMConfig
-         # Note: Not all Lollms params map directly to OpenLLM's LLMConfig.
-         # We map what's available.
-         config_params = {
-             "temperature": float(temperature),
-             "top_k": top_k,
-             "top_p": top_p,
-             "repetition_penalty": repeat_penalty,
-         }
-         if n_predict is not None: config_params['max_new_tokens'] = n_predict
-         if seed is not None: config_params['seed'] = seed # seed might not be supported by all backends/models
-
-         llm_config = openllm.LLMConfig(**config_params).model_dump(flatten=True, omit_default=True)
-
-         # Prepend system prompt if provided
-         full_prompt = prompt
-         if system_prompt and system_prompt.strip():
-             full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" # Common instruct format
-
-         # Handle images: This is highly model-dependent for OpenLLM.
-         # For LLaVA-like models, images are base64 encoded and put in the prompt.
-         # This is a simplified approach. A robust solution needs model-specific prompt templating.
+             trace_exception(e)
+             err_msg = f"An error occurred with the OpenLLM API: {e}"
+             if streaming_callback:
+                 streaming_callback(err_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+             return {"status": "error", "message": err_msg}
+
+         return output
+
+     # --------------------------------------------------------------------- #
+     # Public API required by LollmsLLMBinding
+     # --------------------------------------------------------------------- #
+     def generate_text(
+         self,
+         prompt: str,
+         images: Optional[List[str]] = None,
+         system_prompt: str = "",
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         """Generate text (or multimodal output) via OpenLLM."""
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+
+         user_content = [{"type": "text", "text": prompt}]
          if images:
-             ASCIIColors.warning("Image support in OpenLLMBinding is basic and assumes a LLaVA-like model "
-                                 "that accepts base64 image data in the prompt.")
-             image_parts = []
-             for img_path in images:
-                 try:
-                     # encode_image from lollms_utilities returns base64 string
-                     base64_image = encode_image(img_path)
-                     # Basic assumption: image can be prepended or appended.
-                     # For LLaVA, it's often "<image>\nUSER: What is this? ASSISTANT:"
-                     # or the raw base64 data might be directly in the prompt.
-                     # This is a placeholder for where more complex prompt construction would go.
-                     # For now, let's just put the base64 string.
-                     image_parts.append(f"[Image data: {base64_image}]") # Simplistic
-                 except Exception as e:
-                     ASCIIColors.error(f"Could not encode image {img_path}: {e}")
-
-             if image_parts:
-                 full_prompt = "\n".join(image_parts) + "\n" + full_prompt
-
-         full_response_text = ""
+             for img in images:
+                 user_content.append(normalize_image_input(img))
+
+         messages.append({"role": "user", "content": user_content})
+
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def generate_from_messages(
+         self,
+         messages: List[Dict],
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: Optional[float] = None,
+         top_k: Optional[int] = None,
+         top_p: Optional[float] = None,
+         repeat_penalty: Optional[float] = None,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         **kwargs,
+     ) -> Union[str, dict]:
+         """Generate from a pre‑formatted list of OpenAI‑compatible messages."""
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def chat(
+         self,
+         discussion: LollmsDiscussion,
+         branch_tip_id: Optional[str] = None,
+         n_predict: Optional[int] = None,
+         stream: Optional[bool] = None,
+         temperature: float = 0.7,
+         top_k: int = 40,
+         top_p: float = 0.9,
+         repeat_penalty: float = 1.1,
+         repeat_last_n: int = 64,
+         seed: Optional[int] = None,
+         n_threads: Optional[int] = None,
+         ctx_size: Optional[int] = None,
+         streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+         think: Optional[bool] = False,
+         reasoning_effort: Optional[bool] = "low",
+         reasoning_summary: Optional[bool] = "auto",
+         **kwargs,
+     ) -> Union[str, dict]:
+         """
+         Conduct a chat session using a :class:`LollmsDiscussion` object.
+         The discussion is exported in an OpenAI‑compatible format and then
+         passed to :meth:`_process_request`.
+         """
+         messages = discussion.export("openai_chat", branch_tip_id)
+
+         params = self._build_request_params(
+             messages=messages,
+             n_predict=n_predict,
+             stream=stream,
+             temperature=temperature,
+             top_k=top_k,
+             top_p=top_p,
+             repeat_penalty=repeat_penalty,
+             **kwargs,
+         )
+         return self._process_request(params, stream, streaming_callback)
+
+     def list_models(self) -> List[Dict]:
+         """Return a list of models known to the OpenLLM server."""
+         models_info = []
          try:
-             if stream:
-                 response_stream = self.openllm_client.generate_stream(
-                     prompt=full_prompt,
-                     llm_config=llm_config,
-                     timeout=self.timeout
+             response = self.client.get("/models")
+
+             if response.status_code != 200:
+                 ASCIIColors.error(
+                     f"OpenLLM /v1/models returned status {response.status_code}. "
+                     f"Response body: {response.text}"
                 )
-                 for chunk in response_stream:
-                     # chunk is openllm.GenerationChunk
-                     chunk_content = chunk.text
-                     if chunk_content:
-                         full_response_text += chunk_content
-                         if streaming_callback:
-                             if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
-                                 break # Callback requested stop
-                 return full_response_text
-             else: # Not streaming
-                 response_output = self.openllm_client.generate(
-                     prompt=full_prompt,
-                     llm_config=llm_config,
-                     timeout=self.timeout
+                 try:
+                     response.raise_for_status()
+                 except Exception as e:
+                     trace_exception(e)
+                     return models_info # Empty list due to error
+
+             models_data = response.json().get("data", [])
+             for model in models_data:
+                 models_info.append(
+                     {
+                         "model_name": model.get("id", "N/A"),
+                         "owned_by": model.get("owned_by", "N/A"),
+                         "created": model.get("created", "N/A"),
+                         "context_length": "unknown", # Not a standard field in OpenAI spec
+                     }
                 )
-                 # response_output is openllm.GenerationOutput
-                 # It can contain multiple responses if n > 1 (not used here)
-                 if response_output.responses:
-                     return response_output.responses[0].text
-                 else:
-                     return {"status": False, "error": "OpenLLM returned no response."}
-         except openllm.exceptions.OpenLLMException as e:
-             error_message = f"OpenLLM API Error: {str(e)}"
-             ASCIIColors.error(error_message)
-             # Attempt to get more details if it's an HTTPError from httpx
-             if hasattr(e, '__cause__') and isinstance(e.__cause__, requests.exceptions.HTTPError):
-                 error_message += f" - HTTP Status: {e.__cause__.response.status_code}, Response: {e.__cause__.response.text}"
-             elif hasattr(e, 'response') and hasattr(e.response, 'status_code'): # For httpx.HTTPStatusError
-                 error_message += f" - HTTP Status: {e.response.status_code}, Response: {e.response.text}"
-
-             return {"status": False, "error": error_message}
-         except Exception as ex:
-             error_message = f"An unexpected error occurred: {str(ex)}"
-             trace_exception(ex)
-             return {"status": False, "error": error_message}
-
-     def tokenize(self, text: str) -> list:
-         """Tokenize text using tiktoken as a fallback."""
-         # OpenLLM client doesn't provide a direct tokenization API.
-         # For accurate tokenization, it would depend on the specific model served.
-         # Using tiktoken as a general approximation.
-         try:
-             # Try to use a tokenizer related to the model if known, else default
-             if "llama" in self.model_name.lower(): # Crude check
-                 enc = tiktoken.encoding_for_model("text-davinci-003") # Llama tokenizers are different but this is a proxy
-             elif "gpt" in self.model_name.lower(): # e.g. gpt2 served by OpenLLM
-                 enc = tiktoken.get_encoding("gpt2")
-             else:
-                 enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo") # Fallback
-             return enc.encode(text)
-         except Exception:
-             # Further fallback
-             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
-
-     def detokenize(self, tokens: list) -> str:
-         """Detokenize tokens using tiktoken as a fallback."""
+         except Exception as e:
+             ASCIIColors.error(
+                 f"Failed to list models from OpenLLM: {e.__class__.__name__}: {e}"
+             )
+             trace_exception(e)
+         return models_info
+
+
+     def _get_encoding(self, model_name: str | None = None):
+         """Fallback to tiktoken for generic tokenisation."""
          try:
-             if "llama" in self.model_name.lower():
-                 enc = tiktoken.encoding_for_model("text-davinci-003")
-             elif "gpt" in self.model_name.lower():
-                 enc = tiktoken.get_encoding("gpt2")
-             else:
-                 enc = tiktoken.model.encoding_for_model("gpt-3.5-turbo")
-             return enc.decode(tokens)
-         except Exception:
-             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
-
-     def count_tokens(self, text: str) -> int:
-         """Count tokens using the OpenLLM server if possible, else tiktoken."""
-         if not self.openllm_client:
-             ASCIIColors.warning("OpenLLM client not initialized. Using tiktoken for count_tokens.")
-             return len(self.tokenize(text)) # Fallback to tiktoken via self.tokenize
-
-         # Try the API call method for better accuracy for the specific model
-         # return count_tokens_openllm(text, self.openllm_client, self.timeout)
-         # The API call above can be slow. For faster, but less model-specific count:
-         return len(self.tokenize(text))
+             return tiktoken.encoding_for_model(model_name or self.model_name)
+         except KeyError:
+             return tiktoken.get_encoding("cl100k_base")
+
+     def tokenize(self, text: str) -> list[int]:
+         return self._get_encoding().encode(text)

+     def detokenize(self, tokens: list[int]) -> str:
+         return self._get_encoding().decode(tokens)

-     def embed(self, text: str, **kwargs) -> List[float]:
-         """Get embeddings for the input text using OpenLLM API."""
-         if not self.openllm_client:
-             raise Exception("OpenLLM client not initialized.")
+     def count_tokens(self, text: str) -> int:
+         return len(self.tokenize(text))

-         # model_to_use kwarg is less relevant here as client is tied to one model server.
-         # If that server is an embedding model, it will work.
-         # llm_config can be passed via kwargs if needed for embeddings.
-         llm_config_dict = kwargs.get("llm_config", {})
-         llm_config = openllm.LLMConfig(**llm_config_dict).model_dump(flatten=True, omit_default=True) if llm_config_dict else None
+     def embed(self, text: str | List[str], **kwargs) -> List:
+         """
+         Obtain embeddings via the OpenLLM ``/embeddings`` endpoint.
+         If a single string is supplied, a single embedding vector is returned;
+         otherwise a list of vectors is returned.
+         """
+         embedding_model = kwargs.get("model", self.model_name)
+         single_input = isinstance(text, str)
+         inputs = [text] if single_input else list(text)

         try:
-             # openllm_client.embeddings expects a list of prompts
-             response = self.openllm_client.embeddings(
-                 prompts=[text],
-                 llm_config=llm_config,
-                 timeout=self.timeout
-             )
-             # response is a list of embeddings (list of lists of floats)
-             if response and len(response) > 0:
-                 return response[0]
+             payload = {"model": embedding_model, "input": inputs}
+             response = self.client.post("/embeddings", json=payload)
+             response.raise_for_status()
+             data = response.json()
+
+             embeddings = [item["embedding"] for item in data.get("data", [])]
+
+             if single_input and embeddings:
+                 return embeddings[0]
             else:
-                 raise Exception("OpenLLM returned no embeddings.")
-         except openllm.exceptions.OpenLLMException as e:
-             error_message = f"OpenLLM API Embeddings Error: {str(e)}"
-             ASCIIColors.error(error_message)
-             raise Exception(error_message) from e
-         except Exception as ex:
-             trace_exception(ex)
-             raise Exception(f"Embedding failed: {str(ex)}") from ex
-
-     def get_model_info(self) -> dict:
-         """Return information about the current OpenLLM model setup."""
-         server_metadata = self._get_model_metadata_from_server()
-         model_id_from_server = "unknown"
-         if server_metadata and 'model_id' in server_metadata:
-             model_id_from_server = server_metadata['model_id']
-
-         # Try to determine vision support based on model name (very basic)
-         supports_vision = False
-         if self.model_name and any(vm_name in self.model_name.lower() for vm_name in ["llava", "bakllava", "vision"]):
-             supports_vision = True
+                 return embeddings
+
+         except Exception as e:
+             ASCIIColors.error(
+                 f"Failed to generate embeddings using model '{embedding_model}': {e}"
+             )
+             trace_exception(e)
+             return []

+     def get_model_info(self) -> dict:
+         """Return basic information about the current binding configuration."""
         return {
             "name": self.binding_name,
-             "version": openllm.__version__ if openllm else "unknown",
+             "version": pm.get_installed_version("openllm")
+             if pm.is_installed("openllm")
+             else "unknown",
             "host_address": self.host_address,
-             "model_name": self.model_name or model_id_from_server, # Use self.model_name if set, else from server
-             "supports_structured_output": False, # Generic OpenLLM text generation doesn't guarantee this
-             "supports_vision": supports_vision # Highly dependent on the specific model served
+             "model_name": self.model_name,
+             "supports_structured_output": False,
+             "supports_vision": True, # Assuming vision support based on original code
         }

-     def listModels(self) -> List[Dict[str, str]]:
-         """
-         Lists the model currently served by the connected OpenLLM instance.
-         OpenLLM client connects to one model server at a time.
-         """
-         if not self.openllm_client:
-             ASCIIColors.error("OpenLLM client not initialized. Cannot list models.")
-             return []
-
-         metadata = self._get_model_metadata_from_server()
-         if metadata:
-             return [{
-                 'model_name': metadata.get('model_id', metadata.get('model_name', 'Unknown Model')), # Prefer model_id
-                 'owned_by': metadata.get('backend', 'OpenLLM'), # Using backend as a proxy for owner/type
-                 # OpenLLM metadata doesn't typically include a creation/modification date for the model files themselves.
-                 'created_datetime': None
-             }]
-         return []
-
     def load_model(self, model_name: str) -> bool:
-         """
-         For OpenLLM, this primarily sets the model_name for reference, as the
-         model is already loaded by the server the client connects to.
-         Optionally, it could re-initialize the client if host_address also changes,
-         or verify the existing connection serves this model.
-         Args:
-             model_name (str): Name of the model (e.g., 'mistralai/Mistral-7B-Instruct-v0.1').
-                               This should match what the server at self.host_address is running.
-         Returns:
-             bool: True if model name is set and connection seems okay.
-         """
+         """Select a model for subsequent calls."""
         self.model_name = model_name
-         ASCIIColors.info(f"OpenLLM binding model_name set to: {model_name}.")
-         ASCIIColors.info(f"Ensure OpenLLM server at {self.host_address} is running this model.")
-
-         # Optionally, verify the connected server's model matches
-         server_meta = self._get_model_metadata_from_server()
-         if server_meta:
-             current_server_model_id = server_meta.get('model_id', server_meta.get('model_name'))
-             if current_server_model_id and model_name not in current_server_model_id : # Check if model_name is substring of actual ID
-                 ASCIIColors.warning(f"Warning: Requested model '{model_name}' may not match model '{current_server_model_id}' served at {self.host_address}.")
-             else:
-                 ASCIIColors.green(f"Connected OpenLLM server model appears to be '{current_server_model_id}'.")
-
-         return self._verify_connection()
-
-
- if __name__ == '__main__':
-     global full_streamed_text
-     ASCIIColors.yellow("Testing OpenLLMBinding...")
-
-     # --- Configuration ---
-     # Ensure an OpenLLM server is running. Example:
-     # `openllm start mistralai/Mistral-7B-Instruct-v0.1`
-     # or for embeddings: `openllm start baai/bge-small-en-v1.5`
-     # or for vision (if you have a LLaVA model compatible with OpenLLM):
-     # `openllm start llava-hf/llava-1.5-7b-hf` (You might need to convert/setup some vision models for OpenLLM)
-
-     openllm_host = "http://localhost:3000"
-     # This should match the model_id you started OpenLLM with
-     test_model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Example, change if your server runs a different model
-     # test_model_name = "facebook/opt-125m" # A smaller model for quicker tests if available
-
-     # For embedding test, you'd point to an OpenLLM server running an embedding model
-     # openllm_embedding_host = "http://localhost:3001" # If running embedding model on different port
-     # test_embedding_model_name = "baai/bge-small-en-v1.5"
-
-     # For vision, if you have a LLaVA model running with OpenLLM
-     # openllm_vision_host = "http://localhost:3002"
-     # test_vision_model_name = "llava-hf/llava-1.5-7b-hf" # Example
-
-     try:
-         ASCIIColors.cyan("\n--- Initializing Binding for Text Generation ---")
-         # Initialize with the host where your text generation model is running
-         binding = OpenLLMBinding(host_address=openllm_host, model_name=test_model_name)
-         ASCIIColors.green(f"Binding initialized successfully. Connected to model: {binding.model_name}")
-         ASCIIColors.info(f"Using OpenLLM client version: {openllm.__version__ if openllm else 'N/A'}")
-
-         ASCIIColors.cyan("\n--- Listing Model (should be the one connected) ---")
-         models = binding.listModels()
-         if models:
-             ASCIIColors.green(f"Connected model info:")
-             for m in models:
-                 print(m)
-         else:
-             ASCIIColors.warning("Failed to list model from server. Ensure OpenLLM server is running.")
-
-         ASCIIColors.cyan(f"\n--- Setting model to (for info): {test_model_name} ---")
-         binding.load_model(test_model_name) # This confirms the model name and checks connection
-
-         ASCIIColors.cyan("\n--- Counting Tokens (using tiktoken fallback or API) ---")
-         sample_text = "Hello, OpenLLM world! This is a test."
-         token_count = binding.count_tokens(sample_text)
-         ASCIIColors.green(f"Token count for '{sample_text}': {token_count} (may use tiktoken approximation)")
-
-         ASCIIColors.cyan("\n--- Tokenize/Detokenize (using tiktoken fallback) ---")
-         tokens = binding.tokenize(sample_text)
-         ASCIIColors.green(f"Tokens (tiktoken): {tokens[:10]}...")
-         detokenized_text = binding.detokenize(tokens)
-         ASCIIColors.green(f"Detokenized text (tiktoken): {detokenized_text}")
-
-         ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
-         prompt_text = "Why is the sky blue?"
-         system_prompt_text = "You are a helpful AI assistant providing concise answers."
-         ASCIIColors.info(f"System Prompt: {system_prompt_text}")
-         ASCIIColors.info(f"User Prompt: {prompt_text}")
-         generated_text = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=50, stream=False)
-         if isinstance(generated_text, str):
-             ASCIIColors.green(f"Generated text: {generated_text}")
-         else:
-             ASCIIColors.error(f"Generation failed: {generated_text}")
-
-         ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
-         full_streamed_text = ""
-         def stream_callback(chunk: str, msg_type: int):
-             global full_streamed_text
-             print(f"{ASCIIColors.GREEN}{chunk}{ASCIIColors.RESET}", end="", flush=True)
-             full_streamed_text += chunk
-             return True
-
-         ASCIIColors.info(f"Prompt: {prompt_text}")
-         result = binding.generate_text(prompt_text, system_prompt=system_prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
-         print("\n--- End of Stream ---")
-         if isinstance(result, str):
-             ASCIIColors.green(f"Full streamed text: {result}")
-         else:
-             ASCIIColors.error(f"Streaming generation failed: {result}")
-
-         # --- Embeddings Test ---
-         # You need to run an OpenLLM server with an embedding model for this.
-         # Example: `openllm start baai/bge-small-en-v1.5 --port 3001`
-         # Then change openllm_host to "http://localhost:3001" for this section.
-         ASCIIColors.cyan("\n--- Embeddings Test ---")
-         ASCIIColors.magenta("INFO: This test requires an OpenLLM server running an EMBEDDING model (e.g., bge, E5).")
-         ASCIIColors.magenta(f" If your server at {openllm_host} is a text generation model, this might fail.")
-         embedding_text = "Lollms is a cool project using OpenLLM."
-         try:
-             # If your main binding is for text-gen, you might need a separate binding instance
-             # for an embedding model if it's on a different host/port.
-             # For this example, we'll try with the current binding.
-             # If it fails, it means the model at openllm_host doesn't support /v1/embeddings
-             embedding_vector = binding.embed(embedding_text)
-             ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
-             ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
-         except Exception as e:
-             ASCIIColors.warning(f"Could not get embedding with model '{binding.model_name}' at '{binding.host_address}': {e}")
-             ASCIIColors.warning("Ensure the OpenLLM server is running an embedding-capable model and supports the /v1/embeddings endpoint.")
+         ASCIIColors.info(f"OpenLLM model set to: {model_name}")
+         return True

-         # --- Vision Model Test ---
-         ASCIIColors.cyan("\n--- Vision Model Test (Conceptual) ---")
-         ASCIIColors.magenta("INFO: This test requires an OpenLLM server running a VISION model (e.g., LLaVA).")
-         ASCIIColors.magenta(f" And the model needs to accept images as base64 in prompt. This is a basic test.")
-
-         dummy_image_path = "dummy_test_image_openllm.png"
-         try:
-             from PIL import Image, ImageDraw
-             img = Image.new('RGB', (200, 50), color = ('blue'))
-             d = ImageDraw.Draw(img)
-             d.text((10,10), "OpenLLM Test", fill=('white'))
-             img.save(dummy_image_path)
-             ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
-
-             # Assuming your 'binding' is connected to a vision model server.
-             # If not, you'd initialize a new binding pointing to your vision model server.
-             # e.g., vision_binding = OpenLLMBinding(host_address=openllm_vision_host, model_name=test_vision_model_name)
-
-             # Check if current model_name hints at vision
-             if "llava" not in binding.model_name.lower() and "vision" not in binding.model_name.lower() :
-                 ASCIIColors.warning(f"Current model '{binding.model_name}' might not be a vision model. Vision test may not be meaningful.")
+     def ps(self):
+         """Placeholder – OpenLLM does not expose a process‑list endpoint."""
+         return []

-             vision_prompt = "What is written in the image and what color is the background?"
-             ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
-
-             vision_response = binding.generate_text(
-                 prompt=vision_prompt,
-                 images=[dummy_image_path], # The binding will attempt to base64 encode this
-                 n_predict=50,
-                 stream=False
-             )
-             if isinstance(vision_response, str):
-                 ASCIIColors.green(f"Vision model response: {vision_response}")
-             else:
-                 ASCIIColors.error(f"Vision generation failed: {vision_response}")
-         except ImportError:
-             ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
-         except Exception as e:
-             ASCIIColors.error(f"Error during vision test: {e}")
-             trace_exception(e)
-         finally:
-             import os
-             if os.path.exists(dummy_image_path):
-                 os.remove(dummy_image_path)
-
-     except ConnectionRefusedError:
-         ASCIIColors.error(f"Connection to OpenLLM server at {openllm_host} refused. Is OpenLLM server running?")
-         ASCIIColors.error("Example: `openllm start mistralai/Mistral-7B-Instruct-v0.1`")
-     except openllm.exceptions.OpenLLMException as e:
-         ASCIIColors.error(f"OpenLLM specific error: {e}")
-         trace_exception(e)
-     except Exception as e:
-         ASCIIColors.error(f"An error occurred during testing: {e}")
-         trace_exception(e)
-
-     ASCIIColors.yellow("\nOpenLLMBinding test finished.")
+
+ # Ensure the class is treated as concrete (no remaining abstract methods)
+ OpenLLMBinding.__abstractmethods__ = set()
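The rewritten binding above no longer goes through the openllm-python client; it talks to the server's OpenAI-compatible /v1 endpoints (/chat/completions, /models, /embeddings) over httpx and falls back to tiktoken for token counting. A minimal usage sketch based on the new code, assuming a reachable OpenLLM (or other OpenAI-compatible) server; the host address, model name, and prompts are illustrative placeholders, not values taken from the package:

from lollms_client.llm_bindings.openllm import OpenLLMBinding
from lollms_client.lollms_types import MSG_TYPE

# Assumed server and model; the binding appends /v1 to the host address itself.
binding = OpenLLMBinding(
    host_address="http://localhost:3000",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
)

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
    # Print streamed tokens as they arrive; returning False stops the stream.
    print(chunk, end="", flush=True)
    return True

result = binding.generate_text(
    "Why is the sky blue?",
    system_prompt="You are a concise assistant.",
    n_predict=128,
    stream=True,
    streaming_callback=on_chunk,
)

# Tokenization now goes through tiktoken (cl100k_base when the model is unknown).
print("\ntokens:", binding.count_tokens("Hello, OpenLLM world!"))

Note that on failure the new _process_request returns a {"status": "error", "message": ...} dict instead of raising, so callers should check the return type before treating the result as a string.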