lollms-client 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client has been flagged as potentially problematic.

@@ -1,51 +1,62 @@
- # bindings/lollms/binding.py
+ # bindings/Lollms_chat/binding.py
  import requests
+ import json
  from lollms_client.lollms_llm_binding import LollmsLLMBinding
  from lollms_client.lollms_types import MSG_TYPE
  from lollms_client.lollms_utilities import encode_image
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
  from lollms_client.lollms_discussion import LollmsDiscussion
- from ascii_colors import ASCIIColors, trace_exception
  from typing import Optional, Callable, List, Union
- import json
+ from ascii_colors import ASCIIColors, trace_exception
+ from typing import List, Dict
+
+ import pipmaster as pm
+
+ pm.ensure_packages(["openai","tiktoken"])

- BindingName = "LollmsLLMBinding"
+ import openai
+ import tiktoken
+ import os

+ BindingName = "LollmsBinding"

- class LollmsLLMBinding(LollmsLLMBinding):
- """LOLLMS-specific binding implementation"""
+
+ class LollmsBinding(LollmsLLMBinding):
+ """Lollms-specific binding implementation (open ai compatible with some extra parameters)"""

- DEFAULT_HOST_ADDRESS = "http://localhost:9600"

- def __init__(self,
- host_address: str = None,
+ def __init__(self,
+ host_address: str = "http://localhost:9642", #This is the default local installation
  model_name: str = "",
- service_key: str = None,
+ service_key: str|None = None, # a key generated on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
  verify_ssl_certificate: bool = True,
- personality: Optional[int] = None,
- **kwargs
- ):
+ default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+ **kwargs):
  """
- Initialize the LOLLMS binding.
+ Initialize the OpenAI binding.

  Args:
- host_address (str): Host address for the LOLLMS service. Defaults to DEFAULT_HOST_ADDRESS.
+ host_address (str): Host address for the OpenAI service. Defaults to DEFAULT_HOST_ADDRESS.
  model_name (str): Name of the model to use. Defaults to empty string.
- service_key (str): Authentication key for the service. Defaults to None.
+ service_key (str): Authentication key for the service. Defaults to None. This is a key generated
+ on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
  verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
- personality (Optional[int]): Personality ID for generation. Defaults to None.
+ personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
  """
  super().__init__(
- binding_name = "lollms"
+ binding_name = "openai",
  )
-
- self.host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
+ self.host_address=host_address
  self.model_name=model_name
  self.service_key=service_key
  self.verify_ssl_certificate=verify_ssl_certificate
- self.default_completion_format=kwargs.get("default_completion_format",ELF_COMPLETION_FORMAT.Chat)
- self.personality = personality
- self.model = None
+ self.default_completion_format=default_completion_format
+
+ if not self.service_key:
+ self.service_key = os.getenv("LOLLMS_API_KEY", self.service_key)
+ self.client = openai.OpenAI(api_key=self.service_key, base_url=None if host_address is None else host_address if len(host_address)>0 else None)
+ self.completion_format = ELF_COMPLETION_FORMAT.Chat
+

  def generate_text(self,
  prompt: str,
@@ -53,11 +64,11 @@ class LollmsLLMBinding(LollmsLLMBinding):
  system_prompt: str = "",
  n_predict: Optional[int] = None,
  stream: Optional[bool] = None,
- temperature: Optional[float] = None,
- top_k: Optional[int] = None,
- top_p: Optional[float] = None,
- repeat_penalty: Optional[float] = None,
- repeat_last_n: Optional[int] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
  seed: Optional[int] = None,
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
@@ -92,242 +103,330 @@ class LollmsLLMBinding(LollmsLLMBinding):
  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- # Determine endpoint based on presence of images
- endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
- url = f"{self.host_address}{endpoint}"
-
- # Set headers
- headers = {
- 'Content-Type': 'application/json',
- }
- if self.service_key:
- headers['Authorization'] = f'Bearer {self.service_key}'
+ count = 0
+ output = ""
+ messages = [
+ {
+ "role": "system",
+ "content": system_prompt or "You are a helpful assistant.",
+ }
+ ]

- # Handle images if provided
- image_data = []
+ # Prepare messages based on whether images are provided
  if images:
- for image_path in images:
- try:
- encoded_image = encode_image(image_path)
- image_data.append(encoded_image)
- except Exception as e:
- return {"status": False, "error": f"Failed to process image {image_path}: {str(e)}"}
-
- # Prepare request data
- data = {
- "prompt":"!@>system: "+system_prompt+"\n"+"!@>user: "+prompt if system_prompt else prompt,
- "model_name": self.model_name,
- "personality": self.personality,
- "n_predict": n_predict,
- "stream": stream,
+ if split:
+ messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ if images:
+ messages[-1]["content"] = [
+ {
+ "type": "text",
+ "text": messages[-1]["content"]
+ }
+ ]+[
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+ }
+ }
+ for image_path in images
+ ]
+ else:
+ messages.append({
+ 'role': 'user',
+ 'content': [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ] + [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+ }
+ }
+ for image_path in images
+ ]
+ }
+ )
+
+ else:
+
+ if split:
+ messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ if images:
+ messages[-1]["content"] = [
+ {
+ "type": "text",
+ "text": messages[-1]["content"]
+ }
+ ]
+ else:
+ messages.append({
+ 'role': 'user',
+ 'content': [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ]
+ }
+ )
+
+ # Generate text using the OpenAI API
+ if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+ chat_completion = self.client.chat.completions.create(
+ model=self.model_name, # Choose the engine according to your OpenAI plan
+ messages=messages,
+ max_tokens=n_predict, # Adjust the desired length of the generated response
+ n=1, # Specify the number of responses you want
+ temperature=temperature, # Adjust the temperature for more or less randomness in the output
+ stream=stream
+ )
+
+ if stream:
+ for resp in chat_completion:
+ if count >= n_predict:
+ break
+ try:
+ word = resp.choices[0].delta.content
+ except Exception as ex:
+ word = ""
+ if streaming_callback is not None:
+ if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+ break
+ if word:
+ output += word
+ count += 1
+ else:
+ output = chat_completion.choices[0].message.content
+ else:
+ completion = self.client.completions.create(
+ model=self.model_name, # Choose the engine according to your OpenAI plan
+ prompt=prompt,
+ max_tokens=n_predict, # Adjust the desired length of the generated response
+ n=1, # Specify the number of responses you want
+ temperature=temperature, # Adjust the temperature for more or less randomness in the output
+ stream=stream
+ )
+
+ if stream:
+ for resp in completion:
+ if count >= n_predict:
+ break
+ try:
+ word = resp.choices[0].text
+ except Exception as ex:
+ word = ""
+ if streaming_callback is not None:
+ if not streaming_callback(word, "MSG_TYPE_CHUNK"):
+ break
+ if word:
+ output += word
+ count += 1
+ else:
+ output = completion.choices[0].text
+
+ return output
+
+ def generate_from_messages(self,
+ messages: List[Dict],
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ repeat_penalty: Optional[float] = None,
+ repeat_last_n: Optional[int] = None,
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ **kwargs
+ ) -> Union[str, dict]:
+ # Build the request parameters
+ params = {
+ "model": self.model_name,
+ "messages": messages,
+ "max_tokens": n_predict,
+ "n": 1,
  "temperature": temperature,
- "top_k": top_k,
  "top_p": top_p,
- "repeat_penalty": repeat_penalty,
- "repeat_last_n": repeat_last_n,
- "seed": seed,
- "n_threads": n_threads
+ "frequency_penalty": repeat_penalty,
+ "stream": stream
  }
+ # Add seed if available, as it's supported by newer OpenAI models
+ if seed is not None:
+ params["seed"] = seed
+
+ # Remove None values, as the API expects them to be absent
+ params = {k: v for k, v in params.items() if v is not None}

- if image_data:
- data["images"] = image_data
-
- # Make the request
- response = requests.post(
- url,
- json=data,
- headers=headers,
- stream=stream,
- verify=self.verify_ssl_certificate
- )
-
- if not stream:
- if response.status_code == 200:
- try:
- text = response.text.strip()
- return text
- except Exception as ex:
- return {"status": False, "error": str(ex)}
- else:
- return {"status": False, "error": response.text}
- else:
- text = ""
- if response.status_code == 200:
- try:
- for line in response.iter_lines():
- chunk = line.decode("utf-8")
- text += chunk
- if streaming_callback:
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
- # Handle potential quotes from streaming response
- if text and text[0] == '"':
- text = text[1:]
- if text and text[-1] == '"':
- text = text[:-1]
- return text.rstrip('!')
- except Exception as ex:
- return {"status": False, "error": str(ex)}
+ output = ""
+ # 2. Call the API
+ try:
+ completion = self.client.chat.completions.create(**params)
+
+ if stream:
+ for chunk in completion:
+ # The streaming response for chat has a different structure
+ delta = chunk.choices[0].delta
+ if delta.content:
+ word = delta.content
+ if streaming_callback is not None:
+ if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+ break
+ output += word
  else:
- return {"status": False, "error": response.text}
+ output = completion.choices[0].message.content
+
+ except Exception as e:
+ # Handle API errors gracefully
+ error_message = f"An error occurred with the OpenAI API: {e}"
+ if streaming_callback:
+ streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
+ return {"status": "error", "message": error_message}
+
+ return output
+
  def chat(self,
  discussion: LollmsDiscussion,
  branch_tip_id: Optional[str] = None,
  n_predict: Optional[int] = None,
  stream: Optional[bool] = None,
- temperature: Optional[float] = None,
- top_k: Optional[int] = None,
- top_p: Optional[float] = None,
- repeat_penalty: Optional[float] = None,
- repeat_last_n: Optional[int] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
  seed: Optional[int] = None,
  n_threads: Optional[int] = None,
- ctx_size: int | None = None,
+ ctx_size: Optional[int] = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
  ) -> Union[str, dict]:
  """
- Conduct a chat session with a lollms-webui server using a LollmsDiscussion object.
+ Conduct a chat session with the OpenAI model using a LollmsDiscussion object.

  Args:
  discussion (LollmsDiscussion): The discussion object containing the conversation history.
  branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
- ... (other parameters) ...
+ n_predict (Optional[int]): Maximum number of tokens to generate.
+ stream (Optional[bool]): Whether to stream the output.
+ temperature (float): Sampling temperature.
+ top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
+ top_p (float): Top-p sampling parameter.
+ repeat_penalty (float): Frequency penalty for repeated tokens.
+ seed (Optional[int]): Random seed for generation.
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.

  Returns:
  Union[str, dict]: The generated text or an error dictionary.
  """
- # 1. Export the discussion to the lollms-native text format
- prompt_text = discussion.export("lollms_text", branch_tip_id)
-
- # 2. Extract images from the LAST message of the branch
- # lollms-webui's endpoint associates images with the final prompt
- active_branch_id = branch_tip_id or discussion.active_branch_id
- branch = discussion.get_branch(active_branch_id)
- last_message = branch[-1] if branch else None
-
- image_data = []
- if last_message and last_message.images:
- # The endpoint expects a list of base64 strings.
- # We will only process images of type 'base64'. URL types are not supported by this endpoint.
- for img in last_message.images:
- if img['type'] == 'base64':
- image_data.append(img['data'])
- # Note: 'url' type images are ignored for this binding.
-
- # 3. Determine endpoint and build payload
- endpoint = "/lollms_generate_with_images" if image_data else "/lollms_generate"
- url = f"{self.host_address}{endpoint}"
-
- headers = {'Content-Type': 'application/json'}
- if self.service_key:
- headers['Authorization'] = f'Bearer {self.service_key}'
-
- data = {
- "prompt": prompt_text,
- "model_name": self.model_name,
- "personality": self.personality,
- "n_predict": n_predict,
- "stream": stream,
+ # 1. Export the discussion to the OpenAI chat format
+ # This handles system prompts, user/assistant roles, and multi-modal content automatically.
+ messages = discussion.export("openai_chat", branch_tip_id)
+
+ # Build the request parameters
+ params = {
+ "model": self.model_name,
+ "messages": messages,
+ "max_tokens": n_predict,
+ "n": 1,
  "temperature": temperature,
- "top_k": top_k,
  "top_p": top_p,
- "repeat_penalty": repeat_penalty,
- "repeat_last_n": repeat_last_n,
- "seed": seed,
- "n_threads": n_threads
+ "frequency_penalty": repeat_penalty,
+ "stream": stream
  }
- if image_data:
- data["images"] = image_data
+ # Add seed if available, as it's supported by newer OpenAI models
+ if seed is not None:
+ params["seed"] = seed

- # 4. Make the request (logic copied and adapted from generate_text)
+ # Remove None values, as the API expects them to be absent
+ params = {k: v for k, v in params.items() if v is not None}
+
+ output = ""
+ # 2. Call the API
  try:
- response = requests.post(
- url,
- json=data,
- headers=headers,
- stream=stream,
- verify=self.verify_ssl_certificate
- )
- response.raise_for_status() # Raise an exception for bad status codes
-
- if not stream:
- return response.text.strip()
- else:
- full_response_text = ""
- for line in response.iter_lines():
- if line:
- chunk = line.decode("utf-8")
- full_response_text += chunk
- if streaming_callback:
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+ # Check if we should use the chat completions or legacy completions endpoint
+ if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+ completion = self.client.chat.completions.create(**params)
+
+ if stream:
+ for chunk in completion:
+ # The streaming response for chat has a different structure
+ delta = chunk.choices[0].delta
+ if delta.content:
+ word = delta.content
+ if streaming_callback is not None:
+ if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+ break
+ output += word
+ else:
+ output = completion.choices[0].message.content
+
+ else: # Fallback to legacy completion format (not recommended for chat)
+ # We need to format the messages list into a single string prompt
+ legacy_prompt = discussion.export("openai_completion", branch_tip_id)
+ legacy_params = {
+ "model": self.model_name,
+ "prompt": legacy_prompt,
+ "max_tokens": n_predict,
+ "n": 1,
+ "temperature": temperature,
+ "top_p": top_p,
+ "frequency_penalty": repeat_penalty,
+ "stream": stream
+ }
+ completion = self.client.completions.create(**legacy_params)
+
+ if stream:
+ for chunk in completion:
+ word = chunk.choices[0].text
+ if streaming_callback is not None:
+ if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
  break
- # Clean up potential quotes from some streaming formats
- if full_response_text.startswith('"') and full_response_text.endswith('"'):
- full_response_text = full_response_text[1:-1]
- return full_response_text.rstrip('!')
-
- except requests.exceptions.RequestException as e:
- error_message = f"lollms-webui request error: {e}"
+ output += word
+ else:
+ output = completion.choices[0].text
+
+ except Exception as e:
+ # Handle API errors gracefully
+ error_message = f"An error occurred with the OpenAI API: {e}"
+ if streaming_callback:
+ streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
  return {"status": "error", "message": error_message}
- except Exception as ex:
- error_message = f"lollms-webui generation error: {str(ex)}"
- return {"status": "error", "message": error_message}
+
+ return output
  def tokenize(self, text: str) -> list:
  """
- Tokenize the input text into a list of tokens using the /lollms_tokenize endpoint.
+ Tokenize the input text into a list of characters.

  Args:
  text (str): The text to tokenize.

  Returns:
- list: List of tokens.
+ list: List of individual characters.
  """
- response=None
  try:
- # Prepare the request payload
- payload = {
- "prompt": text,
- "return_named": False # Set to True if you want named tokens
- }
-
- # Make the POST request to the /lollms_tokenize endpoint
- response = requests.post(f"{self.host_address}/lollms_tokenize", json=payload)
+ return tiktoken.model.encoding_for_model(self.model_name).encode(text)
+ except:
+ return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)

- # Check if the request was successful
- if response.status_code == 200:
- return response.json()
- else:
- raise Exception(f"Failed to tokenize text: {response.text}")
- except Exception as ex:
- trace_exception(ex)
- raise Exception(f"Failed to tokenize text: {response.text}")
-
  def detokenize(self, tokens: list) -> str:
  """
- Convert a list of tokens back to text using the /lollms_detokenize endpoint.
+ Convert a list of tokens back to text.

  Args:
- tokens (list): List of tokens to detokenize.
+ tokens (list): List of tokens (characters) to detokenize.

  Returns:
  str: Detokenized text.
  """
  try:
- # Prepare the request payload
- payload = {
- "tokens": tokens,
- "return_named": False # Set to True if you want named tokens
- }
-
- # Make the POST request to the /lollms_detokenize endpoint
- response = requests.post(f"{self.host_address}/lollms_detokenize", json=payload)
-
- # Check if the request was successful
- if response.status_code == 200:
- return response.json()
- else:
- raise Exception(f"Failed to detokenize tokens: {response.text}")
- except Exception as ex:
- return {"status": False, "error": str(ex)}
+ return tiktoken.model.encoding_for_model(self.model_name).decode(tokens)
+ except:
+ return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)

  def count_tokens(self, text: str) -> int:
  """
@@ -340,66 +439,127 @@ class LollmsLLMBinding(LollmsLLMBinding):
  int: Number of tokens in text.
  """
  return len(self.tokenize(text))
-
+
+
  def embed(self, text: str, **kwargs) -> list:
  """
- Get embeddings for the input text using Ollama API
-
+ Get embeddings for the input text using OpenAI API.
+
  Args:
- text (str or List[str]): Input text to embed
- **kwargs: Additional arguments like model, truncate, options, keep_alive
-
+ text (str): Input text to embed.
+ **kwargs: Additional arguments. The 'model' argument can be used
+ to specify the embedding model (e.g., "text-embedding-3-small").
+ Defaults to "text-embedding-ada-002".
+
  Returns:
- dict: Response containing embeddings
+ list: The embedding vector as a list of floats, or an empty list on failure.
  """
- api_key = kwargs.pop("api_key", None)
- headers = (
- {"Content-Type": "application/json", "Authorization": api_key}
- if api_key
- else {"Content-Type": "application/json"}
- )
- embeddings = []
- request_data = {"text": text}
- response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
- response.raise_for_status()
- result = response.json()
- return result["vector"]
+ # Determine the embedding model, prioritizing kwargs, with a default
+ embedding_model = kwargs.get("model", self.model_name)
+
+ try:
+ # The OpenAI API expects the input to be a list of strings
+ response = self.client.embeddings.create(
+ model=embedding_model,
+ input=[text] # Wrap the single text string in a list
+ )
+
+ # Extract the embedding from the response
+ if response.data and len(response.data) > 0:
+ return response.data[0].embedding
+ else:
+ ASCIIColors.warning("OpenAI API returned no data for the embedding request.")
+ return []
+
+ except Exception as e:
+ ASCIIColors.error(f"Failed to generate embeddings using OpenAI API: {e}")
+ trace_exception(e)
+ return []
+

  def get_model_info(self) -> dict:
  """
- Return information about the current LOLLMS model.
+ Return information about the current OpenAI model.

  Returns:
- dict: Dictionary containing model name, version, host address, and personality.
+ dict: Dictionary containing model name, version, and host address.
  """
  return {
- "name": "lollms",
- "version": "1.0",
+ "name": "OpenAI",
+ "version": "2.0",
  "host_address": self.host_address,
- "model_name": self.model_name,
- "personality": self.personality
+ "model_name": self.model_name
  }

+ def listModels(self) -> List[Dict]:
+ # Known context lengths
+ known_context_lengths = {
+ "gpt-4o": 128000,
+ "gpt-4": 8192,
+ "gpt-4-0613": 8192,
+ "gpt-4-1106-preview": 128000,
+ "gpt-4-0125-preview": 128000,
+ "gpt-4-turbo": 128000,
+ "gpt-3.5-turbo": 4096,
+ "gpt-3.5-turbo-16k": 16000,
+ "gpt-3.5-turbo-1106": 16385,
+ "gpt-3.5-turbo-0125": 16385,
+ "text-davinci-003": 4097,
+ "text-davinci-002": 4097,
+ "davinci": 2049,
+ "curie": 2049,
+ "babbage": 2049,
+ "ada": 2049,
+ }

- def listModels(self) -> dict:
- """Lists models"""
- url = f"{self.host_address}/list_models"
+ generation_prefixes = (
+ "gpt-",
+ "text-davinci",
+ "davinci",
+ "curie",
+ "babbage",
+ "ada"
+ )

- response = requests.get(url)
+ models_info = []
+ prompt_buffer = 500

- if response.status_code == 200:
- try:
- models = json.loads(response.content.decode("utf-8"))
- return [{"model_name":m} for m in models]
- except Exception as ex:
- return {"status": False, "error": str(ex)}
- else:
- return {"status": False, "error": response.text}
+ try:
+ models = self.client.models.list()
+ for model in models.data:
+ model_id = model.id
+ if model_id.startswith(generation_prefixes):
+ context_length = known_context_lengths.get(model_id, "unknown")
+ max_generation = (
+ context_length - prompt_buffer
+ if isinstance(context_length, int)
+ else "unknown"
+ )
+ models_info.append({
+ "model_name": model_id,
+ "owned_by": getattr(model, "owned_by", "N/A"),
+ "created": getattr(model, "created", "N/A"),
+ "context_length": context_length,
+ "max_generation": max_generation,
+ })
+ else:
+ models_info.append({
+ "model_name": model_id,
+ "owned_by": getattr(model, "owned_by", "N/A"),
+ "created": getattr(model, "created", "N/A"),
+ "context_length": None,
+ "max_generation": None,
+ })
+
+ except Exception as e:
+ print(f"Failed to list models: {e}")
+
+ return models_info


  def load_model(self, model_name: str) -> bool:
  """
- Load a specific model into the LOLLMS binding.
+ Load a specific model into the OpenAI binding.

  Args:
  model_name (str): Name of the model to load.
@@ -410,19 +570,3 @@ class LollmsLLMBinding(LollmsLLMBinding):
  self.model = model_name
  self.model_name = model_name
  return True
-
- # Lollms specific methods
- def lollms_listMountedPersonalities(self, host_address:str=None):
- host_address = host_address if host_address else self.host_address
- url = f"{host_address}/list_mounted_personalities"
-
- response = requests.get(url)
-
- if response.status_code == 200:
- try:
- text = json.loads(response.content.decode("utf-8"))
- return text
- except Exception as ex:
- return {"status": False, "error": str(ex)}
- else:
- return {"status": False, "error": response.text}
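For orientation, here is a minimal usage sketch of the new OpenAI-compatible binding introduced in this release. It is an illustration only: the import path is hypothetical (the diff only shows that the module lives at bindings/Lollms_chat/binding.py), and the host address, API key, and model name are placeholder assumptions based on the defaults visible in the code above.

    import os

    # Hypothetical import path; adjust to wherever the installed package exposes the binding.
    from lollms_client.llm_bindings.lollms_chat.binding import LollmsBinding

    # The constructor falls back to the LOLLMS_API_KEY environment variable
    # when no service_key argument is passed (see __init__ in the diff above).
    os.environ.setdefault("LOLLMS_API_KEY", "my-lollms-key")  # placeholder key

    binding = LollmsBinding(
        host_address="http://localhost:9642",  # default local lollms service in the new code
        model_name="gpt-4o",                   # any model name exposed by the server
    )

    # Non-streaming generation through the chat-completions path
    reply = binding.generate_text(
        "Give a one-line description of lollms.",
        system_prompt="You are a helpful assistant.",
        n_predict=128,
        stream=False,
    )
    print(reply)

The same instance also exposes tokenize/detokenize (now backed by tiktoken), embed (OpenAI embeddings endpoint), and listModels, as shown in the diff above.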