lollms-client 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff compares publicly available package versions as they appear in their respective public registries and is provided for informational purposes only.
Potentially problematic release: this version of lollms-client might be problematic.
- examples/text_gen.py +1 -1
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +1 -0
- lollms_client/llm_bindings/lollms/__init__.py +411 -267
- lollms_client/llm_bindings/lollms_webui/__init__.py +428 -0
- lollms_client/lollms_core.py +151 -124
- lollms_client/lollms_discussion.py +262 -38
- lollms_client/lollms_utilities.py +10 -2
- {lollms_client-0.29.0.dist-info → lollms_client-0.29.1.dist-info}/METADATA +248 -47
- {lollms_client-0.29.0.dist-info → lollms_client-0.29.1.dist-info}/RECORD +13 -13
- lollms_client/llm_bindings/lollms_chat/__init__.py +0 -571
- {lollms_client-0.29.0.dist-info → lollms_client-0.29.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.29.0.dist-info → lollms_client-0.29.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.29.0.dist-info → lollms_client-0.29.1.dist-info}/top_level.txt +0 -0
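
The headline change in this release is the removal of the `lollms_chat` binding (its full contents are reproduced in the diff below) alongside the addition of a new `lollms_webui` binding. For orientation, here is a minimal, hypothetical usage sketch of the removed `LollmsChatBinding`, based only on the signatures visible in the diff below; the host address, model name, and prompt are placeholder values, not taken from the package.

```python
# Hypothetical usage sketch of the removed LollmsChatBinding (lollms-client 0.29.0),
# based only on the constructor and generate_text signatures shown in the diff below.
# The endpoint URL, model name, and prompt are placeholders.
from lollms_client.llm_bindings.lollms_chat import LollmsChatBinding  # removed in 0.29.1

binding = LollmsChatBinding(
    host_address="http://localhost:9600/v1",  # any OpenAI-compatible endpoint
    model_name="gpt-3.5-turbo",
    service_key=None,                         # falls back to the OPENAI_API_KEY env var
)

def on_chunk(chunk, msg_type):
    # Print streamed chunks as they arrive; returning False would stop the stream.
    print(chunk, end="", flush=True)
    return True

text = binding.generate_text(
    "Explain what an LLM binding is in one sentence.",
    system_prompt="You are a helpful assistant.",
    n_predict=128,
    stream=True,
    streaming_callback=on_chunk,
)
```
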
lollms_client/llm_bindings/lollms_chat/__init__.py (deleted)

@@ -1,571 +0,0 @@

# bindings/Lollms_chat/binding.py
import requests
import json
from lollms_client.lollms_llm_binding import LollmsLLMBinding
from lollms_client.lollms_types import MSG_TYPE
from lollms_client.lollms_utilities import encode_image
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
from lollms_client.lollms_discussion import LollmsDiscussion
from typing import Optional, Callable, List, Union
from ascii_colors import ASCIIColors, trace_exception
from typing import List, Dict

import pipmaster as pm

pm.ensure_packages(["openai","tiktoken"])

import openai
import tiktoken
import os

BindingName = "LollmsChatBinding"


class LollmsChatBinding(LollmsLLMBinding):
    """LollmsChat-specific binding implementation (open ai compatible with some extra parameters)"""

    def __init__(self,
                 host_address: str = None,
                 model_name: str = "",
                 service_key: str = None,
                 verify_ssl_certificate: bool = True,
                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
                 **kwargs):
        """
        Initialize the OpenAI binding.

        Args:
            host_address (str): Host address for the OpenAI service. Defaults to DEFAULT_HOST_ADDRESS.
            model_name (str): Name of the model to use. Defaults to empty string.
            service_key (str): Authentication key for the service. Defaults to None.
            verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
            personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
        """
        super().__init__(
            binding_name = "openai",
        )
        self.host_address=host_address
        self.model_name=model_name
        self.service_key=service_key
        self.verify_ssl_certificate=verify_ssl_certificate
        self.default_completion_format=default_completion_format

        if not self.service_key:
            self.service_key = os.getenv("OPENAI_API_KEY", self.service_key)
        self.client = openai.OpenAI(api_key=self.service_key, base_url=None if host_address is None else host_address if len(host_address)>0 else None)
        self.completion_format = ELF_COMPLETION_FORMAT.Chat


    def generate_text(self,
                      prompt: str,
                      images: Optional[List[str]] = None,
                      system_prompt: str = "",
                      n_predict: Optional[int] = None,
                      stream: Optional[bool] = None,
                      temperature: float = 0.7,
                      top_k: int = 40,
                      top_p: float = 0.9,
                      repeat_penalty: float = 1.1,
                      repeat_last_n: int = 64,
                      seed: Optional[int] = None,
                      n_threads: Optional[int] = None,
                      ctx_size: int | None = None,
                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                      split:Optional[bool]=False, # put to true if the prompt is a discussion
                      user_keyword:Optional[str]="!@>user:",
                      ai_keyword:Optional[str]="!@>assistant:",
                      ) -> Union[str, dict]:
        """
        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

        Args:
            prompt (str): The input prompt for text generation.
            images (Optional[List[str]]): List of image file paths for multimodal generation.
            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
            seed (Optional[int]): Random seed for generation. Uses instance default if None.
            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
            ctx_size (int | None): Context size override for this generation.
            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                - First parameter (str): The chunk of text received.
                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
            split:Optional[bool]: put to true if the prompt is a discussion
            user_keyword:Optional[str]: when splitting we use this to extract user prompt
            ai_keyword:Optional[str]: when splitting we use this to extract ai prompt

        Returns:
            Union[str, dict]: Generated text or error dictionary if failed.
        """
        count = 0
        output = ""
        messages = [
            {
                "role": "system",
                "content": system_prompt or "You are a helpful assistant.",
            }
        ]

        # Prepare messages based on whether images are provided
        if images:
            if split:
                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
                if images:
                    messages[-1]["content"] = [
                        {
                            "type": "text",
                            "text": messages[-1]["content"]
                        }
                    ]+[
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
                            }
                        }
                        for image_path in images
                    ]
            else:
                messages.append({
                    'role': 'user',
                    'content': [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ] + [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
                            }
                        }
                        for image_path in images
                    ]
                    }
                )

        else:

            if split:
                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
                if images:
                    messages[-1]["content"] = [
                        {
                            "type": "text",
                            "text": messages[-1]["content"]
                        }
                    ]
            else:
                messages.append({
                    'role': 'user',
                    'content': [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                    }
                )

        # Generate text using the OpenAI API
        if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
            chat_completion = self.client.chat.completions.create(
                model=self.model_name,  # Choose the engine according to your OpenAI plan
                messages=messages,
                max_tokens=n_predict,  # Adjust the desired length of the generated response
                n=1,  # Specify the number of responses you want
                temperature=temperature,  # Adjust the temperature for more or less randomness in the output
                stream=stream
            )

            if stream:
                for resp in chat_completion:
                    if count >= n_predict:
                        break
                    try:
                        word = resp.choices[0].delta.content
                    except Exception as ex:
                        word = ""
                    if streaming_callback is not None:
                        if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                            break
                    if word:
                        output += word
                        count += 1
            else:
                output = chat_completion.choices[0].message.content
        else:
            completion = self.client.completions.create(
                model=self.model_name,  # Choose the engine according to your OpenAI plan
                prompt=prompt,
                max_tokens=n_predict,  # Adjust the desired length of the generated response
                n=1,  # Specify the number of responses you want
                temperature=temperature,  # Adjust the temperature for more or less randomness in the output
                stream=stream
            )

            if stream:
                for resp in completion:
                    if count >= n_predict:
                        break
                    try:
                        word = resp.choices[0].text
                    except Exception as ex:
                        word = ""
                    if streaming_callback is not None:
                        if not streaming_callback(word, "MSG_TYPE_CHUNK"):
                            break
                    if word:
                        output += word
                        count += 1
            else:
                output = completion.choices[0].text

        return output

    def generate_from_messages(self,
                               messages: List[Dict],
                               n_predict: Optional[int] = None,
                               stream: Optional[bool] = None,
                               temperature: Optional[float] = None,
                               top_k: Optional[int] = None,
                               top_p: Optional[float] = None,
                               repeat_penalty: Optional[float] = None,
                               repeat_last_n: Optional[int] = None,
                               seed: Optional[int] = None,
                               n_threads: Optional[int] = None,
                               ctx_size: int | None = None,
                               streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                               **kwargs
                               ) -> Union[str, dict]:
        # Build the request parameters
        params = {
            "model": self.model_name,
            "messages": messages,
            "max_tokens": n_predict,
            "n": 1,
            "temperature": temperature,
            "top_p": top_p,
            "frequency_penalty": repeat_penalty,
            "stream": stream
        }
        # Add seed if available, as it's supported by newer OpenAI models
        if seed is not None:
            params["seed"] = seed

        # Remove None values, as the API expects them to be absent
        params = {k: v for k, v in params.items() if v is not None}

        output = ""
        # 2. Call the API
        try:
            completion = self.client.chat.completions.create(**params)

            if stream:
                for chunk in completion:
                    # The streaming response for chat has a different structure
                    delta = chunk.choices[0].delta
                    if delta.content:
                        word = delta.content
                        if streaming_callback is not None:
                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                                break
                        output += word
            else:
                output = completion.choices[0].message.content

        except Exception as e:
            # Handle API errors gracefully
            error_message = f"An error occurred with the OpenAI API: {e}"
            if streaming_callback:
                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
            return {"status": "error", "message": error_message}

        return output

    def chat(self,
             discussion: LollmsDiscussion,
             branch_tip_id: Optional[str] = None,
             n_predict: Optional[int] = None,
             stream: Optional[bool] = None,
             temperature: float = 0.7,
             top_k: int = 40,
             top_p: float = 0.9,
             repeat_penalty: float = 1.1,
             repeat_last_n: int = 64,
             seed: Optional[int] = None,
             n_threads: Optional[int] = None,
             ctx_size: Optional[int] = None,
             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
             ) -> Union[str, dict]:
        """
        Conduct a chat session with the OpenAI model using a LollmsDiscussion object.

        Args:
            discussion (LollmsDiscussion): The discussion object containing the conversation history.
            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
            n_predict (Optional[int]): Maximum number of tokens to generate.
            stream (Optional[bool]): Whether to stream the output.
            temperature (float): Sampling temperature.
            top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
            top_p (float): Top-p sampling parameter.
            repeat_penalty (float): Frequency penalty for repeated tokens.
            seed (Optional[int]): Random seed for generation.
            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.

        Returns:
            Union[str, dict]: The generated text or an error dictionary.
        """
        # 1. Export the discussion to the OpenAI chat format
        # This handles system prompts, user/assistant roles, and multi-modal content automatically.
        messages = discussion.export("openai_chat", branch_tip_id)

        # Build the request parameters
        params = {
            "model": self.model_name,
            "messages": messages,
            "max_tokens": n_predict,
            "n": 1,
            "temperature": temperature,
            "top_p": top_p,
            "frequency_penalty": repeat_penalty,
            "stream": stream
        }
        # Add seed if available, as it's supported by newer OpenAI models
        if seed is not None:
            params["seed"] = seed

        # Remove None values, as the API expects them to be absent
        params = {k: v for k, v in params.items() if v is not None}

        output = ""
        # 2. Call the API
        try:
            # Check if we should use the chat completions or legacy completions endpoint
            if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
                completion = self.client.chat.completions.create(**params)

                if stream:
                    for chunk in completion:
                        # The streaming response for chat has a different structure
                        delta = chunk.choices[0].delta
                        if delta.content:
                            word = delta.content
                            if streaming_callback is not None:
                                if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                                    break
                            output += word
                else:
                    output = completion.choices[0].message.content

            else:  # Fallback to legacy completion format (not recommended for chat)
                # We need to format the messages list into a single string prompt
                legacy_prompt = discussion.export("openai_completion", branch_tip_id)
                legacy_params = {
                    "model": self.model_name,
                    "prompt": legacy_prompt,
                    "max_tokens": n_predict,
                    "n": 1,
                    "temperature": temperature,
                    "top_p": top_p,
                    "frequency_penalty": repeat_penalty,
                    "stream": stream
                }
                completion = self.client.completions.create(**legacy_params)

                if stream:
                    for chunk in completion:
                        word = chunk.choices[0].text
                        if streaming_callback is not None:
                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                                break
                        output += word
                else:
                    output = completion.choices[0].text

        except Exception as e:
            # Handle API errors gracefully
            error_message = f"An error occurred with the OpenAI API: {e}"
            if streaming_callback:
                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
            return {"status": "error", "message": error_message}

        return output

    def tokenize(self, text: str) -> list:
        """
        Tokenize the input text into a list of characters.

        Args:
            text (str): The text to tokenize.

        Returns:
            list: List of individual characters.
        """
        try:
            return tiktoken.model.encoding_for_model(self.model_name).encode(text)
        except:
            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)

    def detokenize(self, tokens: list) -> str:
        """
        Convert a list of tokens back to text.

        Args:
            tokens (list): List of tokens (characters) to detokenize.

        Returns:
            str: Detokenized text.
        """
        try:
            return tiktoken.model.encoding_for_model(self.model_name).decode(tokens)
        except:
            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)

    def count_tokens(self, text: str) -> int:
        """
        Count tokens from a text.

        Args:
            tokens (list): List of tokens to detokenize.

        Returns:
            int: Number of tokens in text.
        """
        return len(self.tokenize(text))


    def embed(self, text: str, **kwargs) -> list:
        """
        Get embeddings for the input text using OpenAI API.

        Args:
            text (str): Input text to embed.
            **kwargs: Additional arguments. The 'model' argument can be used
                      to specify the embedding model (e.g., "text-embedding-3-small").
                      Defaults to "text-embedding-ada-002".

        Returns:
            list: The embedding vector as a list of floats, or an empty list on failure.
        """
        # Determine the embedding model, prioritizing kwargs, with a default
        embedding_model = kwargs.get("model", self.model_name)

        try:
            # The OpenAI API expects the input to be a list of strings
            response = self.client.embeddings.create(
                model=embedding_model,
                input=[text]  # Wrap the single text string in a list
            )

            # Extract the embedding from the response
            if response.data and len(response.data) > 0:
                return response.data[0].embedding
            else:
                ASCIIColors.warning("OpenAI API returned no data for the embedding request.")
                return []

        except Exception as e:
            ASCIIColors.error(f"Failed to generate embeddings using OpenAI API: {e}")
            trace_exception(e)
            return []


    def get_model_info(self) -> dict:
        """
        Return information about the current OpenAI model.

        Returns:
            dict: Dictionary containing model name, version, and host address.
        """
        return {
            "name": "OpenAI",
            "version": "2.0",
            "host_address": self.host_address,
            "model_name": self.model_name
        }

    def listModels(self) -> List[Dict]:
        # Known context lengths
        known_context_lengths = {
            "gpt-4o": 128000,
            "gpt-4": 8192,
            "gpt-4-0613": 8192,
            "gpt-4-1106-preview": 128000,
            "gpt-4-0125-preview": 128000,
            "gpt-4-turbo": 128000,
            "gpt-3.5-turbo": 4096,
            "gpt-3.5-turbo-16k": 16000,
            "gpt-3.5-turbo-1106": 16385,
            "gpt-3.5-turbo-0125": 16385,
            "text-davinci-003": 4097,
            "text-davinci-002": 4097,
            "davinci": 2049,
            "curie": 2049,
            "babbage": 2049,
            "ada": 2049,
        }

        generation_prefixes = (
            "gpt-",
            "text-davinci",
            "davinci",
            "curie",
            "babbage",
            "ada"
        )

        models_info = []
        prompt_buffer = 500

        try:
            models = self.client.models.list()
            for model in models.data:
                model_id = model.id
                if model_id.startswith(generation_prefixes):
                    context_length = known_context_lengths.get(model_id, "unknown")
                    max_generation = (
                        context_length - prompt_buffer
                        if isinstance(context_length, int)
                        else "unknown"
                    )
                    models_info.append({
                        "model_name": model_id,
                        "owned_by": getattr(model, "owned_by", "N/A"),
                        "created": getattr(model, "created", "N/A"),
                        "context_length": context_length,
                        "max_generation": max_generation,
                    })
                else:
                    models_info.append({
                        "model_name": model_id,
                        "owned_by": getattr(model, "owned_by", "N/A"),
                        "created": getattr(model, "created", "N/A"),
                        "context_length": None,
                        "max_generation": None,
                    })

        except Exception as e:
            print(f"Failed to list models: {e}")

        return models_info


    def load_model(self, model_name: str) -> bool:
        """
        Load a specific model into the OpenAI binding.

        Args:
            model_name (str): Name of the model to load.

        Returns:
            bool: True if model loaded successfully.
        """
        self.model = model_name
        self.model_name = model_name
        return True

The WHEEL, licenses/LICENSE, and top_level.txt files are unchanged.
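
One detail worth noting in the removed binding's tokenize/detokenize methods is the tokenizer fallback: tiktoken only recognizes a fixed set of OpenAI model names, so the code falls back to the gpt-3.5-turbo encoding whenever the configured model name is unknown. A standalone sketch of the same pattern (the model name below is a placeholder for any custom or non-OpenAI name):

```python
# Standalone sketch of the tokenizer fallback used by the removed binding.
# tiktoken.encoding_for_model raises KeyError for model names it does not know,
# so an unknown or custom model name falls back to the gpt-3.5-turbo encoding.
import tiktoken

def count_tokens(text: str, model_name: str = "my-custom-model") -> int:
    try:
        enc = tiktoken.encoding_for_model(model_name)
    except KeyError:
        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
    return len(enc.encode(text))

print(count_tokens("Hello, world!"))
```
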