lollms-client 0.28.0__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lollms-client might be problematic.
- examples/text_gen.py +1 -1
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +1 -0
- lollms_client/llm_bindings/lollms/__init__.py +411 -267
- lollms_client/llm_bindings/lollms_webui/__init__.py +428 -0
- lollms_client/lollms_core.py +157 -130
- lollms_client/lollms_discussion.py +343 -61
- lollms_client/lollms_personality.py +8 -0
- lollms_client/lollms_utilities.py +10 -2
- lollms_client-0.29.1.dist-info/METADATA +963 -0
- {lollms_client-0.28.0.dist-info → lollms_client-0.29.1.dist-info}/RECORD +14 -14
- lollms_client/llm_bindings/lollms_chat/__init__.py +0 -571
- lollms_client-0.28.0.dist-info/METADATA +0 -604
- {lollms_client-0.28.0.dist-info → lollms_client-0.29.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.28.0.dist-info → lollms_client-0.29.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.28.0.dist-info → lollms_client-0.29.1.dist-info}/top_level.txt +0 -0
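The main change in this release is a rewrite of the lollms LLM binding (diffed below): instead of posting to the legacy /lollms_generate REST endpoints, the binding now wraps an OpenAI-compatible client pointed at the local LoLLMs server (new default host http://localhost:9642), falls back to the LOLLMS_API_KEY environment variable when no service_key is passed, and tokenizes with tiktoken. The old lollms_chat binding is deleted and a new lollms_webui binding is added. Below is a minimal usage sketch based on the constructor and generate_text code shown in the diff; the import path, key, and model name are illustrative assumptions, not taken from the package documentation.

    import os
    from lollms_client.llm_bindings.lollms import LollmsBinding  # assumed import path
    from lollms_client.lollms_types import MSG_TYPE

    # Hypothetical key; the rewritten __init__ falls back to LOLLMS_API_KEY when service_key is None
    os.environ.setdefault("LOLLMS_API_KEY", "my-lollms-key")

    binding = LollmsBinding(
        host_address="http://localhost:9642",  # new default local installation address
        model_name="gpt-3.5-turbo",            # any model exposed by the server
    )

    # Streaming callback: the new code sends each text delta as MSG_TYPE_CHUNK and
    # stops generating if the callback returns False.
    def on_chunk(word: str, msg_type: MSG_TYPE) -> bool:
        if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
            print(word, end="", flush=True)
        return True

    binding.generate_text("Say hello", n_predict=64, stream=True,
                          streaming_callback=on_chunk)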
lollms_client/llm_bindings/lollms/__init__.py
@@ -1,51 +1,62 @@
-# bindings/
+# bindings/Lollms_chat/binding.py
 import requests
+import json
 from lollms_client.lollms_llm_binding import LollmsLLMBinding
 from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_utilities import encode_image
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
 from lollms_client.lollms_discussion import LollmsDiscussion
-from ascii_colors import ASCIIColors, trace_exception
 from typing import Optional, Callable, List, Union
-import
+from ascii_colors import ASCIIColors, trace_exception
+from typing import List, Dict
+
+import pipmaster as pm
+
+pm.ensure_packages(["openai","tiktoken"])

-
+import openai
+import tiktoken
+import os

+BindingName = "LollmsBinding"

-
-
+
+class LollmsBinding(LollmsLLMBinding):
+    """Lollms-specific binding implementation (open ai compatible with some extra parameters)"""

-    DEFAULT_HOST_ADDRESS = "http://localhost:9600"

-    def __init__(self,
-                 host_address: str =
+    def __init__(self,
+                 host_address: str = "http://localhost:9642", #This is the default local installation
                  model_name: str = "",
-                 service_key: str = None,
+                 service_key: str|None = None, # a key generated on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
                  verify_ssl_certificate: bool = True,
-
-                 **kwargs
-                 ):
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+                 **kwargs):
         """
-        Initialize the
+        Initialize the OpenAI binding.

         Args:
-            host_address (str): Host address for the
+            host_address (str): Host address for the OpenAI service. Defaults to DEFAULT_HOST_ADDRESS.
             model_name (str): Name of the model to use. Defaults to empty string.
-            service_key (str): Authentication key for the service. Defaults to None.
+            service_key (str): Authentication key for the service. Defaults to None. This is a key generated
+                on the lollms interface (it is advised to use LOLLMS_API_KEY environment variable instead)
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
-            personality (Optional[int]):
+            personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
         """
         super().__init__(
-            binding_name = "
+            binding_name = "openai",
         )
-
-        self.host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
+        self.host_address=host_address
         self.model_name=model_name
         self.service_key=service_key
         self.verify_ssl_certificate=verify_ssl_certificate
-        self.default_completion_format=
-
-        self.
+        self.default_completion_format=default_completion_format
+
+        if not self.service_key:
+            self.service_key = os.getenv("LOLLMS_API_KEY", self.service_key)
+        self.client = openai.OpenAI(api_key=self.service_key, base_url=None if host_address is None else host_address if len(host_address)>0 else None)
+        self.completion_format = ELF_COMPLETION_FORMAT.Chat
+

     def generate_text(self,
                       prompt: str,
@@ -53,11 +64,11 @@ class LollmsLLMBinding(LollmsLLMBinding):
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
                       stream: Optional[bool] = None,
-                      temperature:
-                      top_k:
-                      top_p:
-                      repeat_penalty:
-                      repeat_last_n:
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      repeat_last_n: int = 64,
                       seed: Optional[int] = None,
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
@@ -92,242 +103,330 @@ class LollmsLLMBinding(LollmsLLMBinding):
         Returns:
             Union[str, dict]: Generated text or error dictionary if failed.
         """
-
-
-
-
-
-
-
-
-        if self.service_key:
-            headers['Authorization'] = f'Bearer {self.service_key}'
+        count = 0
+        output = ""
+        messages = [
+            {
+                "role": "system",
+                "content": system_prompt or "You are a helpful assistant.",
+            }
+        ]

-        #
-        image_data = []
+        # Prepare messages based on whether images are provided
         if images:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            if images:
+                messages[-1]["content"] = [
+                    {
+                        "type": "text",
+                        "text": messages[-1]["content"]
+                    }
+                ]+[
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                        }
+                    }
+                    for image_path in images
+                ]
+            else:
+                messages.append({
+                    'role': 'user',
+                    'content': [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        }
+                    ] + [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                            }
+                        }
+                        for image_path in images
+                    ]
+                    }
+                )
+
+        else:
+
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            if images:
+                messages[-1]["content"] = [
+                    {
+                        "type": "text",
+                        "text": messages[-1]["content"]
+                    }
+                ]
+            else:
+                messages.append({
+                    'role': 'user',
+                    'content': [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        }
+                    ]
+                    }
+                )
+
+        # Generate text using the OpenAI API
+        if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+            chat_completion = self.client.chat.completions.create(
+                model=self.model_name, # Choose the engine according to your OpenAI plan
+                messages=messages,
+                max_tokens=n_predict, # Adjust the desired length of the generated response
+                n=1, # Specify the number of responses you want
+                temperature=temperature, # Adjust the temperature for more or less randomness in the output
+                stream=stream
+            )
+
+            if stream:
+                for resp in chat_completion:
+                    if count >= n_predict:
+                        break
+                    try:
+                        word = resp.choices[0].delta.content
+                    except Exception as ex:
+                        word = ""
+                    if streaming_callback is not None:
+                        if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                            break
+                    if word:
+                        output += word
+                        count += 1
+            else:
+                output = chat_completion.choices[0].message.content
+        else:
+            completion = self.client.completions.create(
+                model=self.model_name, # Choose the engine according to your OpenAI plan
+                prompt=prompt,
+                max_tokens=n_predict, # Adjust the desired length of the generated response
+                n=1, # Specify the number of responses you want
+                temperature=temperature, # Adjust the temperature for more or less randomness in the output
+                stream=stream
+            )
+
+            if stream:
+                for resp in completion:
+                    if count >= n_predict:
+                        break
+                    try:
+                        word = resp.choices[0].text
+                    except Exception as ex:
+                        word = ""
+                    if streaming_callback is not None:
+                        if not streaming_callback(word, "MSG_TYPE_CHUNK"):
+                            break
+                    if word:
+                        output += word
+                        count += 1
+            else:
+                output = completion.choices[0].text
+
+        return output
+
+    def generate_from_messages(self,
+                               messages: List[Dict],
+                               n_predict: Optional[int] = None,
+                               stream: Optional[bool] = None,
+                               temperature: Optional[float] = None,
+                               top_k: Optional[int] = None,
+                               top_p: Optional[float] = None,
+                               repeat_penalty: Optional[float] = None,
+                               repeat_last_n: Optional[int] = None,
+                               seed: Optional[int] = None,
+                               n_threads: Optional[int] = None,
+                               ctx_size: int | None = None,
+                               streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                               **kwargs
+                               ) -> Union[str, dict]:
+        # Build the request parameters
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": n_predict,
+            "n": 1,
             "temperature": temperature,
-            "top_k": top_k,
             "top_p": top_p,
-            "
-            "
-            "seed": seed,
-            "n_threads": n_threads
+            "frequency_penalty": repeat_penalty,
+            "stream": stream
         }
+        # Add seed if available, as it's supported by newer OpenAI models
+        if seed is not None:
+            params["seed"] = seed
+
+        # Remove None values, as the API expects them to be absent
+        params = {k: v for k, v in params.items() if v is not None}

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                text = response.text.strip()
-                return text
-            except Exception as ex:
-                return {"status": False, "error": str(ex)}
-        else:
-            return {"status": False, "error": response.text}
-        else:
-            text = ""
-            if response.status_code == 200:
-                try:
-                    for line in response.iter_lines():
-                        chunk = line.decode("utf-8")
-                        text += chunk
-                        if streaming_callback:
-                            streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
-                    # Handle potential quotes from streaming response
-                    if text and text[0] == '"':
-                        text = text[1:]
-                    if text and text[-1] == '"':
-                        text = text[:-1]
-                    return text.rstrip('!')
-                except Exception as ex:
-                    return {"status": False, "error": str(ex)}
+        output = ""
+        # 2. Call the API
+        try:
+            completion = self.client.chat.completions.create(**params)
+
+            if stream:
+                for chunk in completion:
+                    # The streaming response for chat has a different structure
+                    delta = chunk.choices[0].delta
+                    if delta.content:
+                        word = delta.content
+                        if streaming_callback is not None:
+                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                        output += word
             else:
-
+                output = completion.choices[0].message.content
+
+        except Exception as e:
+            # Handle API errors gracefully
+            error_message = f"An error occurred with the OpenAI API: {e}"
+            if streaming_callback:
+                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": error_message}
+
+        return output
+
     def chat(self,
              discussion: LollmsDiscussion,
             branch_tip_id: Optional[str] = None,
             n_predict: Optional[int] = None,
             stream: Optional[bool] = None,
-            temperature:
-            top_k:
-            top_p:
-            repeat_penalty:
-            repeat_last_n:
+            temperature: float = 0.7,
+            top_k: int = 40,
+            top_p: float = 0.9,
+            repeat_penalty: float = 1.1,
+            repeat_last_n: int = 64,
             seed: Optional[int] = None,
             n_threads: Optional[int] = None,
-            ctx_size: int
+            ctx_size: Optional[int] = None,
             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
             ) -> Union[str, dict]:
         """
-        Conduct a chat session with
+        Conduct a chat session with the OpenAI model using a LollmsDiscussion object.

         Args:
             discussion (LollmsDiscussion): The discussion object containing the conversation history.
             branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
-
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Frequency penalty for repeated tokens.
+            seed (Optional[int]): Random seed for generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.

         Returns:
             Union[str, dict]: The generated text or an error dictionary.
         """
-        # 1. Export the discussion to the
-
-
-
-        #
-
-
-
-
-
-        if last_message and last_message.images:
-            # The endpoint expects a list of base64 strings.
-            # We will only process images of type 'base64'. URL types are not supported by this endpoint.
-            for img in last_message.images:
-                if img['type'] == 'base64':
-                    image_data.append(img['data'])
-            # Note: 'url' type images are ignored for this binding.
-
-        # 3. Determine endpoint and build payload
-        endpoint = "/lollms_generate_with_images" if image_data else "/lollms_generate"
-        url = f"{self.host_address}{endpoint}"
-
-        headers = {'Content-Type': 'application/json'}
-        if self.service_key:
-            headers['Authorization'] = f'Bearer {self.service_key}'
-
-        data = {
-            "prompt": prompt_text,
-            "model_name": self.model_name,
-            "personality": self.personality,
-            "n_predict": n_predict,
-            "stream": stream,
+        # 1. Export the discussion to the OpenAI chat format
+        # This handles system prompts, user/assistant roles, and multi-modal content automatically.
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        # Build the request parameters
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": n_predict,
+            "n": 1,
             "temperature": temperature,
-            "top_k": top_k,
             "top_p": top_p,
-            "
-            "
-            "seed": seed,
-            "n_threads": n_threads
+            "frequency_penalty": repeat_penalty,
+            "stream": stream
         }
-        if
-
+        # Add seed if available, as it's supported by newer OpenAI models
+        if seed is not None:
+            params["seed"] = seed

-        #
+        # Remove None values, as the API expects them to be absent
+        params = {k: v for k, v in params.items() if v is not None}
+
+        output = ""
+        # 2. Call the API
         try:
-
-
-
-
-            stream
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Check if we should use the chat completions or legacy completions endpoint
+            if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+                completion = self.client.chat.completions.create(**params)
+
+                if stream:
+                    for chunk in completion:
+                        # The streaming response for chat has a different structure
+                        delta = chunk.choices[0].delta
+                        if delta.content:
+                            word = delta.content
+                            if streaming_callback is not None:
+                                if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
+                            output += word
+                else:
+                    output = completion.choices[0].message.content
+
+            else: # Fallback to legacy completion format (not recommended for chat)
+                # We need to format the messages list into a single string prompt
+                legacy_prompt = discussion.export("openai_completion", branch_tip_id)
+                legacy_params = {
+                    "model": self.model_name,
+                    "prompt": legacy_prompt,
+                    "max_tokens": n_predict,
+                    "n": 1,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "frequency_penalty": repeat_penalty,
+                    "stream": stream
+                }
+                completion = self.client.completions.create(**legacy_params)
+
+                if stream:
+                    for chunk in completion:
+                        word = chunk.choices[0].text
+                        if streaming_callback is not None:
+                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
                                 break
-
-
-
-
-
-
-            error_message = f"
+                            output += word
+                else:
+                    output = completion.choices[0].text
+
+        except Exception as e:
+            # Handle API errors gracefully
+            error_message = f"An error occurred with the OpenAI API: {e}"
+            if streaming_callback:
+                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
             return {"status": "error", "message": error_message}
-
-
-            return {"status": "error", "message": error_message}
+
+        return output
     def tokenize(self, text: str) -> list:
         """
-        Tokenize the input text into a list of
+        Tokenize the input text into a list of characters.

         Args:
             text (str): The text to tokenize.

         Returns:
-            list: List of
+            list: List of individual characters.
         """
-        response=None
         try:
-
-
-
-                "return_named": False # Set to True if you want named tokens
-            }
-
-            # Make the POST request to the /lollms_tokenize endpoint
-            response = requests.post(f"{self.host_address}/lollms_tokenize", json=payload)
+            return tiktoken.model.encoding_for_model(self.model_name).encode(text)
+        except:
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)

-            # Check if the request was successful
-            if response.status_code == 200:
-                return response.json()
-            else:
-                raise Exception(f"Failed to tokenize text: {response.text}")
-        except Exception as ex:
-            trace_exception(ex)
-            raise Exception(f"Failed to tokenize text: {response.text}")
-
     def detokenize(self, tokens: list) -> str:
         """
-        Convert a list of tokens back to text
+        Convert a list of tokens back to text.

         Args:
-            tokens (list): List of tokens to detokenize.
+            tokens (list): List of tokens (characters) to detokenize.

         Returns:
             str: Detokenized text.
         """
         try:
-
-
-
-                "return_named": False # Set to True if you want named tokens
-            }
-
-            # Make the POST request to the /lollms_detokenize endpoint
-            response = requests.post(f"{self.host_address}/lollms_detokenize", json=payload)
-
-            # Check if the request was successful
-            if response.status_code == 200:
-                return response.json()
-            else:
-                raise Exception(f"Failed to detokenize tokens: {response.text}")
-        except Exception as ex:
-            return {"status": False, "error": str(ex)}
+            return tiktoken.model.encoding_for_model(self.model_name).decode(tokens)
+        except:
+            return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)

     def count_tokens(self, text: str) -> int:
         """
@@ -340,66 +439,127 @@ class LollmsLLMBinding(LollmsLLMBinding):
             int: Number of tokens in text.
         """
         return len(self.tokenize(text))
-
+
+
     def embed(self, text: str, **kwargs) -> list:
         """
-        Get embeddings for the input text using
-
+        Get embeddings for the input text using OpenAI API.
+
         Args:
-            text (str
-            **kwargs: Additional arguments
-
+            text (str): Input text to embed.
+            **kwargs: Additional arguments. The 'model' argument can be used
+                      to specify the embedding model (e.g., "text-embedding-3-small").
+                      Defaults to "text-embedding-ada-002".
+
         Returns:
-
+            list: The embedding vector as a list of floats, or an empty list on failure.
         """
-
-
-
-
-
-
-
-
-
-
-
-
+        # Determine the embedding model, prioritizing kwargs, with a default
+        embedding_model = kwargs.get("model", self.model_name)
+
+        try:
+            # The OpenAI API expects the input to be a list of strings
+            response = self.client.embeddings.create(
+                model=embedding_model,
+                input=[text] # Wrap the single text string in a list
+            )
+
+            # Extract the embedding from the response
+            if response.data and len(response.data) > 0:
+                return response.data[0].embedding
+            else:
+                ASCIIColors.warning("OpenAI API returned no data for the embedding request.")
+                return []
+
+        except Exception as e:
+            ASCIIColors.error(f"Failed to generate embeddings using OpenAI API: {e}")
+            trace_exception(e)
+            return []
+

     def get_model_info(self) -> dict:
         """
-        Return information about the current
+        Return information about the current OpenAI model.

         Returns:
-            dict: Dictionary containing model name, version, host address
+            dict: Dictionary containing model name, version, and host address.
         """
         return {
-            "name": "
-            "version": "
+            "name": "OpenAI",
+            "version": "2.0",
             "host_address": self.host_address,
-            "model_name": self.model_name
-            "personality": self.personality
+            "model_name": self.model_name
         }

+    def listModels(self) -> List[Dict]:
+        # Known context lengths
+        known_context_lengths = {
+            "gpt-4o": 128000,
+            "gpt-4": 8192,
+            "gpt-4-0613": 8192,
+            "gpt-4-1106-preview": 128000,
+            "gpt-4-0125-preview": 128000,
+            "gpt-4-turbo": 128000,
+            "gpt-3.5-turbo": 4096,
+            "gpt-3.5-turbo-16k": 16000,
+            "gpt-3.5-turbo-1106": 16385,
+            "gpt-3.5-turbo-0125": 16385,
+            "text-davinci-003": 4097,
+            "text-davinci-002": 4097,
+            "davinci": 2049,
+            "curie": 2049,
+            "babbage": 2049,
+            "ada": 2049,
+        }

-
-
-
+        generation_prefixes = (
+            "gpt-",
+            "text-davinci",
+            "davinci",
+            "curie",
+            "babbage",
+            "ada"
+        )

-
+        models_info = []
+        prompt_buffer = 500

-
-
-
-
-
-
-
-
+        try:
+            models = self.client.models.list()
+            for model in models.data:
+                model_id = model.id
+                if model_id.startswith(generation_prefixes):
+                    context_length = known_context_lengths.get(model_id, "unknown")
+                    max_generation = (
+                        context_length - prompt_buffer
+                        if isinstance(context_length, int)
+                        else "unknown"
+                    )
+                    models_info.append({
+                        "model_name": model_id,
+                        "owned_by": getattr(model, "owned_by", "N/A"),
+                        "created": getattr(model, "created", "N/A"),
+                        "context_length": context_length,
+                        "max_generation": max_generation,
+                    })
+                else:
+                    models_info.append({
+                        "model_name": model_id,
+                        "owned_by": getattr(model, "owned_by", "N/A"),
+                        "created": getattr(model, "created", "N/A"),
+                        "context_length": None,
+                        "max_generation": None,
+                    })
+
+        except Exception as e:
+            print(f"Failed to list models: {e}")
+
+        return models_info


     def load_model(self, model_name: str) -> bool:
         """
-        Load a specific model into the
+        Load a specific model into the OpenAI binding.

         Args:
             model_name (str): Name of the model to load.
@@ -410,19 +570,3 @@ class LollmsLLMBinding(LollmsLLMBinding):
         self.model = model_name
         self.model_name = model_name
         return True
-
-    # Lollms specific methods
-    def lollms_listMountedPersonalities(self, host_address:str=None):
-        host_address = host_address if host_address else self.host_address
-        url = f"{host_address}/list_mounted_personalities"
-
-        response = requests.get(url)
-
-        if response.status_code == 200:
-            try:
-                text = json.loads(response.content.decode("utf-8"))
-                return text
-            except Exception as ex:
-                return {"status": False, "error": str(ex)}
-        else:
-            return {"status": False, "error": response.text}