lollms-client 0.9.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/__init__.py +1 -0
- lollms_client/llm_bindings/lollms/__init__.py +302 -0
- lollms_client/llm_bindings/ollama/__init__.py +297 -0
- lollms_client/llm_bindings/openai/__init__.py +261 -0
- lollms_client/llm_bindings/transformers/__init__.py +277 -0
- lollms_client/lollms_core.py +451 -1456
- lollms_client/lollms_llm_binding.py +210 -0
- lollms_client/lollms_tasks.py +42 -109
- lollms_client/lollms_tts.py +7 -3
- lollms_client/lollms_types.py +19 -1
- lollms_client/stt_bindings/__init__.py +0 -0
- lollms_client/stt_bindings/lollms/__init__.py +0 -0
- lollms_client/tti_bindings/__init__.py +0 -0
- lollms_client/tti_bindings/lollms/__init__.py +0 -0
- lollms_client/tts_bindings/__init__.py +0 -0
- lollms_client/tts_bindings/lollms/__init__.py +0 -0
- lollms_client/ttv_bindings/__init__.py +0 -0
- lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.11.0.dist-info}/METADATA +26 -13
- lollms_client-0.11.0.dist-info/RECORD +34 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.11.0.dist-info}/WHEEL +1 -1
- lollms_client-0.9.2.dist-info/RECORD +0 -20
- {lollms_client-0.9.2.dist-info → lollms_client-0.11.0.dist-info/licenses}/LICENSE +0 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.11.0.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -1,143 +1,109 @@
 import requests
 from ascii_colors import ASCIIColors, trace_exception
-from lollms_client.lollms_types import MSG_TYPE
+from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
 from lollms_client.lollms_utilities import encode_image
+from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 import json
 from enum import Enum
-import tiktoken
 import base64
 import requests
 import pipmaster as pm
-from typing import List, Optional, Callable, Union
+from typing import List, Optional, Callable, Union, Dict
 import numpy as np
 import pipmaster as pm
+from pathlib import Path
 import os
-
-class ELF_GENERATION_FORMAT(Enum):
-    LOLLMS = 0
-    OPENAI = 1
-    OLLAMA = 2
-    LITELLM = 3
-    TRANSFORMERS = 4
-    VLLM = 5
-
-    @classmethod
-    def from_string(cls, format_string: str) -> 'ELF_GENERATION_FORMAT':
-        format_mapping = {
-            "LOLLMS": cls.LOLLMS,
-            "OPENAI": cls.OPENAI,
-            "OLLAMA": cls.OLLAMA,
-            "LITELLM": cls.LITELLM,
-            "TRANSFORMERS": cls.TRANSFORMERS,
-            "VLLM": cls.VLLM
-        }
-
-        try:
-            return format_mapping[format_string.upper()]
-        except KeyError:
-            raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
-
-    def __str__(self):
-        return self.name
-class ELF_COMPLETION_FORMAT(Enum):
-    Instruct = 0
-    Chat = 1
-    @classmethod
-    def from_string(cls, format_string: str) -> 'ELF_COMPLETION_FORMAT':
-        format_mapping = {
-            "Instruct": cls.Instruct,
-            "Chat": cls.Chat,
-        }
-
-        try:
-            return format_mapping[format_string.upper()]
-        except KeyError:
-            raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
-
-    def __str__(self):
-        return self.name

 class LollmsClient():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """Core class for interacting with LOLLMS bindings"""
+    def __init__(self,
+                 binding_name: str = "lollms",
+                 host_address: Optional[str] = None,
+                 model_name: str = "",
+                 service_key: Optional[str] = None,
+                 verify_ssl_certificate: bool = True,
+                 personality: Optional[int] = None,
+                 llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
+                 binding_config: Optional[Dict[str, any]] = None,
+                 ctx_size: Optional[int] = 8192,
+                 n_predict: Optional[int] = 4096,
+                 stream: bool = False,
+                 temperature: float = 0.1,
+                 top_k: int = 50,
+                 top_p: float = 0.95,
+                 repeat_penalty: float = 0.8,
+                 repeat_last_n: int = 40,
+                 seed: Optional[int] = None,
+                 n_threads: int = 8,
+                 streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                 user_name ="user",
+                 ai_name = "assistant"):
+        """
+        Initialize the LollmsCore with a binding and generation parameters.
+
+        Args:
+            binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
+            host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
+            model_name (str): Name of the model to use. Defaults to empty string.
+            service_key (Optional[str]): Authentication key for the service.
+            verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+            personality (Optional[int]): Personality ID (used only by LOLLMS binding).
+            llm_bindings_dir (Path): Directory containing binding implementations.
+                Defaults to the "bindings" subdirectory relative to this file's location.
+            binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
+            stream (bool): Whether to stream the output. Defaults to False for generate_text.
+            temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
+            top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
+            top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
+            repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
+            repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
+            seed (Optional[int]): Random seed for generation. Default for generate_text.
+            n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+                Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
+
+        Raises:
+            ValueError: If the specified binding cannot be created.
+        """
+        self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
+        self.binding_config = binding_config or {}
+
+        # Store generation parameters as instance variables
+        self.default_ctx_size = ctx_size
+        self.default_n_predict = n_predict
+        self.default_stream = stream
+        self.default_temperature = temperature
+        self.default_top_k = top_k
+        self.default_top_p = top_p
+        self.default_repeat_penalty = repeat_penalty
+        self.default_repeat_last_n = repeat_last_n
+        self.default_seed = seed
+        self.default_n_threads = n_threads
+        self.default_streaming_callback = streaming_callback
+
+        # Create the binding instance
+        self.binding = self.binding_manager.create_binding(
+            binding_name=binding_name,
+            host_address=host_address,
+            model_name=model_name,
+            service_key=service_key,
+            verify_ssl_certificate=verify_ssl_certificate,
+            personality=personality
+        )
+
+        if self.binding is None:
+            raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
+
+        # Apply additional configuration if provided
+        if binding_config:
+            for key, value in binding_config.items():
+                setattr(self.binding, key, value)
         self.user_name = user_name
         self.ai_name = ai_name
-        self.host_address=host_address
-        if not self.host_address:
-            if default_generation_mode==ELF_GENERATION_FORMAT.LOLLMS:
-                self.host_address = "http://localhost:9600"
-            elif default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
-                self.host_address = "https://api.openai.com"
-            elif default_generation_mode==ELF_GENERATION_FORMAT.OLLAMA:
-                self.host_address = "http://localhost:11434"
-            else:
-                self.host_address = "http://localhost:9600"
-
-        self.model_name = model_name
-        self.ctx_size = ctx_size
-        self.n_predict = n_predict
-        self.min_n_predict = min_n_predict
-        self.personality = personality
-        self.temperature = temperature
-        self.top_k = top_k
-        self.top_p = top_p
-        self.repeat_penalty = repeat_penalty
-        self.repeat_last_n = repeat_last_n
-        self.seed = seed
-        self.n_threads = n_threads
         self.service_key = service_key
-        if not self.service_key and default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
-            self.service_key = os.getenv("OPENAI_API_KEY","")
-        self.default_generation_mode = default_generation_mode
-        self.verify_ssl_certificate = verify_ssl_certificate
-        self.tokenizer = tiktoken.model.encoding_for_model("gpt-3.5-turbo-1106") if tokenizer is None else tokenizer
-        if default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
-            if not pm.is_installed("torch"):
-                ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
-                pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
-
-            import torch
-            if not torch.cuda.is_available():
-                ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
-                pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
-                import torch
-
-            if not pm.is_installed("transformers"):
-                pm.install_or_update("transformers")
-            from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                str(model_name), trust_remote_code=False
-            )
-
-            self.model = AutoModelForCausalLM.from_pretrained(
-                str(model_name),
-                device_map="auto",
-                load_in_4bit=True,
-                torch_dtype=torch.bfloat16 # Load in float16 for quantization
-            )
-            self.generation_config = GenerationConfig.from_pretrained(str(model_name))

+        self.verify_ssl_certificate = verify_ssl_certificate
         self.start_header_id_template ="!@>"
         self.end_header_id_template =": "
         self.system_message_template ="system"
@@ -149,12 +115,6 @@ class LollmsClient():
         self.end_ai_header_id_template =": "
         self.end_ai_message_id_template =""

-        if default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
-            if not pm.is_installed("openai"):
-                pm.install("openai")
-            import openai
-            self.client = openai.OpenAI(base_url=host_address)
-

     @property
     def system_full_header(self) -> str:
@@ -185,1150 +145,121 @@ class LollmsClient():

     def sink(self, s=None,i=None,d=None):
         pass
-
-    def tokenize(self, prompt:str):
+    def tokenize(self, text: str) -> list:
         """
-
+        Tokenize text using the active binding.

         Args:
-
+            text (str): The text to tokenize.

         Returns:
-            list:
+            list: List of tokens.
         """
-
-
-
-
-    def detokenize(self, tokens_list:list):
+        return self.binding.tokenize(text)
+
+    def detokenize(self, tokens: list) -> str:
         """
-
+        Detokenize tokens using the active binding.

         Args:
-
+            tokens (list): List of tokens to detokenize.

         Returns:
-            str:
+            str: Detokenized text.
         """
-
-
-        return text
+        return self.binding.detokenize(tokens)

-    def
-        if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
-            return self.lollms_embed(text)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
-            return self.ollama_embed(text)
-        else:
-            return #not implemented
-
-    def ollama_embed(self, text, **kwargs):
+    def get_model_details(self) -> dict:
         """
-        Get
-
-        Args:
-            text (str or List[str]): Input text to embed
-            **kwargs: Additional arguments like model, truncate, options, keep_alive
-
+        Get model information from the active binding.
+
         Returns:
-            dict:
+            dict: Model information dictionary.
         """
-
-
-        url = f"{self.base_url}/api/embed"
-
-        # Prepare the request payload
-        payload = {
-            "input": text,
-            "model": kwargs.get("model", "llama2") # default model
-        }
-
-        # Add optional parameters if provided
-        if "truncate" in kwargs:
-            payload["truncate"] = kwargs["truncate"]
-        if "options" in kwargs:
-            payload["options"] = kwargs["options"]
-        if "keep_alive" in kwargs:
-            payload["keep_alive"] = kwargs["keep_alive"]
-
-        try:
-            response = requests.post(url, json=payload)
-            response.raise_for_status() # Raise exception for bad status codes
-            return response.json()
-        except requests.exceptions.RequestException as e:
-            raise Exception(f"Embedding request failed: {str(e)}")
-
-
-    def lollms_embed(self, texts, **kwargs):
-        api_key = kwargs.pop("api_key", None)
-        headers = (
-            {"Content-Type": "application/json", "Authorization": api_key}
-            if api_key
-            else {"Content-Type": "application/json"}
-        )
-        embeddings = []
-        for text in texts:
-            request_data = {"text": text}
-            response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
-            response.raise_for_status()
-            result = response.json()
-            embeddings.append(result["vector"])
-        return np.array(embeddings)
+        return self.binding.get_model_info()

-    def
-        if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
-            return self.lollms_generate_with_images(prompt, images, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
-            return self.openai_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
-            return self.ollama_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
-            return # To be implemented #self.litellm_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
-
-
-    def generate(self, prompt, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None, completion_format = ELF_COMPLETION_FORMAT.Chat):
-        if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
-            return self.lollms_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
-            return self.openai_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
-            return self.ollama_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
-            return self.litellm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.VLLM:
-            return self.vllm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
-
-        elif self.default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
-            return self.transformers_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
-
-
-    def generate_text(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
-        return self.lollms_generate(prompt, host_address, model_name, personality, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
-
-    def lollms_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
-        # Set default values to instance variables if optional arguments are None
-        host_address = host_address if host_address else self.host_address
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-        # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-
-
-        url = f"{host_address}/lollms_generate"
-        if service_key!="":
-            headers = {
-                'Content-Type': 'application/json;',
-                'Authorization': f'Bearer {service_key}',
-            }
-        else:
-            headers = {
-                'Content-Type': 'application/json',
-            }
-        data = {
-            "prompt": prompt,
-            "model_name": self.model_name,
-            "personality": self.personality,
-            "n_predict": n_predict,
-            "stream": stream,
-            "temperature": self.temperature,
-            "top_k": self.top_k,
-            "top_p": self.top_p,
-            "repeat_penalty": repeat_penalty,
-            "repeat_last_n": repeat_last_n,
-            "seed": seed,
-            "n_threads": n_threads
-        }
-
-        response = requests.post(url, json=data, headers=headers, stream=stream)
-        if not stream:
-            if response.status_code == 200:
-                try:
-                    text = response.text.strip().rstrip('!')
-                    return text
-                except Exception as ex:
-                    return {"status": False, "error": str(ex)}
-            else:
-                return {"status": False, "error": response.text}
-        else:
-            text = ""
-            if response.status_code==200:
-                try:
-                    for line in response.iter_lines():
-                        chunk = line.decode("utf-8")
-                        text += chunk
-                        if streaming_callback:
-                            streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
-                    return text.rstrip('!')
-                except Exception as ex:
-                    return {"status": False, "error": str(ex)}
-            else:
-                return {"status": False, "error": response.text}
-
-
-    def lollms_generate_with_images(
-        self,
-        prompt: str,
-        images: List[str],
-        host_address: Optional[str] = None,
-        model_name: Optional[str] = None,
-        personality: Optional[str] = None,
-        n_predict: Optional[int] = None,
-        stream: bool = False,
-        temperature: float = 0.1,
-        top_k: int = 50,
-        top_p: float = 0.95,
-        repeat_penalty: float = 0.8,
-        repeat_last_n: int = 40,
-        seed: Optional[int] = None,
-        n_threads: int = 8,
-        service_key: str = "",
-        streaming_callback: Optional[Callable[[str, int], None]] = None
-    ) -> Union[str, dict]:
+    def switch_model(self, model_name: str) -> bool:
         """
-
+        Load a new model in the active binding.

         Args:
-
-            images (List[str]): A list of file paths to images to be included in the generation.
-            host_address (Optional[str]): The host address for the service. Defaults to instance variable.
-            model_name (Optional[str]): The model name to use. Defaults to instance variable.
-            personality (Optional[str]): The personality setting for the generation. Defaults to instance variable.
-            n_predict (Optional[int]): The number of tokens to predict. Defaults to instance variable.
-            stream (bool): Whether to stream the response. Defaults to False.
-            temperature (float): Sampling temperature. Defaults to 0.1.
-            top_k (int): Top-k sampling parameter. Defaults to 50.
-            top_p (float): Top-p (nucleus) sampling parameter. Defaults to 0.95.
-            repeat_penalty (float): Penalty for repeating tokens. Defaults to 0.8.
-            repeat_last_n (int): Number of last tokens to consider for repeat penalty. Defaults to 40.
-            seed (Optional[int]): Random seed for generation. Defaults to instance variable.
-            n_threads (int): Number of threads to use. Defaults to 8.
-            service_key (str): Optional service key for authorization.
-            streaming_callback (Optional[Callable[[str, int], None]]): Callback for streaming responses.
+            model_name (str): Name of the model to load.

         Returns:
-
+            bool: True if model loaded successfully, False otherwise.
         """
-
-        # Set default values to instance variables if optional arguments are None
-        host_address = host_address if host_address else self.host_address
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-
-        # Set parameters to instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-
-        def encode_image_to_base64(image_path: str) -> str:
-            """Encodes an image file to a base64 string."""
-            with open(image_path, "rb") as image_file:
-                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
-            return encoded_string
-
-        # Encode images in base64
-        encoded_images = [encode_image_to_base64(image) for image in images]
-
-        url = f"{host_address}/lollms_generate_with_images"
-        headers = {
-            'Content-Type': 'application/json',
-            'Authorization': f'Bearer {service_key}' if service_key else '',
-        }
-
-        data = {
-            "prompt": prompt,
-            "model_name": model_name,
-            "personality": personality,
-            "n_predict": n_predict,
-            "stream": stream,
-            "temperature": temperature,
-            "top_k": top_k,
-            "top_p": top_p,
-            "repeat_penalty": repeat_penalty,
-            "repeat_last_n": repeat_last_n,
-            "seed": seed,
-            "n_threads": n_threads,
-            "images": encoded_images # Add encoded images to the request payload
-        }
-
-        response = requests.post(url, json=data, headers=headers, stream=stream)
-        if not stream:
-            if response.status_code == 200:
-                try:
-                    text = response.text.rstrip('!')
-                    return text
-                except Exception as ex:
-                    return {"status": False, "error": str(ex)}
-            else:
-                return {"status": False, "error": response.text}
-        else:
-            text = ""
-            if response.status_code == 200:
-                try:
-                    for line in response.iter_lines():
-                        chunk = line.decode("utf-8")
-                        text += chunk
-                        if streaming_callback:
-                            streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
-                    if text[0] == '"':
-                        text = text[1:]
-                    if text[-1] == '"':
-                        text = text[:-1]
-                    return text
-                except Exception as ex:
-                    return {"status": False, "error": str(ex)}
-            else:
-                return {"status": False, "error": response.text}
-
+        return self.binding.load_model(model_name)

-    def
-        # Set default values to instance variables if optional arguments are None
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-        # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-
-        self.generation_config.max_new_tokens = int(n_predict)
-        self.generation_config.temperature = float(temperature)
-        self.generation_config.top_k = int(top_k)
-        self.generation_config.top_p = float(top_p)
-        self.generation_config.repetition_penalty = float(repeat_penalty)
-        self.generation_config.do_sample = True if float(temperature)>0 else False
-        self.generation_config.pad_token_id = self.tokenizer.pad_token_id
-        self.generation_config.eos_token_id = self.tokenizer.eos_token_id
-        self.generation_config.output_attentions = False
-
-        try:
-            input_ids = self.tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
-            class StreamerClass:
-                def __init__(self, tokenizer, callback):
-                    self.output = ""
-                    self.skip_prompt = True
-                    self.decode_kwargs = {}
-                    self.tokenizer = tokenizer
-
-                    # variables used in the streaming process
-                    self.token_cache = []
-                    self.print_len = 0
-                    self.next_tokens_are_prompt = True
-                    self.callback = callback
-                def put(self, value):
-                    """
-                    Recives tokens, decodes them, and prints them to stdout as soon as they form entire words.
-                    """
-                    if len(value.shape)==1 and (value[0] == self.tokenizer.eos_token_id or value[0] == self.tokenizer.bos_token_id):
-                        print("eos detected")
-                        return
-                    if len(value.shape) > 1 and value.shape[0] > 1:
-                        raise ValueError("TextStreamer only supports batch size 1")
-                    elif len(value.shape) > 1:
-                        value = value[0]
-
-                    if self.skip_prompt and self.next_tokens_are_prompt:
-                        self.next_tokens_are_prompt = False
-                        return
-
-                    # Add the new token to the cache and decodes the entire thing.
-                    self.token_cache.extend(value.tolist())
-                    text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
-
-                    # After the symbol for a new line, we flush the cache.
-                    if text.endswith("\n"):
-                        printable_text = text[self.print_len :]
-                        self.token_cache = []
-                        self.print_len = 0
-                    # If the last token is a CJK character, we print the characters.
-                    elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
-                        printable_text = text[self.print_len :]
-                        self.print_len += len(printable_text)
-                    # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
-                    # which may change with the subsequent token -- there are probably smarter ways to do this!)
-                    else:
-                        printable_text = text[self.print_len : text.rfind(" ") + 1]
-                        self.print_len += len(printable_text)
-
-                    self.output += printable_text
-                    if self.callback:
-                        if not self.callback(printable_text, 0):
-                            raise Exception("canceled")
-
-                def _is_chinese_char(self, cp):
-                    """Checks whether CP is the codepoint of a CJK character."""
-                    # This defines a "chinese character" as anything in the CJK Unicode block:
-                    # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
-                    #
-                    # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
-                    # despite its name. The modern Korean Hangul alphabet is a different block,
-                    # as is Japanese Hiragana and Katakana. Those alphabets are used to write
-                    # space-separated words, so they are not treated specially and handled
-                    # like the all of the other languages.
-                    if (
-                        (cp >= 0x4E00 and cp <= 0x9FFF)
-                        or (cp >= 0x3400 and cp <= 0x4DBF) #
-                        or (cp >= 0x20000 and cp <= 0x2A6DF) #
-                        or (cp >= 0x2A700 and cp <= 0x2B73F) #
-                        or (cp >= 0x2B740 and cp <= 0x2B81F) #
-                        or (cp >= 0x2B820 and cp <= 0x2CEAF) #
-                        or (cp >= 0xF900 and cp <= 0xFAFF)
-                        or (cp >= 0x2F800 and cp <= 0x2FA1F) #
-                    ): #
-                        return True
-
-                    return False
-                def end(self):
-                    """Flushes any remaining cache and prints a newline to stdout."""
-                    # Flush the cache, if it exists
-                    if len(self.token_cache) > 0:
-                        text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
-                        printable_text = text[self.print_len :]
-                        self.token_cache = []
-                        self.print_len = 0
-                    else:
-                        printable_text = ""
-
-                    self.next_tokens_are_prompt = True
-                    if self.callback:
-                        if self.callback(printable_text, 0):
-                            raise Exception("canceled")
-            streamer = StreamerClass(self.tokenizer, streaming_callback)
-            self.generate(
-                inputs=input_ids,
-                generation_config=self.generation_config,
-                streamer = streamer,
-            )
-            return streamer.output.rstrip('!')
-        except Exception as ex:
-            return {"status": False, "error": str(ex)}
-
-    def openai_generate(self,
-                        prompt,
-                        host_address=None,
-                        model_name=None,
-                        personality=None,
-                        n_predict=None,
-                        stream=False,
-                        temperature=0.1,
-                        top_k=50,
-                        top_p=0.95,
-                        repeat_penalty=0.8,
-                        repeat_last_n=40,
-                        seed=None,
-                        n_threads=8,
-                        completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
-                        service_key: str = "",
-                        streaming_callback=None):
+    def get_available_bindings(self) -> List[str]:
         """
-
-
-        Parameters:
-            prompt (str): The input text prompt to generate completions for.
-            host_address (str, optional): The API host address. Defaults to instance variable.
-            model_name (str, optional): The model to use for generation. Defaults to instance variable.
-            personality (str, optional): The personality setting for the model. Defaults to instance variable.
-            n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
-            stream (bool, optional): Whether to stream the response. Defaults to False.
-            temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
-            top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
-            top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
-            repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
-            repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
-            seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
-            n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
-            completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
-            service_key (str, optional): The API service key for authorization. Defaults to an empty string.
-            streaming_callback (callable, optional): A callback function to handle streaming responses.
+        Get list of available bindings.

         Returns:
-            str:
+            List[str]: List of binding names that can be used.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        output= ""
-
-
-        if "vision" in self.model_name:
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type":"text",
-                            "text":prompt
-                        }
-                    ]
-                }
-            ]
-        else:
-            messages = [{"role": "user", "content": prompt}]
-
-
-        if completion_format == ELF_COMPLETION_FORMAT.Chat:
-            if "o1" in self.model_name:
-                chat_completion = self.client.chat.completions.create(
-                    model=self.model_name, # Choose the engine according to your OpenAI plan
-                    messages=messages,
-                    n=1, # Specify the number of responses you want
-                )
-                output = chat_completion.choices[0].message.content
-            else:
-                chat_completion = self.client.chat.completions.create(
-                    model=self.model_name, # Choose the engine according to your OpenAI plan
-                    messages=messages,
-                    max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
-                    n=1, # Specify the number of responses you want
-                    temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
-                    stream=True)
-
-                for resp in chat_completion:
-                    if count >= n_predict:
-                        break
-                    try:
-                        word = resp.choices[0].delta.content
-                    except Exception as ex:
-                        word = ""
-                    if streaming_callback is not None:
-                        if not streaming_callback(word):
-                            break
-                    if word:
-                        output += word
-                        count += 1
-        else:
-            completion = self.client.completions.create(
-                model=self.model_name, # Choose the engine according to your OpenAI plan
-                prompt=prompt,
-                max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
-                n=1, # Specify the number of responses you want
-                temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
-                stream=True)
-
-            for resp in completion:
-                if count >= n_predict:
-                    break
-                try:
-                    word = resp.choices[0].text
-                except Exception as ex:
-                    word = ""
-                if streaming_callback is not None:
-                    if not streaming_callback(word):
-                        break
-                if word:
-                    output += word
-                    count += 1
-
-        return output
-
-
-    def vllm_generate(self,
-                        prompt,
-                        host_address=None,
-                        model_name=None,
-                        personality=None,
-                        n_predict=None,
-                        stream=False,
-                        temperature=0.1,
-                        top_k=50,
-                        top_p=0.95,
-                        repeat_penalty=0.8,
-                        repeat_last_n=40,
-                        seed=None,
-                        n_threads=8,
-                        completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Instruct,
-                        service_key: str = "",
-                        streaming_callback=None):
+        return self.binding_manager.get_available_bindings()
+
+    def generate_text(self,
+                     prompt: str,
+                     images: Optional[List[str]] = None,
+                     n_predict: Optional[int] = None,
+                     stream: Optional[bool] = None,
+                     temperature: Optional[float] = None,
+                     top_k: Optional[int] = None,
+                     top_p: Optional[float] = None,
+                     repeat_penalty: Optional[float] = None,
+                     repeat_last_n: Optional[int] = None,
+                     seed: Optional[int] = None,
+                     n_threads: Optional[int] = None,
+                     ctx_size: int | None = None,
+                     streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
         """
-
+        Generate text using the active binding, using instance defaults if parameters are not provided.

-
-            prompt (str): The input
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            streaming_callback (callable, optional): A callback function to handle streaming responses.
+        Args:
+            prompt (str): The input prompt for text generation.
+            images (Optional[List[str]]): List of image file paths for multimodal generation.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+                Uses instance default if None.
+                - First parameter (str): The chunk of text received from the stream.
+                - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).

         Returns:
-            str:
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            headers = {
-                'Content-Type': 'application/json',
-                'Authorization': f'Bearer {service_key}',
-            }
-        else:
-            headers = {
-                'Content-Type': 'application/json',
-            }
+        return self.binding.generate_text(
+            prompt=prompt,
+            images=images,
+            n_predict=n_predict if n_predict is not None else self.default_n_predict,
+            stream=stream if stream is not None else self.default_stream,
+            temperature=temperature if temperature is not None else self.default_temperature,
+            top_k=top_k if top_k is not None else self.default_top_k,
+            top_p=top_p if top_p is not None else self.default_top_p,
+            repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
+            repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
+            seed=seed if seed is not None else self.default_seed,
+            n_threads=n_threads if n_threads is not None else self.default_n_threads,
+            ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
+            streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+        )

-        if completion_format == ELF_COMPLETION_FORMAT.Instruct:
-            data = {
-                'model': model_name,
-                'prompt': prompt,
-                "stream": True,
-                "temperature": float(temperature),
-                "max_tokens": n_predict
-            }
-            completion_format_path = "/v1/completions"
-        elif completion_format == ELF_COMPLETION_FORMAT.Chat:
-            data = {
-                'model': model_name,
-                'messages': [{
-                    'role': "user",
-                    'content': prompt
-                }],
-                "stream": True,
-                "temperature": float(temperature),
-                "max_tokens": n_predict
-            }
-            completion_format_path = "/v1/chat/completions"
-
-        if host_address.endswith("/"):
-            host_address = host_address[:-1]
-
-        url = f'{host_address}{completion_format_path}'
-
-        response = requests.post(url, headers=headers, data=json.dumps(data), stream=True, verify=self.verify_ssl_certificate)
-
-        if response.status_code == 400:
-            try:
-                content = response.content.decode("utf8")
-                content = json.loads(content)
-                self.error(content["error"]["message"])
-                return
-            except:
-                content = response.content.decode("utf8")
-                content = json.loads(content)
-                self.error(content["message"])
-                return
-        elif response.status_code == 404:
-            ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
-
-        text = ""
-        for line in response.iter_lines():
-            decoded = line.decode("utf-8")
-            if decoded.startswith("data: "):
-                try:
-                    json_data = json.loads(decoded[5:].strip())
-                    if completion_format == ELF_COMPLETION_FORMAT.Chat:
-                        try:
-                            chunk = json_data["choices"][0]["delta"]["content"]
-                        except:
-                            chunk = ""
-                    else:
-                        chunk = json_data["choices"][0]["text"]
-                    # Process the JSON data here
-                    text += chunk
-                    if streaming_callback:
-                        if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                            break
-                except:
-                    break
-            else:
-                if decoded.startswith("{"):
-                    for line_ in response.iter_lines():
-                        decoded += line_.decode("utf-8")
-                    try:
-                        json_data = json.loads(decoded)
-                        if json_data["object"] == "error":
-                            self.error(json_data["message"])
-                            break
-                    except:
-                        self.error("Couldn't generate text, verify your key or model name")
-                else:
-                    text += decoded
-                    if streaming_callback:
-                        if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
-                            break
-        return text

-    def
-
-                        images,
-                        host_address=None,
-                        model_name=None,
-                        personality=None,
-                        n_predict=None,
-                        stream=False,
-                        temperature=0.1,
-                        top_k=50,
-                        top_p=0.95,
-                        repeat_penalty=0.8,
-                        repeat_last_n=40,
-                        seed=None,
-                        n_threads=8,
-                        max_image_width=-1,
-                        service_key: str = "",
-                        streaming_callback=None,):
-        """Generates text out of a prompt
-
-        Args:
-            prompt (str): The prompt to use for generation
-            n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
-            callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
-            verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
-        """
-        # Set default values to instance variables if optional arguments are None
-        host_address = host_address if host_address else self.host_address
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-        # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-
-        count = 0
-        output = ""
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type":"text",
-                        "text":prompt
-                    }
-                ]+[
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
-                        }
-                    }
-                    for image_path in images
-                ]
-            }
-        ]
-        chat_completion = self.client.chat.completions.create(
-            model=self.model_name, # Choose the engine according to your OpenAI plan
-            messages=messages,
-            max_tokens=n_predict, # Adjust the desired length of the generated response
-            n=1, # Specify the number of responses you want
-            temperature=temperature, # Adjust the temperature for more or less randomness in the output
-            stream=True
-        )
-
-        for resp in chat_completion:
-            if count >= n_predict:
-                break
-            try:
-                word = resp.choices[0].delta.content
-            except Exception as ex:
-                word = ""
-            if streaming_callback is not None:
-                if not streaming_callback(word):
-                    break
-            if word:
-                output += word
-                count += 1
-        return output
-
-
-    def ollama_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
-        # Set default values to instance variables if optional arguments are None
-        host_address = host_address if host_address else self.host_address
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-        # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-
-        if service_key!="":
-            headers = {
-                'Content-Type': 'application/json',
-                'Authorization': f'Bearer {service_key}',
-            }
-        else:
-            headers = {
-                'Content-Type': 'application/json',
-            }
-
-        data = {
-            'model':model_name,
-            'prompt': prompt,
-            "stream":stream,
-            "temperature": float(temperature),
-            "max_tokens": n_predict
-        }
-        completion_format_path = "/api/generate"
-        if host_address.endswith("/"):
-            host_address = host_address[:-1]
-        url = f'{host_address}{completion_format_path}'
-
-        response = requests.post(url, json=data, headers=headers)
-
-        if response.status_code==404:
-            ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
-        text = ""
-        if stream:
-            for line in response.iter_lines():
-                decoded = line.decode("utf-8")
-                json_data = json.loads(decoded)
-                chunk = json_data["response"]
-                ## Process the JSON data here
-                text +=chunk
-                if streaming_callback:
-                    if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                        break
-            return text
-        else:
-            return response.json()["response"]
-
-    def ollama_generate_with_images(self,
-                        prompt,
-                        images,
-                        host_address=None,
-                        model_name=None,
-                        personality=None,
-                        n_predict=None,
-                        stream=False,
-                        temperature=0.1,
-                        top_k=50,
-                        top_p=0.95,
-                        repeat_penalty=0.8,
-                        repeat_last_n=40,
-                        seed=None,
-                        n_threads=8,
-                        max_image_width=-1,
-                        service_key: str = "",
-                        streaming_callback=None,):
-        """Generates text out of a prompt
-
-        Args:
-            prompt (str): The prompt to use for generation
-            n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
-            callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
-            verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
-        """
-        # Set default values to instance variables if optional arguments are None
-        host_address = host_address if host_address else self.host_address
-        model_name = model_name if model_name else self.model_name
-        n_predict = n_predict if n_predict else self.n_predict
-        personality = personality if personality is not None else self.personality
-        # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
-        temperature = temperature if temperature is not None else self.temperature
-        top_k = top_k if top_k is not None else self.top_k
-        top_p = top_p if top_p is not None else self.top_p
-        repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
-        repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
-        seed = seed or self.seed # Use the instance seed if not provided
-        n_threads = n_threads if n_threads else self.n_threads
-        if service_key != "":
-            headers = {
-                'Content-Type': 'application/json',
-                'Authorization': f'Bearer {service_key}',
-            }
-        else:
-            headers = {
-                'Content-Type': 'application/json',
-            }
-
-        images_list = []
-        for image in images:
-            images_list.append(f"{encode_image(image, max_image_width)}")
-
-        data = {
-            'model': model_name,
-            'prompt': prompt,
-            'images': images_list,
-            "raw": True,
-            "stream":True,
-            "temperature": float(temperature),
-            "max_tokens": n_predict
-        }
-
-
-        data = {
-            'model': model_name,
-            'messages': [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type":"text",
-                            "text":prompt
-                        }
-                    ]+[
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
-                            }
-                        }
-                        for image_path in images
-                    ]
-                }
-            ],
-            "stream": True,
-            "temperature": float(temperature),
-            "max_tokens": n_predict
-        }
-
-        completion_format_path = "/api"
-
-        if host_address.endswith("/"):
-            host_address = host_address[:-1]
-        url = f'{host_address}{completion_format_path}'
-
-        response = requests.post(url, json=data, headers=headers)
-
-        if response.status_code == 400:
-            try:
-                content = response.content.decode("utf8")
-                content = json.loads(content)
-                self.error(content["error"]["message"])
-                return
-            except:
-                content = response.content.decode("utf8")
-                content = json.loads(content)
-                self.error(content["message"])
-                return
-        elif response.status_code == 404:
-            ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
-
-        text = ""
-        for line in response.iter_lines():
-            decoded = line.decode("utf-8")
-            if decoded.startswith("data: "):
-                try:
-                    json_data = json.loads(decoded[5:].strip())
-                    try:
-                        chunk = json_data["choices"][0]["delta"]["content"]
-                    except:
-                        chunk = ""
-                    # Process the JSON data here
-                    text += chunk
-                    if streaming_callback:
-                        if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-
headers = {
|
|
1088
|
-
'Content-Type': 'application/json',
|
|
1089
|
-
}
|
|
1090
|
-
|
|
1091
|
-
images_list = []
|
|
1092
|
-
for image in images:
|
|
1093
|
-
images_list.append(f"{encode_image(image, max_image_width)}")
|
|
1094
|
-
|
|
1095
|
-
data = {
|
|
1096
|
-
'model': model_name,
|
|
1097
|
-
'prompt': prompt,
|
|
1098
|
-
'images': images_list,
|
|
1099
|
-
"raw": True,
|
|
1100
|
-
"stream":True,
|
|
1101
|
-
"temperature": float(temperature),
|
|
1102
|
-
"max_tokens": n_predict
|
|
1103
|
-
}
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
data = {
|
|
1107
|
-
'model': model_name,
|
|
1108
|
-
'messages': [
|
|
1109
|
-
{
|
|
1110
|
-
"role": "user",
|
|
1111
|
-
"content": [
|
|
1112
|
-
{
|
|
1113
|
-
"type":"text",
|
|
1114
|
-
"text":prompt
|
|
1115
|
-
}
|
|
1116
|
-
]+[
|
|
1117
|
-
{
|
|
1118
|
-
"type": "image_url",
|
|
1119
|
-
"image_url": {
|
|
1120
|
-
"url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
|
|
1121
|
-
}
|
|
1122
|
-
}
|
|
1123
|
-
for image_path in images
|
|
1124
|
-
]
|
|
1125
|
-
}
|
|
1126
|
-
],
|
|
1127
|
-
"stream": True,
|
|
1128
|
-
"temperature": float(temperature),
|
|
1129
|
-
"max_tokens": n_predict
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
completion_format_path = "/api"
|
|
1133
|
-
|
|
1134
|
-
if host_address.endswith("/"):
|
|
1135
|
-
host_address = host_address[:-1]
|
|
1136
|
-
url = f'{host_address}{completion_format_path}'
|
|
1137
|
-
|
|
1138
|
-
response = requests.post(url, json=data, headers=headers)
|
|
1139
|
-
|
|
1140
|
-
if response.status_code == 400:
|
|
1141
|
-
try:
|
|
1142
|
-
content = response.content.decode("utf8")
|
|
1143
|
-
content = json.loads(content)
|
|
1144
|
-
self.error(content["error"]["message"])
|
|
1145
|
-
return
|
|
1146
|
-
except:
|
|
1147
|
-
content = response.content.decode("utf8")
|
|
1148
|
-
content = json.loads(content)
|
|
1149
|
-
self.error(content["message"])
|
|
1150
|
-
return
|
|
1151
|
-
elif response.status_code == 404:
|
|
1152
|
-
ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
|
|
1153
|
-
|
|
1154
|
-
text = ""
|
|
1155
|
-
for line in response.iter_lines():
|
|
1156
|
-
decoded = line.decode("utf-8")
|
|
1157
|
-
if decoded.startswith("data: "):
|
|
1158
|
-
try:
|
|
1159
|
-
json_data = json.loads(decoded[5:].strip())
|
|
1160
|
-
try:
|
|
1161
|
-
chunk = json_data["choices"][0]["delta"]["content"]
|
|
1162
|
-
except:
|
|
1163
|
-
chunk = ""
|
|
1164
|
-
# Process the JSON data here
|
|
1165
|
-
text += chunk
|
|
1166
|
-
if streaming_callback:
|
|
1167
|
-
if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
|
|
1168
|
-
break
|
|
1169
|
-
except:
|
|
1170
|
-
break
|
|
1171
|
-
else:
|
|
1172
|
-
if decoded.startswith("{"):
|
|
1173
|
-
for line_ in response.iter_lines():
|
|
1174
|
-
decoded += line_.decode("utf-8")
|
|
1175
|
-
try:
|
|
1176
|
-
json_data = json.loads(decoded)
|
|
1177
|
-
if json_data["object"] == "error":
|
|
1178
|
-
self.error(json_data["message"])
|
|
1179
|
-
break
|
|
1180
|
-
except:
|
|
1181
|
-
self.error("Couldn't generate text, verify your key or model name")
|
|
1182
|
-
else:
|
|
1183
|
-
text += decoded
|
|
1184
|
-
if streaming_callback:
|
|
1185
|
-
if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
|
|
1186
|
-
break
|
|
1187
|
-
return text
|
|
1188
|
-
|
|
1189
|
-
def litellm_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
|
|
1190
|
-
# Set default values to instance variables if optional arguments are None
|
|
1191
|
-
host_address = host_address if host_address else self.host_address
|
|
1192
|
-
model_name = model_name if model_name else self.model_name
|
|
1193
|
-
n_predict = n_predict if n_predict else self.n_predict
|
|
1194
|
-
personality = personality if personality is not None else self.personality
|
|
1195
|
-
# Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
|
|
1196
|
-
temperature = temperature if temperature is not None else self.temperature
|
|
1197
|
-
top_k = top_k if top_k is not None else self.top_k
|
|
1198
|
-
top_p = top_p if top_p is not None else self.top_p
|
|
1199
|
-
repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
|
|
1200
|
-
repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
|
|
1201
|
-
seed = seed or self.seed # Use the instance seed if not provided
|
|
1202
|
-
n_threads = n_threads if n_threads else self.n_threads
|
|
1203
|
-
|
|
1204
|
-
if service_key!="":
|
|
1205
|
-
headers = {
|
|
1206
|
-
'Content-Type': 'application/json',
|
|
1207
|
-
'Authorization': f'Bearer {service_key}',
|
|
1208
|
-
}
|
|
1209
|
-
else:
|
|
1210
|
-
headers = {
|
|
1211
|
-
'Content-Type': 'application/json',
|
|
1212
|
-
}
|
|
256
|
+
def embed(self, text):
|
|
257
|
+
self.binding.embed(text)
|
|
1213
258
|
|
|
1214
|
-
data = {
|
|
1215
|
-
'model':model_name,
|
|
1216
|
-
'prompt': prompt,
|
|
1217
|
-
"stream":True,
|
|
1218
|
-
"temperature": float(temperature),
|
|
1219
|
-
"max_tokens": n_predict
|
|
1220
|
-
}
|
|
1221
|
-
completion_format_path = "/api/generate"
|
|
1222
|
-
if host_address.endswith("/"):
|
|
1223
|
-
host_address = host_address[:-1]
|
|
1224
|
-
url = f'{host_address}{completion_format_path}'
|
|
1225
|
-
|
|
1226
|
-
response = requests.post(url, json=data, headers=headers)
|
|
1227
|
-
|
|
1228
|
-
if response.status_code==404:
|
|
1229
|
-
ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
|
|
1230
|
-
text = ""
|
|
1231
|
-
for line in response.iter_lines():
|
|
1232
|
-
decoded = line.decode("utf-8")
|
|
1233
|
-
if decoded.startswith("{"):
|
|
1234
|
-
json_data = json.loads(decoded)
|
|
1235
|
-
if "error" in json_data:
|
|
1236
|
-
self.error(json_data["error"]["message"])
|
|
1237
|
-
break
|
|
1238
|
-
else:
|
|
1239
|
-
text +=decoded
|
|
1240
|
-
if streaming_callback:
|
|
1241
|
-
if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
|
|
1242
|
-
break
|
|
1243
|
-
|
|
1244
|
-
return text
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
def lollms_listMountedPersonalities(self, host_address:str=None):
|
|
1248
|
-
host_address = host_address if host_address else self.host_address
|
|
1249
|
-
url = f"{host_address}/list_mounted_personalities"
|
|
1250
|
-
|
|
1251
|
-
response = requests.get(url)
|
|
1252
|
-
|
|
1253
|
-
if response.status_code == 200:
|
|
1254
|
-
try:
|
|
1255
|
-
text = json.loads(response.content.decode("utf-8"))
|
|
1256
|
-
return text
|
|
1257
|
-
except Exception as ex:
|
|
1258
|
-
return {"status": False, "error": str(ex)}
|
|
1259
|
-
else:
|
|
1260
|
-
return {"status": False, "error": response.text}
|
|
1261
|
-
|
|
1262
|
-
def listModels(self, host_address:str=None):
|
|
1263
|
-
if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
|
|
1264
|
-
return self.lollms_listModels(host_address)
|
|
1265
|
-
elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
|
|
1266
|
-
return self.ollama_listModels(host_address)
|
|
1267
|
-
elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
|
|
1268
|
-
return self.openai_listModels(host_address)
|
|
1269
|
-
|
|
1270
|
-
def lollms_listModels(self, host_address:str=None):
|
|
1271
|
-
host_address = host_address if host_address else self.host_address
|
|
1272
|
-
url = f"{host_address}/list_models"
|
|
1273
|
-
|
|
1274
|
-
response = requests.get(url)
|
|
1275
|
-
|
|
1276
|
-
if response.status_code == 200:
|
|
1277
|
-
try:
|
|
1278
|
-
text = json.loads(response.content.decode("utf-8"))
|
|
1279
|
-
return text
|
|
1280
|
-
except Exception as ex:
|
|
1281
|
-
return {"status": False, "error": str(ex)}
|
|
1282
|
-
else:
|
|
1283
|
-
return {"status": False, "error": response.text}
|
|
1284
|
-
|
|
1285
|
-
def ollama_listModels(self, host_address:str=None):
|
|
1286
|
-
if host_address is None:
|
|
1287
|
-
host_address = self.host_address
|
|
1288
|
-
url = f'{host_address}/api/tags'
|
|
1289
|
-
headers = {
|
|
1290
|
-
'accept': 'application/json',
|
|
1291
|
-
'Authorization': f'Bearer {self.service_key}'
|
|
1292
|
-
}
|
|
1293
|
-
response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
|
|
1294
|
-
try:
|
|
1295
|
-
data = response.json()
|
|
1296
|
-
model_info = []
|
|
1297
|
-
|
|
1298
|
-
for model in data['models']:
|
|
1299
|
-
model_name = model['name']
|
|
1300
|
-
owned_by = ""
|
|
1301
|
-
created_datetime = model["modified_at"]
|
|
1302
|
-
model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
|
|
1303
|
-
|
|
1304
|
-
return model_info
|
|
1305
|
-
except Exception as ex:
|
|
1306
|
-
trace_exception(ex)
|
|
1307
|
-
return []
|
|
1308
|
-
|
|
1309
|
-
def openai_listModels(self, host_address:str=None):
|
|
1310
|
-
if host_address is None:
|
|
1311
|
-
host_address = self.host_address
|
|
1312
|
-
url = f'{host_address}/v1/models'
|
|
1313
|
-
headers = {
|
|
1314
|
-
'accept': 'application/json',
|
|
1315
|
-
'Authorization': f'Bearer {self.service_key}'
|
|
1316
|
-
}
|
|
1317
|
-
response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
|
|
1318
|
-
try:
|
|
1319
|
-
data = response.json()
|
|
1320
|
-
model_info = []
|
|
1321
259
|
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
owned_by = model['owned_by']
|
|
1325
|
-
created_datetime = model["created"]
|
|
1326
|
-
model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
|
|
260
|
+
def listModels(self):
|
|
261
|
+
return self.binding.listModels()
|
|
1327
262
|
|
|
1328
|
-
return model_info
|
|
1329
|
-
except Exception as ex:
|
|
1330
|
-
trace_exception(ex)
|
|
1331
|
-
return []
|
|
1332
263
|
|
|
1333
264
|
|
|
1334
265
|
def generate_codes(
|
|
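The hunk above removes the per-backend helpers (`ollama_generate`, `ollama_generate_with_images`, `litellm_generate`, and the `*_listModels` family) and replaces them with thin wrappers that delegate to the active binding. A minimal usage sketch of the delegating client, assuming a reachable Ollama backend and the constructor arguments shown in the `__main__` block at the end of this file:

```python
# Sketch only: the binding name and model mirror the __main__ example further
# down in this diff; any other registered binding would be used the same way.
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

# Both calls are now routed through the selected binding instead of
# backend-specific helper methods on the client itself.
print(lc.listModels())   # delegates to self.binding.listModels()
lc.embed("hello world")  # delegates to self.binding.embed(text)
```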
@@ -1376,11 +307,11 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
 {self.ai_full_header}"""
 
         if len(self.image_files)>0:
-            response = self.
+            response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
         elif len(images)>0:
-            response = self.
+            response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
         else:
-            response = self.
+            response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
         response_full += response
         codes = self.extract_code_blocks(response)
         return codes
@@ -1428,16 +359,13 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
         full_prompt += f"""You must return a single code tag.
 Do not split the code in multiple tags.
 {self.ai_full_header}"""
-
-            response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
-        else:
-            response = self.generate(full_prompt, max_size, False, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
+        response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
         codes = self.extract_code_blocks(response)
         if len(codes)>0:
             if not codes[-1]["is_complete"]:
                 code = "\n".join(codes[-1]["content"].split("\n")[:-1])
                 while not codes[-1]["is_complete"]:
-                    response = self.
+                    response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
                     codes = self.extract_code_blocks(response)
                     if len(codes)==0:
                         break
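In the hunk above, the continuation loop now calls `generate_text`: when the last extracted block is not complete, its final (possibly truncated) line is dropped and the model is asked to continue from that point. A small illustrative sketch of the stitching step; the helper name is hypothetical and not part of the library:

```python
def stitch_incomplete_block(partial: str, continuation: str) -> str:
    # Drop the last, possibly truncated, line of the incomplete block,
    # then append the freshly generated continuation.
    kept = "\n".join(partial.split("\n")[:-1])
    return kept + "\n" + continuation
```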
@@ -1453,103 +381,135 @@ Do not split the code in multiple tags.
         else:
             return None
 
-    def extract_code_blocks(self, text: str) -> List[dict]:
+    def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
         """
-
+        Extracts code blocks from text in Markdown or HTML format.
 
         Parameters:
-        text (str): The text
+        text (str): The text to extract code blocks from.
+        format (str): The format of code blocks ("markdown" for ``` or "html" for <code class="">).
 
         Returns:
-        List[dict]: A list of dictionaries
-            - 'index' (int):
-            - 'file_name' (str):
-            - 'content' (str):
-            - 'type' (str):
-            - 'is_complete' (bool): True if
-
-
-        The function assumes that the number of triple backticks in the text is even.
-        If the number of triple backticks is odd, it will consider the rest of the text as the last code block.
-        """
+        List[dict]: A list of dictionaries with:
+            - 'index' (int): Index of the code block.
+            - 'file_name' (str): File name from preceding text, if available.
+            - 'content' (str): Code block content.
+            - 'type' (str): Language type (from Markdown first line or HTML class).
+            - 'is_complete' (bool): True if block has a closing tag.
+        """
+        code_blocks = []
         remaining = text
-        bloc_index = 0
         first_index = 0
         indices = []
-        while len(remaining) > 0:
-            try:
-                index = remaining.index("```")
-                indices.append(index + first_index)
-                remaining = remaining[index + 3:]
-                first_index += index + 3
-                bloc_index += 1
-            except Exception as ex:
-                if bloc_index % 2 == 1:
-                    index = len(remaining)
-                    indices.append(index)
-                remaining = ""
 
-
-
-
+        if format.lower() == "markdown":
+            # Markdown: Find triple backtick positions
+            while remaining:
+                try:
+                    index = remaining.index("```")
+                    indices.append(index + first_index)
+                    remaining = remaining[index + 3:]
+                    first_index += index + 3
+                except ValueError:
+                    if len(indices) % 2 == 1:  # Odd number of delimiters
+                        indices.append(first_index + len(remaining))
+                    break
+
+        elif format.lower() == "html":
+            # HTML: Find <code> and </code> positions, handling nested tags
+            while remaining:
+                try:
+                    # Look for opening <code> tag
+                    start_index = remaining.index("<code")
+                    end_of_opening = remaining.index(">", start_index)
+                    indices.append(start_index + first_index)
+                    opening_tag = remaining[start_index:end_of_opening + 1]
+                    remaining = remaining[end_of_opening + 1:]
+                    first_index += end_of_opening + 1
+
+                    # Look for matching </code>, accounting for nested <code>
+                    nest_level = 0
+                    temp_index = 0
+                    while temp_index < len(remaining):
+                        if remaining[temp_index:].startswith("<code"):
+                            nest_level += 1
+                            temp_index += remaining[temp_index:].index(">") + 1
+                        elif remaining[temp_index:].startswith("</code>"):
+                            if nest_level == 0:
+                                indices.append(first_index + temp_index)
+                                remaining = remaining[temp_index + len("</code>"):]
+                                first_index += temp_index + len("</code>")
+                                break
+                            nest_level -= 1
+                            temp_index += len("</code>")
+                        else:
+                            temp_index += 1
+                    else:
+                        indices.append(first_index + len(remaining))
+                        break
+                except ValueError:
+                    break
+
+        else:
+            raise ValueError("Format must be 'markdown' or 'html'")
+
+        for i in range(0, len(indices), 2):
             block_infos = {
-                'index':
+                'index': i // 2,
                 'file_name': "",
-                'section': "",
                 'content': "",
-                'type':
+                'type': 'language-specific',
                 'is_complete': False
             }
-            if is_start:
-                # Check the preceding line for file name
-                preceding_text = text[:code_delimiter_position].strip().splitlines()
-                if preceding_text:
-                    last_line = preceding_text[-1].strip()
-                    if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
-                        file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
-                        block_infos['file_name'] = file_name
-                    elif last_line.startswith("## filename:"):
-                        file_name = last_line[len("## filename:"):].strip()
-                        block_infos['file_name'] = file_name
-                    if last_line.startswith("<section>") and last_line.endswith("</section>"):
-                        section = last_line[len("<section>"):-len("</section>")].strip()
-                        block_infos['section'] = section
-
-                sub_text = text[code_delimiter_position + 3:]
-                if len(sub_text) > 0:
-                    try:
-                        find_space = sub_text.index(" ")
-                    except:
-                        find_space = int(1e10)
-                    try:
-                        find_return = sub_text.index("\n")
-                    except:
-                        find_return = int(1e10)
-                    next_index = min(find_return, find_space)
-                    if '{' in sub_text[:next_index]:
-                        next_index = 0
-                    start_pos = next_index
-                    if code_delimiter_position + 3 < len(text) and text[code_delimiter_position + 3] in ["\n", " ", "\t"]:
-                        block_infos["type"] = 'language-specific'
-                    else:
-                        block_infos["type"] = sub_text[:next_index]
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Extract preceding text for file name
+            start_pos = indices[i]
+            preceding_text = text[:start_pos].strip().splitlines()
+            if preceding_text:
+                last_line = preceding_text[-1].strip()
+                if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
+                    block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
+                elif last_line.startswith("## filename:"):
+                    block_infos['file_name'] = last_line[len("## filename:"):].strip()
+
+            # Extract content and type
+            if format.lower() == "markdown":
+                sub_text = text[start_pos + 3:]
+                if i + 1 < len(indices):
+                    end_pos = indices[i + 1]
+                    content = text[start_pos + 3:end_pos].strip()
+                    block_infos['is_complete'] = True
+                else:
+                    content = sub_text.strip()
+                    block_infos['is_complete'] = False
+
+                if content:
+                    first_line = content.split('\n', 1)[0].strip()
+                    if first_line and not first_line.startswith(('{', ' ', '\t')):
+                        block_infos['type'] = first_line
+                        content = content[len(first_line):].strip()
+
+            elif format.lower() == "html":
+                opening_tag = text[start_pos:text.index(">", start_pos) + 1]
+                sub_text = text[start_pos + len(opening_tag):]
+                if i + 1 < len(indices):
+                    end_pos = indices[i + 1]
+                    content = text[start_pos + len(opening_tag):end_pos].strip()
+                    block_infos['is_complete'] = True
+                else:
+                    content = sub_text.strip()
+                    block_infos['is_complete'] = False
+
+                # Extract language from class attribute
+                if 'class="' in opening_tag:
+                    class_start = opening_tag.index('class="') + len('class="')
+                    class_end = opening_tag.index('"', class_start)
+                    class_value = opening_tag[class_start:class_end]
+                    if class_value.startswith("language-"):
+                        block_infos['type'] = class_value[len("language-"):]
+
+            block_infos['content'] = content
+            code_blocks.append(block_infos)
 
         return code_blocks
 
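With the new `format` parameter, the extractor handles both fenced Markdown blocks and `<code class="language-...">` HTML blocks. A short usage sketch, assuming an `lc` client like the one in the `__main__` block at the end of the file; the sample strings are made up for illustration:

```python
md_text = "Here is the file:\n```python\nprint('hi')\n```"
blocks = lc.extract_code_blocks(md_text)  # format defaults to "markdown"
# blocks[0]["type"] == "python" and blocks[0]["is_complete"] is True

html_text = 'Result: <code class="language-json">{"ok": true}</code>'
html_blocks = lc.extract_code_blocks(html_text, format="html")
# html_blocks[0]["type"] == "json"; content is the text between the tags
```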
@@ -1631,7 +591,7 @@ Do not split the code in multiple tags.
         }
         """
 
-        response = self.
+        response = self.generate_text_code(
             prompt=prompt,
             template=template,
             language="json",
@@ -1699,7 +659,7 @@ Do not split the code in multiple tags.
         else:
             prompt += "{\"index\": (the selected answer index)}"
 
-        response = self.
+        response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
                                            accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
 
         try:
@@ -1761,7 +721,7 @@ Do not split the code in multiple tags.
         else:
             prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
 
-        response = self.
+        response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
 
         try:
             result = json.loads(response)
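These hunks switch the structured-answer helpers (JSON index selection and ranking) over to `generate_text_code`. A hedged sketch of the call pattern with an illustrative prompt; the keyword arguments mirror the ones visible in the hunks above:

```python
import json

# Illustrative prompt; in the library it is built from the question and options.
prompt = 'Answer with {"index": (the selected answer index)}'
response = lc.generate_text_code(
    prompt,
    language="json",
    return_full_generated_code=False,
    callback=None,
)
result = json.loads(response)  # e.g. {"index": 1}
```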
@@ -1883,7 +843,7 @@ Do not discuss the information inside thememory, just put the relevant informati
             ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
             ASCIIColors.red(prompt)
 
-            memory = self.
+            memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
             code = self.extract_code_blocks(memory)
             if code:
                 memory=code[0]["content"]
@@ -1919,171 +879,206 @@ The updated memory must be put in a {chunk_processing_output_format} markdown ta
 
         # Generate final summary
         final_prompt = final_prompt_template
-        memory = self.
+        memory = self.generate_text(final_prompt, streaming_callback=callback)
         code = self.extract_code_blocks(memory)
         if code:
             memory=code[0]["content"]
         return memory
 
-    def deepsearch(
-        self,
-        query: str,
-        text: str = None,
-        files: list = None,
-        search_prompt: str = "Extract information related to the query from the current text chunk and update the memory with new findings.",
-        aggregation_prompt: str = None,
-        output_format: str = "markdown",
-        ctx_size: int = None,
-        chunk_size: int = None,
-        bootstrap_chunk_size: int = None,
-        bootstrap_steps: int = None,
-        callback=None,
-        debug: bool = False
-    ):
-        """
-        Searches for specific information related to a query in a long text or a list of files.
-        Processes the input in chunks, updates a memory with relevant findings, and optionally aggregates them.
-
-        Parameters:
-        - query (str): The query to search for.
-        - text (str, optional): The input text to search in. Defaults to None.
-        - files (list, optional): List of file paths to search in. Defaults to None.
-        - search_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard extraction prompt.
-        - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
-        - output_format (str, optional): Output format. Defaults to "markdown".
-        - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
-        - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but is slower.
-        - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
-        - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
-        - callback (callable, optional): Function called after each chunk. Defaults to None.
-        - debug (bool, optional): Enable debug output. Defaults to False.
 
-
-
-
-        # Set defaults
-        if ctx_size is None:
-            ctx_size = self.ctx_size
-        if chunk_size is None:
-            chunk_size = ctx_size // 4
-
-        # Prepare input
-        if files:
-            all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
-        elif text:
-            all_texts = [("input_text", text)]
-        else:
-            raise ValueError("Either text or files must be provided.")
+    def update_memory_from_file_chunk_prompt(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, memory_template, query, task_prompt):
+        return f"""{self.system_full_header}
+You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
 
-
-        memory = ""
-        chunk_id = 0
+Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the JSON template.
 
-
-
-
+----
+# Current file: {file_name}
+# Chunk number in this file: {file_chunk_id}
+# Global chunk number: {global_chunk_id}
+# Text chunk:
+```markdown
+{chunk}
+```
+{'Current findings memory (cumulative across all files):' if memory!="" else 'Memory template:'}
+```markdown
+{memory if memory!="" else memory_template}
+```
+{self.user_full_header}
+Query: '{query}'
+Task: {task_prompt}
+Update the markdown memory by adding new information from this chunk relevant to the query. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
+Ensure the output is valid markdown matching the structure of the provided template.
+Make sure to extract only information relevant to answering the user's query or providing important contextual information.
+Return the updated markdown memory inside a markdown code block.
+{self.ai_full_header}
+"""
 
-
+    def update_memory_from_file_chunk_prompt_markdown(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
+        return f"""{self.system_full_header}
+You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
 
+Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the markdown template.
 
 ----
-#
+# Current file: {file_name}
+# Chunk number in this file: {file_chunk_id}
+# Global chunk number: {global_chunk_id}
 # Text chunk:
 ```markdown
-{
+{chunk}
 ```
-
-Current findings memory:
+Current findings memory (cumulative across all files):
 ```markdown
-{
+{memory}
 ```
 {self.user_full_header}
 Query: '{query}'
-
-
-
-
+{'Start Creating a memory from the text chunk in a format adapted to answer the user Query' if memory=="" else 'Update the markdown memory by adding new information from this chunk relevant to the query.'} Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
+{'Ensure the output is valid markdown matching the structure of the current memory' if memory!='' else 'Ensure the output is valid markdown matching the structure of the provided template.'}
+Make sure to extract only information relevant to answering the user's query or providing important contextual information.
+Return the updated markdown memory inside a markdown code block.
 {self.ai_full_header}
 """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        final_prompt = f"""{self.system_full_header}
-You are a search results aggregator.
+    def deep_analyze(
+            self,
+            query: str,
+            text: str = None,
+            files: list = None,
+            aggregation_prompt: str = None,
+            output_format: str = "markdown",
+            ctx_size: int = None,
+            chunk_size: int = None,
+            bootstrap_chunk_size: int = None,
+            bootstrap_steps: int = None,
+            callback=None,
+            debug: bool = False
+    ):
+        """
+        Searches for specific information related to a query in a long text or a list of files.
+        Processes each file separately in chunks, updates a shared markdown memory with relevant findings, and optionally aggregates them.
+
+        Parameters:
+        - query (str): The query to search for.
+        - text (str, optional): The input text to search in. Defaults to None.
+        - files (list, optional): List of file paths to search in. Defaults to None.
+        - task_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard markdown extraction prompt.
+        - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
+        - output_format (str, optional): Output format. Defaults to "markdown".
+        - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
+        - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
+        - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
+        - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
+        - callback (callable, optional): Function called after each chunk. Defaults to None.
+        - debug (bool, optional): Enable debug output. Defaults to False.
+
+        Returns:
+        - str: The search findings or aggregated output in the specified format.
+        """
+        # Set defaults
+        if ctx_size is None:
+            ctx_size = self.default_ctx_size
+        if chunk_size is None:
+            chunk_size = ctx_size // 4
+
+        # Prepare input
+        if files:
+            all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
+        elif text:
+            all_texts = [("input_text", text)]
+        else:
+            raise ValueError("Either text or files must be provided.")
+
+        # Set default memory template for article analysis if none provided
+        memory = ""
 
+        # Initialize global chunk counter
+        global_chunk_id = 0
+
+        # Calculate static prompt tokens
+        example_prompt = self.update_memory_from_file_chunk_prompt_markdown("example.txt","0", "0", "", "", query)
+        static_tokens = len(self.tokenize(example_prompt))
+
+        # Process each file separately
+        for file_name, file_text in all_texts:
+            file_tokens = self.tokenize(file_text)
+            start_token_idx = 0
+            file_chunk_id = 0  # Reset chunk counter for each file
+
+            while start_token_idx < len(file_tokens):
+                # Calculate available tokens
+                current_memory_tokens = len(self.tokenize(memory))
+                available_tokens = ctx_size - static_tokens - current_memory_tokens
+                if available_tokens <= 0:
+                    raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
+
+                # Adjust chunk size
+                actual_chunk_size = (
+                    min(bootstrap_chunk_size, available_tokens)
+                    if bootstrap_chunk_size is not None and bootstrap_steps is not None and global_chunk_id < bootstrap_steps
+                    else min(chunk_size, available_tokens)
+                )
+
+                end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
+                chunk_tokens = file_tokens[start_token_idx:end_token_idx]
+                chunk = self.detokenize(chunk_tokens)
+
+                # Generate updated memory
+                prompt = self.update_memory_from_file_chunk_prompt_markdown(
+                    file_name=file_name,
+                    file_chunk_id=file_chunk_id,
+                    global_chunk_id=global_chunk_id,
+                    chunk=chunk,
+                    memory=memory,
+                    query=query)
+                if debug:
+                    print(f"----- Chunk {file_chunk_id} (Global {global_chunk_id}) from {file_name} ------")
+                    print(prompt)
+
+                output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
+                code = self.extract_code_blocks(output)
+                if code:
+                    memory = code[0]["content"]
+                else:
+                    memory = output
+
+                if debug:
+                    ASCIIColors.red("----- Updated Memory ------")
+                    ASCIIColors.white(memory)
+                    ASCIIColors.red("---------------------------")
+
+                start_token_idx = end_token_idx
+                file_chunk_id += 1
+                global_chunk_id += 1
+
+        # Aggregate findings if requested
+        if aggregation_prompt:
+            final_prompt = f"""{self.system_full_header}
+You are a search results aggregator.
 {self.user_full_header}
 {aggregation_prompt}
-
-Collected findings:
+Collected findings (across all files):
 ```markdown
 {memory}
 ```
-
 Provide the final output in {output_format} format.
 {self.ai_full_header}
 """
-
-
-
-
+            final_output = self.generate_text(final_prompt, streaming_callback=callback)
+            code = self.extract_code_blocks(final_output)
+            return code[0]["content"] if code else final_output
+        return memory
+
     def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
         ASCIIColors.error(content)
 
 
 
 if __name__=="__main__":
-
+    lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
     #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
-    lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
+    #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
     print(lc.listModels())
     code = lc.generate_code("Build a simple json that containes name and age. put the output inside a json markdown tag")
     print(code)
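The former `deepsearch` is replaced by `deep_analyze`, which walks each file chunk by chunk, keeps one cumulative markdown memory across files, and optionally aggregates it with a final prompt. A minimal usage sketch; the file names and query are placeholders, and a configured client (`lc`) as in the `__main__` block above is assumed:

```python
# Sketch only: "paper_1.md" and "paper_2.md" are hypothetical input files.
findings = lc.deep_analyze(
    query="What evaluation metrics are reported?",
    files=["paper_1.md", "paper_2.md"],
    aggregation_prompt="Summarize every finding that answers the query.",
    output_format="markdown",
    debug=False,
)
print(findings)
```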