lollms-client 0.15.2__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- examples/generate_and_speak/generate_and_speak.py +251 -0
- examples/generate_game_sfx/generate_game_fx.py +240 -0
- examples/simple_text_gen_with_image_test.py +8 -8
- examples/text_2_image.py +0 -1
- examples/text_gen.py +1 -1
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
- lollms_client/llm_bindings/lollms/__init__.py +31 -24
- lollms_client/llm_bindings/ollama/__init__.py +47 -27
- lollms_client/llm_bindings/openai/__init__.py +62 -35
- lollms_client/llm_bindings/openllm/__init__.py +4 -1
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
- lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
- lollms_client/llm_bindings/transformers/__init__.py +3 -0
- lollms_client/llm_bindings/vllm/__init__.py +4 -1
- lollms_client/lollms_core.py +65 -33
- lollms_client/lollms_llm_binding.py +76 -22
- lollms_client/lollms_stt_binding.py +3 -15
- lollms_client/lollms_tti_binding.py +5 -29
- lollms_client/lollms_ttm_binding.py +5 -28
- lollms_client/lollms_tts_binding.py +4 -28
- lollms_client/lollms_ttv_binding.py +4 -28
- lollms_client/lollms_utilities.py +5 -3
- lollms_client/stt_bindings/lollms/__init__.py +5 -4
- lollms_client/stt_bindings/whisper/__init__.py +304 -0
- lollms_client/stt_bindings/whispercpp/__init__.py +380 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -6
- lollms_client/ttm_bindings/audiocraft/__init__.py +281 -0
- lollms_client/ttm_bindings/bark/__init__.py +339 -0
- lollms_client/tts_bindings/bark/__init__.py +336 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +343 -0
- lollms_client/tts_bindings/xtts/__init__.py +317 -0
- lollms_client-0.17.0.dist-info/METADATA +183 -0
- lollms_client-0.17.0.dist-info/RECORD +65 -0
- lollms_client-0.15.2.dist-info/METADATA +0 -192
- lollms_client-0.15.2.dist-info/RECORD +0 -56
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/top_level.txt +0 -0
@@ -46,43 +46,50 @@ class LollmsLLMBinding(LollmsLLMBinding):
         self.personality = personality
         self.model = None
 
-    def generate_text(self,
+    def generate_text(self,
                      prompt: str,
                      images: Optional[List[str]] = None,
                      system_prompt: str = "",
                      n_predict: Optional[int] = None,
-                     stream: bool = …
-                     temperature: float = …
-                     top_k: int = …
-                     top_p: float = …
-                     repeat_penalty: float = …
-                     repeat_last_n: int = …
+                     stream: Optional[bool] = None,
+                     temperature: Optional[float] = None,
+                     top_k: Optional[int] = None,
+                     top_p: Optional[float] = None,
+                     repeat_penalty: Optional[float] = None,
+                     repeat_last_n: Optional[int] = None,
                      seed: Optional[int] = None,
-                     n_threads: int = …
+                     n_threads: Optional[int] = None,
                      ctx_size: int | None = None,
-                     streaming_callback: Optional[Callable[[str, …
+                     streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
+                     ) -> Union[str, dict]:
         """
-        Generate text using the …
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
 
         Args:
            prompt (str): The input prompt for text generation.
           images (Optional[List[str]]): List of image file paths for multimodal generation.
-           … (ten old parameter descriptions, not recoverable in this view)
-           streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
-               - First parameter (str): The chunk of text received …
-               - Second parameter (str): The message type ( …
+           n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+           stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+           temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+           top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+           top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+           repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+           repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+           seed (Optional[int]): Random seed for generation. Uses instance default if None.
+           n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+           ctx_size (int | None): Context size override for this generation.
+           streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+               - First parameter (str): The chunk of text received.
+               - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+           split:Optional[bool]: put to true if the prompt is a discussion
+           user_keyword:Optional[str]: when splitting we use this to extract user prompt
+           ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
 
         Returns:
-           Union[str, dict]: Generated text …
+           Union[str, dict]: Generated text or error dictionary if failed.
        """
        # Determine endpoint based on presence of images
        endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
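Note: the split, user_keyword, and ai_keyword parameters added above let a caller pass a whole discussion as one prompt and have the binding cut it into chat messages via self.split_discussion(...) (used in the Ollama and OpenAI hunks below). That helper is not shown in this diff, so the sketch below is only an illustration of what such a splitter could look like; names and behaviour are assumptions, not the package's actual implementation.

# Hypothetical sketch of a discussion splitter (illustrative only).
import re
from typing import Dict, List

def split_discussion(prompt: str,
                     user_keyword: str = "!@>user:",
                     ai_keyword: str = "!@>assistant:") -> List[Dict[str, str]]:
    # Split on the role markers while keeping them, then pair each marker with its text.
    pattern = f"({re.escape(user_keyword)}|{re.escape(ai_keyword)})"
    parts = re.split(pattern, prompt)
    messages, role = [], "user"
    for part in parts:
        if part == user_keyword:
            role = "user"
        elif part == ai_keyword:
            role = "assistant"
        elif part.strip():
            messages.append({"role": role, "content": part.strip()})
    return messages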
@@ -109,47 +109,53 @@ class OllamaBinding(LollmsLLMBinding):
             self.ollama_client = None # Ensure it's None if initialization fails
             # Optionally re-raise or handle so the binding is clearly unusable
             raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
-
-    def generate_text(self,
+
+    def generate_text(self,
                      prompt: str,
-                     images: Optional[List[str]] = None,
+                     images: Optional[List[str]] = None,
                      system_prompt: str = "",
                      n_predict: Optional[int] = None,
-                     stream: bool = …
+                     stream: Optional[bool] = None,
                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                      top_k: int = 40, # Ollama default is 40
                      top_p: float = 0.9, # Ollama default is 0.9
                      repeat_penalty: float = 1.1, # Ollama default is 1.1
                      repeat_last_n: int = 64, # Ollama default is 64
                      seed: Optional[int] = None,
-                     n_threads: Optional[int] = None,
-                     ctx_size: …
-                     streaming_callback: Optional[Callable[[str, …
-
+                     n_threads: Optional[int] = None,
+                     ctx_size: int | None = None,
+                     streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
+                     ) -> Union[str, dict]:
         """
-        Generate text using the …
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
 
         Args:
            prompt (str): The input prompt for text generation.
            images (Optional[List[str]]): List of image file paths for multimodal generation.
-           n_predict (Optional[int]): Maximum number of tokens to generate …
-           stream (bool): Whether to stream the output.
-           temperature (float): Sampling temperature.
-           top_k (int): Top-k sampling parameter.
-           top_p (float): Top-p sampling parameter.
-           repeat_penalty (float): Penalty for repeated tokens.
-           repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-           seed (Optional[int]): Random seed for generation.
-           n_threads (Optional[int]): Number of threads to use …
-           ctx_size ( …
-           streaming_callback (Optional[Callable[[str, …
-               - First parameter (str): The chunk of text received …
-               - Second parameter ( …
-
+           n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+           stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+           temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+           top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+           top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+           repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+           repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+           seed (Optional[int]): Random seed for generation. Uses instance default if None.
+           n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+           ctx_size (int | None): Context size override for this generation.
+           streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+               - First parameter (str): The chunk of text received.
+               - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+           split:Optional[bool]: put to true if the prompt is a discussion
+           user_keyword:Optional[str]: when splitting we use this to extract user prompt
+           ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
 
         Returns:
-           Union[str, …
+           Union[str, dict]: Generated text or error dictionary if failed.
        """
+
        if not self.ollama_client:
            return {"status": False, "error": "Ollama client not initialized."}
 
@@ -175,8 +181,15 @@ class OllamaBinding(LollmsLLMBinding):
                     # If images were base64 strings, they would need decoding to bytes first.
                     processed_images.append(img_path)
 
-            messages = [ …
-            …
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+                if processed_images:
+                    messages[-1]["images"]=processed_images
+            else:
+                messages.append({'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None})
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
@@ -201,7 +214,14 @@ class OllamaBinding(LollmsLLMBinding):
                 )
                 return response_dict.get('message', {}).get('content', '')
         else: # Text-only
-            messages = [ …
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            else:
+                messages.append({'role': 'user', 'content': prompt})
+
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
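The two Ollama hunks above choose between one monolithic user message (split=False, the previous behaviour) and a decomposed chat history (split=True), with any processed images attached to the last message. Roughly, for a discussion-formatted prompt (content illustrative, assuming a splitter like the sketch earlier):

# Illustrative input/output for the hunks above; not executed by the package as written.
prompt = "!@>user: Describe this picture.\n!@>assistant: It shows a cat.\n!@>user: What breed is it?"
# With split=True the binding would build approximately:
# [
#   {"role": "system", "content": system_prompt},
#   {"role": "user", "content": "Describe this picture."},
#   {"role": "assistant", "content": "It shows a cat."},
#   {"role": "user", "content": "What breed is it?", "images": [...]},  # images land on the last message
# ]
# With split=False the whole prompt string stays in a single user message.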
@@ -55,42 +55,50 @@ class OpenAIBinding(LollmsLLMBinding):
         self.completion_format = ELF_COMPLETION_FORMAT.Chat
 
 
-    def generate_text(self,
-                     … (fourteen old parameter lines, not recoverable in this view)
+    def generate_text(self,
+                     prompt: str,
+                     images: Optional[List[str]] = None,
+                     system_prompt: str = "",
+                     n_predict: Optional[int] = None,
+                     stream: Optional[bool] = None,
+                     temperature: float = 0.7,
+                     top_k: int = 40,
+                     top_p: float = 0.9,
+                     repeat_penalty: float = 1.1,
+                     repeat_last_n: int = 64,
+                     seed: Optional[int] = None,
+                     n_threads: Optional[int] = None,
+                     ctx_size: int | None = None,
+                     streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
+                     ) -> Union[str, dict]:
         """
-        Generate text …
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
 
         Args:
            prompt (str): The input prompt for text generation.
            images (Optional[List[str]]): List of image file paths for multimodal generation.
-           n_predict (Optional[int]): Maximum number of tokens to generate.
-           stream (bool): Whether to stream the output.
-           temperature (float): Sampling temperature.
-           top_k (int): Top-k sampling parameter.
-           top_p (float): Top-p sampling parameter.
-           repeat_penalty (float): Penalty for repeated tokens.
-           repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-           seed (Optional[int]): Random seed for generation.
-           n_threads (int): Number of threads to use.
+           n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+           stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+           temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+           top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+           top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+           repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+           repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+           seed (Optional[int]): Random seed for generation. Uses instance default if None.
+           n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+           ctx_size (int | None): Context size override for this generation.
            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
               - First parameter (str): The chunk of text received.
               - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+           split:Optional[bool]: put to true if the prompt is a discussion
+           user_keyword:Optional[str]: when splitting we use this to extract user prompt
+           ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
 
         Returns:
-           str: Generated text or error dictionary if failed.
+           Union[str, dict]: Generated text or error dictionary if failed.
        """
        count = 0
        output = ""
@@ -101,16 +109,17 @@ class OpenAIBinding(LollmsLLMBinding):
                 {
                     "role": "system",
                     "content": system_prompt,
-                }
-                … (four old lines, not recoverable in this view)
+                }
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+                if images:
+                    messages[-1]["content"] = [
                         {
                             "type": "text",
-                            "text": …
+                            "text": messages[-1]["content"]
                         }
-                    ]
+                    ]+[
                         {
                             "type": "image_url",
                             "image_url": {
@@ -119,8 +128,26 @@ class OpenAIBinding(LollmsLLMBinding):
                             }
                             for image_path in images
                         ]
-            … (two old lines, not recoverable in this view)
+            else:
+                messages.append({
+                    'role': 'user',
+                    'content': [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        }
+                    ] + [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                            }
+                        }
+                        for image_path in images
+                    ]
+                })
+
         else:
             messages = [{"role": "user", "content": prompt}]
 
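The OpenAI hunks above embed each image as a data: URL via encode_image(image_path). That helper is not part of this hunk (it presumably comes from lollms_client.lollms_utilities, which also changed in this release); a minimal encoder of the kind the call assumes would be, ignoring any resizing or re-encoding the real helper may do:

# Minimal sketch of a base64 image encoder; assumption, not the package's actual helper.
import base64

def encode_image(image_path: str) -> str:
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")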
@@ -154,7 +154,10 @@ class OpenLLMBinding(LollmsLLMBinding):
                      seed: Optional[int] = None,
                      # n_threads: Optional[int] = None, # Server-side config for OpenLLM
                      # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                     streaming_callback: Optional[Callable[[str, int], bool]] = None
+                     streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
                      ) -> Union[str, Dict[str, any]]:
 
         if not self.openllm_client:
@@ -216,6 +216,9 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
                      use_chat_format: bool = True,
                      grammar: Optional[Union[str, LlamaGrammar]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
                      **generation_kwargs
                      ) -> Union[str, Dict[str, any]]:
 
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                      repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                      seed: Optional[int] = None,
                      n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                     streaming_callback: Optional[Callable[[str, int], bool]] = None
+                     streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
                      ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
 
@@ -312,6 +312,9 @@ class HuggingFaceHubBinding(LollmsLLMBinding):
                      seed: Optional[int] = None,
                      stop_words: Optional[List[str]] = None, # Added custom stop_words
                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
                      use_chat_format_override: Optional[bool] = None,
                      **generation_kwargs
                      ) -> Union[str, Dict[str, Any]]:
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                      repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                      seed: Optional[int] = None,
                      n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                     streaming_callback: Optional[Callable[[str, int], bool]] = None
+                     streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                     split:Optional[bool]=False, # put to true if the prompt is a discussion
+                     user_keyword:Optional[str]="!@>user:",
+                     ai_keyword:Optional[str]="!@>assistant:",
                      ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
 
lollms_client/lollms_core.py CHANGED
@@ -11,7 +11,7 @@ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingManager
 from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
 
-import …
+import re
 from enum import Enum
 import base64
 import requests
@@ -48,6 +48,13 @@ class LollmsClient():
                  ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
                  ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",
 
+                 # Configurations
+                 tts_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 tti_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 stt_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 ttv_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 ttm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+
                  # General Parameters (mostly defaults for LLM generation)
                  service_key: Optional[str] = None, # Shared service key/client_id
                  verify_ssl_certificate: bool = True,
@@ -84,6 +91,11 @@ class LollmsClient():
             stt_bindings_dir (Path): Directory for STT bindings.
             ttv_bindings_dir (Path): Directory for TTV bindings.
             ttm_bindings_dir (Path): Directory for TTM bindings.
+            tts_binding_config (Optional[Dict]): Additional config for the TTS binding.
+            tti_binding_config (Optional[Dict]): Additional config for the TTI binding.
+            stt_binding_config (Optional[Dict]): Additional config for the STT binding.
+            ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
+            ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
             service_key (Optional[str]): Shared authentication key or client_id.
             verify_ssl_certificate (bool): Whether to verify SSL certificates.
             ctx_size (Optional[int]): Default context size for LLM.
@@ -144,54 +156,62 @@ class LollmsClient():
         if tts_binding_name:
             self.tts = self.tts_binding_manager.create_binding(
                 binding_name=tts_binding_name,
-
-                service_key=self.service_key,
-                verify_ssl_certificate=self.verify_ssl_certificate
+                **tts_binding_config
             )
             if self.tts is None:
                 ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
 
         if tti_binding_name:
-            … (six old lines, not recoverable in this view)
+            if tti_binding_config:
+                self.tti = self.tti_binding_manager.create_binding(
+                    binding_name=tti_binding_name,
+                    **tti_binding_config
+                )
+            else:
+                self.tti = self.tti_binding_manager.create_binding(
+                    binding_name=tti_binding_name
+                )
             if self.tti is None:
                 ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
 
         if stt_binding_name:
-            … (six old lines, not recoverable in this view)
+            if stt_binding_config:
+                self.stt = self.stt_binding_manager.create_binding(
+                    binding_name=stt_binding_name,
+                    **stt_binding_config
+                )
+            else:
+                self.stt = self.stt_binding_manager.create_binding(
+                    binding_name=stt_binding_name,
+                )
             if self.stt is None:
                 ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
-
         if ttv_binding_name:
-            … (six old lines, not recoverable in this view)
+            if ttv_binding_config:
+                self.ttv = self.ttv_binding_manager.create_binding(
+                    binding_name=ttv_binding_name,
+                    **ttv_binding_config
+                )
+            else:
+                self.ttv = self.ttv_binding_manager.create_binding(
+                    binding_name=ttv_binding_name
+                )
             if self.ttv is None:
                 ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
 
         if ttm_binding_name:
-            … (six old lines, not recoverable in this view)
+            if ttm_binding_config:
+                self.ttm = self.ttm_binding_manager.create_binding(
+                    binding_name=ttm_binding_name,
+                    **ttm_binding_config
+                )
+            else:
+                self.ttm = self.ttm_binding_manager.create_binding(
+                    binding_name=ttm_binding_name
+                )
             if self.ttm is None:
                 ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
 
-
         # --- Store Default Generation Parameters ---
         self.default_ctx_size = ctx_size
         self.default_n_predict = n_predict
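The hunk above forwards the new *_binding_config dictionaries as keyword arguments to create_binding. Note that the TTS branch unpacks tts_binding_config unconditionally, while the other modalities fall back to a name-only call when no config is given, so passing a dict along with tts_binding_name looks necessary here. A hypothetical construction call (binding names come from the file list in this release; the config keys and the binding_name keyword are illustrative assumptions, not a documented interface):

# Hypothetical usage sketch; keys and values are placeholders.
client = LollmsClient(
    binding_name="ollama",
    tts_binding_name="piper_tts",
    tts_binding_config={"host_address": "http://localhost:9600"},  # forwarded as **kwargs to create_binding
    stt_binding_name="whisper",
    stt_binding_config={"model_name": "base"},
)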
@@ -342,7 +362,11 @@ class LollmsClient():
                       seed: Optional[int] = None,
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
 
@@ -360,6 +384,9 @@ class LollmsClient():
             n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
             ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
 
         Returns:
             Union[str, dict]: Generated text or error dictionary if failed.
@@ -379,7 +406,10 @@ class LollmsClient():
                 seed=seed if seed is not None else self.default_seed,
                 n_threads=n_threads if n_threads is not None else self.default_n_threads,
                 ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                split= split,
+                user_keyword=user_keyword,
+                ai_keyword=ai_keyword
             )
         raise RuntimeError("LLM binding not initialized.")
 
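With the pass-through above, the client-level call mirrors the binding-level one. A short usage sketch (it assumes an already constructed LollmsClient named lc; constructor arguments omitted):

# Usage sketch only; lc is assumed to be a configured LollmsClient instance.
discussion = (
    "!@>user: What is the capital of France?\n"
    "!@>assistant: Paris.\n"
    "!@>user: And of Italy?"
)
answer = lc.generate_text(discussion, split=True)  # keywords default to "!@>user:" / "!@>assistant:"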
@@ -961,7 +991,6 @@ Do not split the code in multiple tags.
         Ranks answers for a question from best to worst using LLM JSON generation.
         (Implementation requires self.generate_code which uses self.generate_text)
         """
-        # ... (Implementation as provided before, relies on self.generate_code) ...
         if not callback:
             callback = self.sink
 
@@ -1547,6 +1576,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi…
         callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
         return final_output
 
+
 def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
     """
     Chunks text based on token count.
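chunk_text, whose signature appears above, chunks text by token count with an overlap between consecutive chunks. A simplified sketch of that idea (ignoring the use_separators option), offered as an illustration rather than the package's exact implementation:

# Simplified token-count chunking with overlap; illustrative only.
def chunk_by_tokens(text, tokenize, detokenize, chunk_size, overlap):
    # Assumes 0 <= overlap < chunk_size; tokenize/detokenize are caller-supplied callables.
    tokens = tokenize(text)
    chunks, start = [], 0
    while start < len(tokens):
        end = min(start + chunk_size, len(tokens))
        chunks.append(detokenize(tokens[start:end]))
        if end == len(tokens):
            break
        start = end - overlap  # step back so consecutive chunks share `overlap` tokens
    return chunks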
@@ -1626,3 +1656,5 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators…
                 break
 
     return chunks
+
+