lollms-client 0.15.2__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lollms-client might be problematic.

Files changed (39)
  1. examples/generate_and_speak/generate_and_speak.py +251 -0
  2. examples/generate_game_sfx/generate_game_fx.py +240 -0
  3. examples/simple_text_gen_with_image_test.py +8 -8
  4. examples/text_2_image.py +0 -1
  5. examples/text_gen.py +1 -1
  6. lollms_client/__init__.py +1 -1
  7. lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
  8. lollms_client/llm_bindings/lollms/__init__.py +31 -24
  9. lollms_client/llm_bindings/ollama/__init__.py +47 -27
  10. lollms_client/llm_bindings/openai/__init__.py +62 -35
  11. lollms_client/llm_bindings/openllm/__init__.py +4 -1
  12. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
  13. lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
  14. lollms_client/llm_bindings/transformers/__init__.py +3 -0
  15. lollms_client/llm_bindings/vllm/__init__.py +4 -1
  16. lollms_client/lollms_core.py +65 -33
  17. lollms_client/lollms_llm_binding.py +76 -22
  18. lollms_client/lollms_stt_binding.py +3 -15
  19. lollms_client/lollms_tti_binding.py +5 -29
  20. lollms_client/lollms_ttm_binding.py +5 -28
  21. lollms_client/lollms_tts_binding.py +4 -28
  22. lollms_client/lollms_ttv_binding.py +4 -28
  23. lollms_client/lollms_utilities.py +5 -3
  24. lollms_client/stt_bindings/lollms/__init__.py +5 -4
  25. lollms_client/stt_bindings/whisper/__init__.py +304 -0
  26. lollms_client/stt_bindings/whispercpp/__init__.py +380 -0
  27. lollms_client/tti_bindings/lollms/__init__.py +4 -6
  28. lollms_client/ttm_bindings/audiocraft/__init__.py +281 -0
  29. lollms_client/ttm_bindings/bark/__init__.py +339 -0
  30. lollms_client/tts_bindings/bark/__init__.py +336 -0
  31. lollms_client/tts_bindings/piper_tts/__init__.py +343 -0
  32. lollms_client/tts_bindings/xtts/__init__.py +317 -0
  33. lollms_client-0.17.0.dist-info/METADATA +183 -0
  34. lollms_client-0.17.0.dist-info/RECORD +65 -0
  35. lollms_client-0.15.2.dist-info/METADATA +0 -192
  36. lollms_client-0.15.2.dist-info/RECORD +0 -56
  37. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/WHEEL +0 -0
  38. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/licenses/LICENSE +0 -0
  39. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/top_level.txt +0 -0
@@ -46,43 +46,50 @@ class LollmsLLMBinding(LollmsLLMBinding)
         self.personality = personality
         self.model = None

-    def generate_text(self,
+    def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool = False,
-                      temperature: float = 0.1,
-                      top_k: int = 50,
-                      top_p: float = 0.95,
-                      repeat_penalty: float = 0.8,
-                      repeat_last_n: int = 40,
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
                       seed: Optional[int] = None,
-                      n_threads: int = 8,
+                      n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the LOLLMS service, with optional image support.
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-                If provided, uses the /lollms_generate_with_images endpoint.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output. Defaults to False.
-            temperature (float): Sampling temperature. Defaults to 0.1.
-            top_k (int): Top-k sampling parameter. Defaults to 50.
-            top_p (float): Top-p sampling parameter. Defaults to 0.95.
-            repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use. Defaults to 8.
-            streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
-                - First parameter (str): The chunk of text received from the stream.
-                - Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         # Determine endpoint based on presence of images
         endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
@@ -109,47 +109,53 @@ class OllamaBinding(LollmsLLMBinding):
             self.ollama_client = None # Ensure it's None if initialization fails
             # Optionally re-raise or handle so the binding is clearly unusable
             raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
-
-    def generate_text(self,
+
+    def generate_text(self,
                       prompt: str,
-                      images: Optional[List[str]] = None, # List of image file paths
+                      images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool = False,
+                      stream: Optional[bool] = None,
                       temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                       top_k: int = 40, # Ollama default is 40
                       top_p: float = 0.9, # Ollama default is 0.9
                       repeat_penalty: float = 1.1, # Ollama default is 1.1
                       repeat_last_n: int = 64, # Ollama default is 64
                       seed: Optional[int] = None,
-                      n_threads: Optional[int] = None, # Ollama calls this num_thread
-                      ctx_size: Optional[int] = None, # Ollama calls this num_ctx
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
-                      ) -> Union[str, Dict[str, any]]:
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the Ollama service, with optional image support.
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate (num_predict).
-            stream (bool): Whether to stream the output. Defaults to False.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (Optional[int]): Number of threads to use (num_thread).
-            ctx_size (Optional[int]): Context window size (num_ctx).
-            streaming_callback (Optional[Callable[[str, int], bool]]): Callback for streaming output.
-                - First parameter (str): The chunk of text received from the stream.
-                - Second parameter (int): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
-                Return False to stop streaming.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str, Dict[str, any]]: Generated text if successful, or a dictionary with status and error if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
+
         if not self.ollama_client:
             return {"status": False, "error": "Ollama client not initialized."}

@@ -175,8 +181,15 @@ class OllamaBinding(LollmsLLMBinding):
                 # If images were base64 strings, they would need decoding to bytes first.
                 processed_images.append(img_path)

-            messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None}]
-
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+                if processed_images:
+                    messages[-1]["images"]=processed_images
+            else:
+                messages.append({'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None})
            if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
@@ -201,7 +214,14 @@ class OllamaBinding(LollmsLLMBinding):
                 )
                 return response_dict.get('message', {}).get('content', '')
         else: # Text-only
-            messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt}]
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            else:
+                messages.append({'role': 'user', 'content': prompt})
+
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
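
Two notes on this hunk. First, the removed inline comments in the signature hunk documented how the generic arguments map onto Ollama's own option names (num_predict, num_ctx, num_thread). A hedged sketch of the kind of options dict the binding could hand to ollama_client.chat; the actual construction inside the binding may differ:

    def build_ollama_options(n_predict=None, temperature=None, top_k=None, top_p=None,
                             repeat_penalty=None, repeat_last_n=None, seed=None,
                             n_threads=None, ctx_size=None):
        # Sketch: map the generic parameters onto Ollama's option names and drop
        # anything left as None so Ollama falls back to its own defaults.
        options = {
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
            "repeat_penalty": repeat_penalty,
            "repeat_last_n": repeat_last_n,
            "num_predict": n_predict,   # Ollama's name for n_predict
            "num_ctx": ctx_size,        # Ollama's name for the context size
            "num_thread": n_threads,    # Ollama's name for the thread count
            "seed": seed,
        }
        return {k: v for k, v in options.items() if v is not None}

Second, both branches rely on self.split_discussion, whose implementation is not shown in this diff (it presumably lives with the shared helpers in lollms_client/lollms_llm_binding.py, one of the files changed above). A rough sketch of what such a splitter could do; this is an assumption, not the package's code:

    import re

    def split_discussion(text, user_keyword="!@>user:", ai_keyword="!@>assistant:"):
        # Turn a flat prompt tagged with user/assistant keywords into chat messages.
        pattern = f"({re.escape(user_keyword)}|{re.escape(ai_keyword)})"
        parts = re.split(pattern, text)
        messages, role = [], None
        for part in parts:
            if part == user_keyword:
                role = "user"
            elif part == ai_keyword:
                role = "assistant"
            elif role and part.strip():
                messages.append({"role": role, "content": part.strip()})
        return messages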
@@ -55,42 +55,50 @@ class OpenAIBinding(LollmsLLMBinding):
         self.completion_format = ELF_COMPLETION_FORMAT.Chat


-    def generate_text(self,
-                      prompt: str,
-                      images: Optional[List[str]] = None,
-                      system_prompt: str = "",
-                      n_predict: Optional[int] = None,
-                      stream: bool = False,
-                      temperature: float = 0.1,
-                      top_k: int = 50,
-                      top_p: float = 0.95,
-                      repeat_penalty: float = 0.8,
-                      repeat_last_n: int = 40,
-                      seed: Optional[int] = None,
-                      n_threads: int = 8,
-                      ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      repeat_last_n: int = 64,
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text based on the provided prompt and parameters.
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output. Defaults to False.
-            temperature (float): Sampling temperature. Defaults to 0.1.
-            top_k (int): Top-k sampling parameter. Defaults to 50.
-            top_p (float): Top-p sampling parameter. Defaults to 0.95.
-            repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use. Defaults to 8.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                 - First parameter (str): The chunk of text received.
                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            str: Generated text or error dictionary if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         count = 0
         output = ""
@@ -101,16 +109,17 @@ class OpenAIBinding(LollmsLLMBinding):
                 {
                     "role": "system",
                     "content": system_prompt,
-                },
-
-                {
-                    "role": "user",
-                    "content": [
+                }
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            if images:
+                messages[-1]["content"] = [
                         {
                             "type": "text",
-                            "text": prompt
+                            "text": messages[-1]["content"]
                         }
-                    ] + [
+                    ]+[
                         {
                             "type": "image_url",
                             "image_url": {
@@ -119,8 +128,26 @@ class OpenAIBinding(LollmsLLMBinding):
                             }
                         for image_path in images
                     ]
-                }
-            ]
+            else:
+                messages.append({
+                    'role': 'user',
+                    'content': [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        }
+                    ] + [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                            }
+                        }
+                        for image_path in images
+                    ]
+                }
+                )
+
         else:
             messages = [{"role": "user", "content": prompt}]

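
encode_image is used here but defined elsewhere in the package (plausibly in lollms_utilities.py, which is among the files changed above) rather than in this hunk. Given how it feeds the data URL, a minimal sketch of such a helper would be the following; the real utility may also resize or convert the image:

    import base64

    def encode_image(image_path: str) -> str:
        # Read the image file and return its base64 representation as a string.
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")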
@@ -154,7 +154,10 @@ class OpenLLMBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       # n_threads: Optional[int] = None, # Server-side config for OpenLLM
                       # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:

         if not self.openllm_client:
@@ -216,6 +216,9 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
                       use_chat_format: bool = True,
                       grammar: Optional[Union[str, LlamaGrammar]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       **generation_kwargs
                       ) -> Union[str, Dict[str, any]]:

@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}

@@ -312,6 +312,9 @@ class HuggingFaceHubBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       stop_words: Optional[List[str]] = None, # Added custom stop_words
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       use_chat_format_override: Optional[bool] = None,
                       **generation_kwargs
                       ) -> Union[str, Dict[str, Any]]:
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}

@@ -11,7 +11,7 @@ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingM
 from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager

-import json
+import re
 from enum import Enum
 import base64
 import requests
@@ -48,6 +48,13 @@ class LollmsClient():
                  ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
                  ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",

+                 # Configurations
+                 tts_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 tti_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 stt_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 ttv_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+                 ttm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+
                  # General Parameters (mostly defaults for LLM generation)
                  service_key: Optional[str] = None, # Shared service key/client_id
                  verify_ssl_certificate: bool = True,
@@ -84,6 +91,11 @@ class LollmsClient():
             stt_bindings_dir (Path): Directory for STT bindings.
             ttv_bindings_dir (Path): Directory for TTV bindings.
             ttm_bindings_dir (Path): Directory for TTM bindings.
+            tts_binding_config (Optional[Dict]): Additional config for the TTS binding.
+            tti_binding_config (Optional[Dict]): Additional config for the TTI binding.
+            stt_binding_config (Optional[Dict]): Additional config for the STT binding.
+            ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
+            ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
             service_key (Optional[str]): Shared authentication key or client_id.
             verify_ssl_certificate (bool): Whether to verify SSL certificates.
             ctx_size (Optional[int]): Default context size for LLM.
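
These new constructor parameters let each modality binding receive its own keyword arguments at construction time. An illustrative call follows; the LLM-side arguments and the per-binding config keys are assumptions made for the example, since each binding defines its own __init__:

    from lollms_client import LollmsClient

    lc = LollmsClient(
        binding_name="ollama",                                      # assumed LLM-side parameter
        model_name="mistral",                                       # assumed LLM-side parameter
        tts_binding_name="piper_tts",
        tts_binding_config={"default_voice": "en_US-amy-medium"},   # hypothetical key
        stt_binding_name="whisper",
        stt_binding_config={"model_name": "base"},                  # hypothetical key
    )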
@@ -144,54 +156,62 @@ class LollmsClient():
         if tts_binding_name:
             self.tts = self.tts_binding_manager.create_binding(
                 binding_name=tts_binding_name,
-                host_address=effective_host_address,
-                service_key=self.service_key,
-                verify_ssl_certificate=self.verify_ssl_certificate
+                **tts_binding_config
             )
             if self.tts is None:
                 ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")

         if tti_binding_name:
-            self.tti = self.tti_binding_manager.create_binding(
-                binding_name=tti_binding_name,
-                host_address=effective_host_address,
-                service_key=self.service_key, # Passed as service_key, used as client_id by lollms TTI binding
-                verify_ssl_certificate=self.verify_ssl_certificate
-            )
+            if tti_binding_config:
+                self.tti = self.tti_binding_manager.create_binding(
+                    binding_name=tti_binding_name,
+                    **tti_binding_config
+                )
+            else:
+                self.tti = self.tti_binding_manager.create_binding(
+                    binding_name=tti_binding_name
+                )
             if self.tti is None:
                 ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")

         if stt_binding_name:
-            self.stt = self.stt_binding_manager.create_binding(
-                binding_name=stt_binding_name,
-                host_address=effective_host_address,
-                service_key=self.service_key,
-                verify_ssl_certificate=self.verify_ssl_certificate
-            )
+            if stt_binding_config:
+                self.stt = self.stt_binding_manager.create_binding(
+                    binding_name=stt_binding_name,
+                    **stt_binding_config
+                )
+            else:
+                self.stt = self.stt_binding_manager.create_binding(
+                    binding_name=stt_binding_name,
+                )
             if self.stt is None:
                 ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
-
         if ttv_binding_name:
-            self.ttv = self.ttv_binding_manager.create_binding(
-                binding_name=ttv_binding_name,
-                host_address=effective_host_address,
-                service_key=self.service_key,
-                verify_ssl_certificate=self.verify_ssl_certificate
-            )
+            if ttv_binding_config:
+                self.ttv = self.ttv_binding_manager.create_binding(
+                    binding_name=ttv_binding_name,
+                    **ttv_binding_config
+                )
+            else:
+                self.ttv = self.ttv_binding_manager.create_binding(
+                    binding_name=ttv_binding_name
+                )
             if self.ttv is None:
                 ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")

         if ttm_binding_name:
-            self.ttm = self.ttm_binding_manager.create_binding(
-                binding_name=ttm_binding_name,
-                host_address=effective_host_address,
-                service_key=self.service_key,
-                verify_ssl_certificate=self.verify_ssl_certificate
-            )
+            if ttm_binding_config:
+                self.ttm = self.ttm_binding_manager.create_binding(
+                    binding_name=ttm_binding_name,
+                    **ttm_binding_config
+                )
+            else:
+                self.ttm = self.ttm_binding_manager.create_binding(
+                    binding_name=ttm_binding_name
+                )
             if self.ttm is None:
                 ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")

-
         # --- Store Default Generation Parameters ---
         self.default_ctx_size = ctx_size
         self.default_n_predict = n_predict
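
The five blocks above repeat one pattern: if the caller supplied a config dict, unpack it into create_binding; otherwise create the binding from its name alone. Condensed into a helper for readability (a sketch, not code from the package):

    def _create_optional_binding(manager, binding_name, binding_config=None):
        # Pass the optional per-binding config through as keyword arguments.
        if binding_config:
            return manager.create_binding(binding_name=binding_name, **binding_config)
        return manager.create_binding(binding_name=binding_name)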
@@ -342,7 +362,11 @@ class LollmsClient():
                       seed: Optional[int] = None,
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> Union[str, dict]:
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -360,6 +384,9 @@ class LollmsClient():
             n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
             ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
             Union[str, dict]: Generated text or error dictionary if failed.
@@ -379,7 +406,10 @@ class LollmsClient():
                 seed=seed if seed is not None else self.default_seed,
                 n_threads=n_threads if n_threads is not None else self.default_n_threads,
                 ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                split= split,
+                user_keyword=user_keyword,
+                ai_keyword=ai_keyword
             )
         raise RuntimeError("LLM binding not initialized.")

@@ -961,7 +991,6 @@ Do not split the code in multiple tags.
         Ranks answers for a question from best to worst using LLM JSON generation.
         (Implementation requires self.generate_code which uses self.generate_text)
         """
-        # ... (Implementation as provided before, relies on self.generate_code) ...
         if not callback:
             callback = self.sink

@@ -1547,6 +1576,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
         return final_output

+
 def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
     """
     Chunks text based on token count.
@@ -1626,3 +1656,5 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators
             break

     return chunks
+
+
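
For reference, the core idea behind chunk_text (fixed-size token windows with overlap) can be sketched as below; the shipped function additionally handles separators (use_separators) and other details, so this is an approximation rather than the library code:

    def simple_chunk_text(text, tokenizer, detokenizer, chunk_size, overlap):
        # Tokenize once, then emit windows of chunk_size tokens, stepping back by
        # `overlap` tokens between windows so neighbouring chunks share context.
        assert 0 <= overlap < chunk_size
        tokens = tokenizer(text)
        chunks, start = [], 0
        while start < len(tokens):
            end = min(start + chunk_size, len(tokens))
            chunks.append(detokenizer(tokens[start:end]))
            if end == len(tokens):
                break
            start = end - overlap
        return chunks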