lollms-client 1.4.1__py3-none-any.whl → 1.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
- lollms_client/llm_bindings/claude/__init__.py +125 -34
- lollms_client/llm_bindings/gemini/__init__.py +261 -159
- lollms_client/llm_bindings/grok/__init__.py +52 -14
- lollms_client/llm_bindings/groq/__init__.py +2 -2
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
- lollms_client/llm_bindings/litellm/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
- lollms_client/llm_bindings/lollms/__init__.py +151 -32
- lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
- lollms_client/llm_bindings/mistral/__init__.py +2 -2
- lollms_client/llm_bindings/novita_ai/__init__.py +439 -0
- lollms_client/llm_bindings/ollama/__init__.py +309 -93
- lollms_client/llm_bindings/open_router/__init__.py +2 -2
- lollms_client/llm_bindings/openai/__init__.py +148 -29
- lollms_client/llm_bindings/openllm/__init__.py +362 -506
- lollms_client/llm_bindings/openwebui/__init__.py +465 -0
- lollms_client/llm_bindings/perplexity/__init__.py +326 -0
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
- lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
- lollms_client/llm_bindings/transformers/__init__.py +428 -632
- lollms_client/llm_bindings/vllm/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_base_binding.py +61 -0
- lollms_client/lollms_core.py +516 -1890
- lollms_client/lollms_discussion.py +55 -18
- lollms_client/lollms_llm_binding.py +112 -261
- lollms_client/lollms_mcp_binding.py +34 -75
- lollms_client/lollms_personality.py +5 -2
- lollms_client/lollms_stt_binding.py +85 -52
- lollms_client/lollms_tti_binding.py +23 -37
- lollms_client/lollms_ttm_binding.py +24 -42
- lollms_client/lollms_tts_binding.py +28 -17
- lollms_client/lollms_ttv_binding.py +24 -42
- lollms_client/lollms_types.py +4 -2
- lollms_client/stt_bindings/whisper/__init__.py +108 -23
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +418 -810
- lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
- lollms_client/tti_bindings/gemini/__init__.py +182 -239
- lollms_client/tti_bindings/leonardo_ai/__init__.py +127 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +105 -0
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +178 -0
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +129 -0
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +115 -0
- lollms_client/ttm_bindings/stability_ai/__init__.py +117 -0
- lollms_client/ttm_bindings/topmediai/__init__.py +96 -0
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/lollms/__init__.py +6 -1
- lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
- lollms_client/tts_bindings/xtts/__init__.py +157 -74
- lollms_client/tts_bindings/xtts/server/main.py +241 -280
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/METADATA +316 -6
- lollms_client-1.7.10.dist-info/RECORD +89 -0
- lollms_client/ttm_bindings/bark/__init__.py +0 -339
- lollms_client-1.4.1.dist-info/RECORD +0 -78
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.4.1.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
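
Diff for `lollms_client/lollms_llm_binding.py` (+112 -261):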
```diff
@@ -1,20 +1,15 @@
 # lollms_binding.py
-from abc import ABC, abstractmethod
+from abc import abstractmethod
 import importlib
 from pathlib import Path
-from typing import Optional, Callable, List, Union
-from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
-import importlib
-from pathlib import Path
-from typing import Optional, Dict, List
+from typing import Optional, Callable, List, Union, Dict
 from ascii_colors import trace_exception, ASCIIColors
 from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_discussion import LollmsDiscussion
 from lollms_client.lollms_utilities import ImageTokenizer
+from lollms_client.lollms_base_binding import LollmsBaseBinding
 import re
 import yaml
-
-from pathlib import Path
 import json
 
 def load_known_contexts():
@@ -42,7 +37,7 @@ def load_known_contexts():
         print(f"An unexpected error occurred: {e}")
         return []
 
-class LollmsLLMBinding(ABC):
+class LollmsLLMBinding(LollmsBaseBinding):
     """Abstract base class for all LOLLMS LLM bindings"""
 
     def __init__(self,
@@ -55,7 +50,7 @@ class LollmsLLMBinding(ABC):
         Args:
             binding_name (Optional[str]): The name of the bindingto be used
         """
-
+        super().__init__(binding_name=binding_name, **kwargs)
         self.model_name = None #Must be set by the instance
         self.default_ctx_size = kwargs.get("ctx_size")
         self.default_n_predict = kwargs.get("n_predict")
```
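
As of this release, `LollmsLLMBinding` derives from the new `LollmsBaseBinding` and chains construction through `super().__init__()`. Below is a minimal sketch of what a concrete binding's constructor could look like under the new base class; the `MyBinding` name, the `host_address` parameter, and its default URL are hypothetical, and a real subclass must still implement the abstract methods (`generate_text`, `chat`, `load_model`, ...):

```python
from lollms_client.lollms_llm_binding import LollmsLLMBinding

class MyBinding(LollmsLLMBinding):
    """Hypothetical binding; only the constructor wiring is sketched."""

    def __init__(self, host_address: str = "http://localhost:9600", **kwargs):
        # Chains up to LollmsLLMBinding.__init__, which since 1.7.10 forwards
        # binding_name and kwargs to LollmsBaseBinding.__init__.
        super().__init__(binding_name="my_binding", **kwargs)
        self.host_address = host_address             # hypothetical parameter
        self.model_name = kwargs.get("model_name")   # "Must be set by the instance"
```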
```diff
@@ -72,151 +67,92 @@ class LollmsLLMBinding(ABC):
 
     @abstractmethod
     def generate_text(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
-
-        Args:
-            prompt (str): The input prompt for text generation.
-            images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
-            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
-            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
-            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
-            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
-            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
-            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
-            seed (Optional[int]): Random seed for generation. Uses instance default if None.
-            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
-            ctx_size (int | None): Context size override for this generation.
-            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
-                - First parameter (str): The chunk of text received.
-                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
-            split:Optional[bool]: put to true if the prompt is a discussion
-            user_keyword:Optional[str]: when splitting we use this to extract user prompt
-            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
-
-        Returns:
-            Union[str, dict]: Generated text or error dictionary if failed.
         """
         pass
 
     def generate_from_messages(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                      messages: List[Dict],
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[bool] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = "auto", # auto
+                      **kwargs
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
-
-        Args:
-            messages (List[Dict]): A openai compatible list of messages
-            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
-            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
-            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
-            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
-            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
-            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
-            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
-            seed (Optional[int]): Random seed for generation. Uses instance default if None.
-            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
-            ctx_size (int | None): Context size override for this generation.
-            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
-
-        Returns:
-            Union[str, dict]: Generated text or error dictionary if failed.
         """
         ASCIIColors.red("This binding does not support generate_from_messages")
 
 
     @abstractmethod
     def chat(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: Optional[float] = None,
+             top_k: Optional[int] = None,
+             top_p: Optional[float] = None,
+             repeat_penalty: Optional[float] = None,
+             repeat_last_n: Optional[int] = None,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[bool] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = "auto", # auto
+             **kwargs
+             ) -> Union[str, dict]:
         """
         A method to conduct a chat session with the model using a LollmsDiscussion object.
-        This method is responsible for formatting the discussion into the specific
-        format required by the model's API and then calling the generation endpoint.
-
-        Args:
-            discussion (LollmsDiscussion): The discussion object containing the conversation history.
-            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (Optional[bool]): Whether to stream the output.
-            temperature (Optional[float]): Sampling temperature.
-            top_k (Optional[int]): Top-k sampling parameter.
-            top_p (Optional[float]): Top-p sampling parameter.
-            repeat_penalty (Optional[float]): Penalty for repeated tokens.
-            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (Optional[int]): Number of threads to use.
-            ctx_size (Optional[int]): Context size override for this generation.
-            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
-
-        Returns:
-            Union[str, dict]: The generated text or an error dictionary.
         """
         pass
 
-    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+    def get_ctx_size(self, model_name: Optional[str|None] = None) -> Optional[int]:
         """
         Retrieves context size for a model from a hardcoded list.
-
-        This method checks if the model name contains a known base model identifier
-        (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
-        as a failsafe when the context size cannot be retrieved directly from the
-        Ollama API.
         """
         if model_name is None:
             model_name = self.model_name
 
-        # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
-        # should appear, as they will be checked first due to the sorting logic below.
         known_contexts = load_known_contexts()
 
         normalized_model_name = model_name.lower().strip()
-
-        # Sort keys by length in descending order. This ensures that a more specific
-        # name like 'llama3.1' is checked before a less specific name like 'llama3'.
         sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
 
         for base_name in sorted_base_models:
```
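
For orientation, here is a hedged usage sketch of the expanded `generate_text()` signature. The binding name, model name, and the exact `create_binding` keyword arguments are assumptions; only parameters that appear in the signature above are passed:

```python
from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
from lollms_client.lollms_types import MSG_TYPE

# Assumed: "ollama" is one of the shipped bindings and accepts model_name.
binding = LollmsLLMBindingManager().create_binding("ollama", model_name="llama3.1")

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> None:
    # Print streamed chunks; MSG_TYPE_CHUNK is the type cited in the old docstring.
    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(chunk, end="", flush=True)

result = binding.generate_text(
    prompt="Explain tokenization in one paragraph.",
    system_prompt="You are a concise assistant.",
    n_predict=256,
    stream=True,
    streaming_callback=on_chunk,
    think=False,             # new in 1.7.x
    reasoning_effort="low",  # new in 1.7.x: low, medium, high
)
```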
```diff
@@ -236,12 +172,6 @@ class LollmsLLMBinding(ABC):
     def tokenize(self, text: str) -> list:
         """
         Tokenize the input text into a list of tokens.
-
-        Args:
-            text (str): The text to tokenize.
-
-        Returns:
-            list: List of tokens.
         """
         pass
 
@@ -249,12 +179,6 @@ class LollmsLLMBinding(ABC):
     def detokenize(self, tokens: list) -> str:
         """
         Convert a list of tokens back to text.
-
-        Args:
-            tokens (list): List of tokens to detokenize.
-
-        Returns:
-            str: Detokenized text.
         """
         pass
 
@@ -262,27 +186,14 @@ class LollmsLLMBinding(ABC):
     def count_tokens(self, text: str) -> int:
         """
         Count tokens from a text.
-
-        Args:
-            tokens (list): List of tokens to detokenize.
-
-        Returns:
-            int: Number of tokens in text.
         """
         pass
 
     def count_image_tokens(self, image: str) -> int:
         """
         Estimate the number of tokens for an image using ImageTokenizer based on self.model_name.
-
-        Args:
-            image (str): Image to count tokens from. Either base64 string, path to image file, or URL.
-
-        Returns:
-            int: Estimated number of tokens for the image. Returns -1 on error.
         """
         try:
-            # Delegate token counting to ImageTokenizer
             return ImageTokenizer(self.model_name).count_image_tokens(image)
         except Exception as e:
             ASCIIColors.warning(f"Could not estimate image tokens: {e}")
@@ -291,13 +202,6 @@ class LollmsLLMBinding(ABC):
     def embed(self, text: str, **kwargs) -> list:
         """
         Get embeddings for the input text using Ollama API
-
-        Args:
-            text (str or List[str]): Input text to embed
-            **kwargs: Additional arguments like model, truncate, options, keep_alive
-
-        Returns:
-            dict: Response containing embeddings
         """
         pass
 
```
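
A short sketch of the retained `count_image_tokens()` helper; per the removed docstring, the argument may be a base64 string, a file path, or a URL, and -1 signals an error. The path and the `binding` instance are illustrative:

```python
# `binding` is any concrete LollmsLLMBinding instance (see the earlier sketch).
n_tokens = binding.count_image_tokens("assets/photo.png")  # illustrative path
if n_tokens >= 0:
    print(f"~{n_tokens} tokens for this image on {binding.model_name}")
```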
```diff
@@ -305,28 +209,13 @@ class LollmsLLMBinding(ABC):
     def get_model_info(self) -> dict:
         """
         Return information about the current model.
-
-        Returns:
-            dict: Model information dictionary.
         """
         pass
 
-    @abstractmethod
-    def listModels(self) -> list:
-        """Lists models"""
-        pass
-
-
     @abstractmethod
     def load_model(self, model_name: str) -> bool:
         """
         Load a specific model.
-
-        Args:
-            model_name (str): Name of the model to load.
-
-        Returns:
-            bool: True if model loaded successfully, False otherwise.
         """
         pass
 
@@ -334,9 +223,7 @@ class LollmsLLMBinding(ABC):
     def split_discussion(self, lollms_prompt_string: str, system_keyword="!@>system:", user_keyword="!@>user:", ai_keyword="!@>assistant:") -> list:
         """
         Splits a LoLLMs prompt into a list of OpenAI-style messages.
-        If the very first chunk has no prefix, it's assigned to "system".
         """
-        # Regex to split on any of the three prefixes (lookahead)
         pattern = r"(?={}|{}|{})".format(
             re.escape(system_keyword),
             re.escape(user_keyword),
@@ -350,7 +237,6 @@ class LollmsLLMBinding(ABC):
             if not part:
                 continue
 
-            # Determine role and strip prefix if present
             if part.startswith(system_keyword):
                 role = "system"
                 content = part[len(system_keyword):].strip()
```
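
The surviving `split_discussion()` turns a LoLLMs-formatted prompt into OpenAI-style messages, with an unprefixed leading chunk treated as the system message. A sketch of the expected behavior, reusing the illustrative `binding` instance from above:

```python
prompt = (
    "!@>system: You are a helpful assistant.\n"
    "!@>user: What is the capital of France?\n"
    "!@>assistant: Paris.\n"
    "!@>user: And Germany?"
)
messages = binding.split_discussion(prompt)
# Expected, given the prefix-stripping logic shown above:
# [{"role": "system", "content": "You are a helpful assistant."},
#  {"role": "user", "content": "What is the capital of France?"},
#  {"role": "assistant", "content": "Paris."},
#  {"role": "user", "content": "And Germany?"}]
```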
```diff
@@ -361,33 +247,54 @@ class LollmsLLMBinding(ABC):
                 role = "assistant"
                 content = part[len(ai_keyword):].strip()
             else:
-                # No prefix: if it's the first valid chunk, treat as system
                 if not messages:
                     role = "system"
                     content = part
                 else:
-                    # otherwise skip unrecognized segments
                     continue
 
             messages.append({"role": role, "content": content})
         if messages[-1]["content"]=="":
             del messages[-1]
         return messages
+
     def ps(self):
-
-
-
+        """
+        List models (simulating a process status command).
+        Since Lollms/OpenAI API doesn't have a specific 'ps' endpoint for running models with memory stats,
+        we list available models and populate structure with available info, leaving hardware stats empty.
+        """
+        # Since there is no dedicated ps endpoint to see *running* models in the standard OpenAI API,
+        # we list available models and try to map relevant info.
+        models = self.list_models()
+        standardized_models = []
+        for m in models:
+            standardized_models.append({
+                "model_name": m.get("model_name"),
+                "size": None,
+                "vram_size": None,
+                "gpu_usage_percent": None,
+                "cpu_usage_percent": None,
+                "expires_at": None,
+                "parameters_size": None,
+                "quantization_level": None,
+                "parent_model": None,
+                "context_size": m.get("context_length"),
+                "owned_by": m.get("owned_by"),
+                "created": m.get("created")
+            })
+        return standardized_models
+
+    def get_context_size(self) -> Optional[int]:
+        """
+        Returns the default context size for the binding.
+        """
+        return self.default_ctx_size
 
 class LollmsLLMBindingManager:
     """Manages binding discovery and instantiation"""
 
     def __init__(self, llm_bindings_dir: Union[str, Path] = Path(__file__).parent.parent / "llm_bindings"):
-        """
-        Initialize the LollmsLLMBindingManager.
-
-        Args:
-            llm_bindings_dir (str): Directory containing binding implementations. Defaults to "llm_bindings".
-        """
         self.llm_bindings_dir = Path(llm_bindings_dir)
         self.available_bindings = {}
 
```
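
The reworked `ps()` above synthesizes its rows from `list_models()`; since the abstract `listModels()` was dropped in this release, `list_models` presumably comes from `LollmsBaseBinding` or the concrete binding. A consumption sketch:

```python
for entry in binding.ps():
    # Hardware fields (size, vram_size, gpu_usage_percent, ...) are None by
    # design; only identity and context metadata are mapped from list_models().
    print(entry["model_name"], entry["context_size"], entry["owned_by"])
```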
```diff
@@ -408,13 +315,6 @@ class LollmsLLMBindingManager:
                        **kwargs) -> Optional[LollmsLLMBinding]:
         """
         Create an instance of a specific binding.
-
-        Args:
-            binding_name (str): Name of the binding to create.
-            kwargs: binding specific arguments
-
-        Returns:
-            Optional[LollmsLLMBinding]: Binding instance or None if creation failed.
         """
         if binding_name not in self.available_bindings:
             self._load_binding(binding_name)
@@ -470,14 +370,7 @@ class LollmsLLMBindingManager:
     @staticmethod
     def get_bindings_list(llm_bindings_dir: Union[str, Path]) -> List[Dict]:
         """
-        Lists all available LLM bindings by scanning a directory
-        description.yaml file if present, or providing a default description.
-
-        Args:
-            llm_bindings_dir (Union[str, Path]): The path to the directory containing LLM binding folders.
-
-        Returns:
-            List[Dict]: A list of dictionaries, each describing a binding.
+        Lists all available LLM bindings by scanning a directory.
         """
         bindings_dir = Path(llm_bindings_dir)
         if not bindings_dir.is_dir():
```
```diff
@@ -508,70 +401,28 @@ class LollmsLLMBindingManager:
     def get_available_bindings(self) -> List[Dict]:
         """
         Retrieves a list of all available LLM bindings with their full descriptions.
-
-        This method scans the configured `llm_bindings_dir`, parsing the `description.yaml`
-        file for each valid binding. If a `description.yaml` is missing, a fallback
-        description with common parameters is generated. This is the primary method
-        for discovering available bindings and their configuration requirements.
-
-        Returns:
-            List[Dict]:
-                A list of dictionaries, where each dictionary represents the
-                full description of an available binding.
-
-                Each dictionary contains the following keys:
-                - ``binding_name`` (str): The programmatic name of the binding (its folder name).
-                - ``title`` (str): A user-friendly title for the binding.
-                - ``author`` (str): The creator of the binding.
-                - ``creation_date`` (str): The date the binding was created.
-                - ``last_update_date`` (str): The date of the last major update.
-                - ``description`` (str): A detailed explanation of the binding's purpose.
-                - ``input_parameters`` (List[Dict]): A list of parameters required to
-                  configure the binding. Each parameter is a dictionary with:
-                    - ``name`` (str): The parameter's name (e.g., 'model_name').
-                    - ``type`` (str): The expected data type ('str', 'int', 'float', 'bool').
-                    - ``description`` (str): A user-friendly description of the parameter.
-                    - ``mandatory`` (bool): True if the parameter must be provided.
-                    - ``default``: The default value for the parameter.
-
-        Example of a returned dictionary in the list:
-        .. code-block:: python
-
-            {
-                "binding_name": "ollama",
-                "title": "Ollama",
-                "author": "ParisNeo",
-                ...
-                "input_parameters": [
-                    {
-                        "name": "host_address",
-                        "type": "str",
-                        "description": "The URL of the Ollama server.",
-                        "mandatory": True,
-                        "default": "http://localhost:11434"
-                    },
-                    ...
-                ]
-            }
         """
         return LollmsLLMBindingManager.get_bindings_list(self.llm_bindings_dir)
 
 def get_available_bindings(llm_bindings_dir: Union[str, Path] = None) -> List[Dict]:
     """
     Lists all available LLM bindings with their detailed descriptions.
-
-    This function serves as a primary entry point for discovering what bindings
-    are available and how to configure them.
-
-    Args:
-        llm_bindings_dir (Union[str, Path], optional):
-            The path to the LLM bindings directory. If None, it defaults to the
-            'llm_bindings' subdirectory relative to this file.
-            Defaults to None.
-
-    Returns:
-        List[Dict]: A list of dictionaries, each describing a binding.
     """
     if llm_bindings_dir is None:
         llm_bindings_dir = Path(__file__).parent / "llm_bindings"
-    return LollmsLLMBindingManager.get_bindings_list(llm_bindings_dir)
+    return LollmsLLMBindingManager.get_bindings_list(llm_bindings_dir)
+
+def list_binding_models(llm_binding_name: str, llm_binding_config: Optional[Dict[str, any]]|None = None, llm_bindings_dir: str|Path = Path(__file__).parent / "llm_bindings") -> List[Dict]:
+    """
+    Lists all available models for a specific binding.
+    """
+    binding = LollmsLLMBindingManager(llm_bindings_dir).create_binding(
+        binding_name=llm_binding_name,
+        **{
+            k: v
+            for k, v in (llm_binding_config or {}).items()
+            if k != "binding_name"
+        }
+    )
+
+    return binding.list_models() if binding else []
```