lollms-client 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/lollms/__init__.py +1 -0
- lollms_client/llm_bindings/ollama/__init__.py +5 -1
- lollms_client/llm_bindings/openai/__init__.py +1 -0
- lollms_client/llm_bindings/transformers/__init__.py +67 -71
- lollms_client/lollms_core.py +1222 -638
- lollms_client/lollms_stt_binding.py +137 -0
- lollms_client/lollms_tasks.py +1 -2
- lollms_client/lollms_tti_binding.py +175 -0
- lollms_client/lollms_ttm_binding.py +135 -0
- lollms_client/lollms_tts_binding.py +138 -0
- lollms_client/lollms_ttv_binding.py +135 -0
- lollms_client/stt_bindings/lollms/__init__.py +138 -0
- lollms_client/tti_bindings/lollms/__init__.py +210 -0
- lollms_client/ttm_bindings/__init__.py +0 -0
- lollms_client/ttm_bindings/lollms/__init__.py +73 -0
- lollms_client/tts_bindings/lollms/__init__.py +145 -0
- lollms_client/ttv_bindings/__init__.py +73 -0
- {lollms_client-0.10.0.dist-info → lollms_client-0.12.0.dist-info}/METADATA +11 -2
- lollms_client-0.12.0.dist-info/RECORD +41 -0
- {lollms_client-0.10.0.dist-info → lollms_client-0.12.0.dist-info}/WHEEL +1 -1
- lollms_client-0.10.0.dist-info/RECORD +0 -34
- {lollms_client-0.10.0.dist-info → lollms_client-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {lollms_client-0.10.0.dist-info → lollms_client-0.12.0.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py CHANGED
@@ -4,5 +4,5 @@ from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_personality import LollmsPersonality
 from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper
-from lollms_client.
+from lollms_client.lollms_tts_binding import LollmsTTS
 from lollms_client.lollms_functions import FunctionCalling_Library
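
The practical effect of the one-line change above is that the new TTS base class becomes importable from the package root. A one-line sketch (assuming lollms-client 0.12.0 is installed):

    from lollms_client import LollmsTTS  # re-exported by the import added above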
lollms_client/llm_bindings/lollms/__init__.py CHANGED
@@ -54,6 +54,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the LOLLMS service, with optional image support.
lollms_client/llm_bindings/ollama/__init__.py CHANGED
@@ -54,6 +54,7 @@ class OllamaBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the Ollama service, with optional image support.
@@ -111,8 +112,10 @@ class OllamaBinding(LollmsLLMBinding):
                 }],
                 "stream": stream,
                 "temperature": float(temperature),
-                "max_tokens": n_predict
+                "max_tokens": n_predict,
             }
+            if ctx_size is not None:
+                data["num_ctx"] = ctx_size
             url = f'{host_address}/api/chat'
         else:
             # Text-only generation using /api/generate endpoint
@@ -265,6 +268,7 @@ class OllamaBinding(LollmsLLMBinding):
         }
         response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
         try:
+            ASCIIColors.debug("Listing ollama models")
             data = response.json()
             model_info = []

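Taken together, the Ollama hunks thread a new optional ctx_size argument from generate_text into the chat request body as num_ctx, injecting it only when the caller supplies a value. A minimal standalone sketch of that guard; the "model" and "messages" keys and their values are hypothetical (the hunk above only shows the tail of the payload), while the rest mirrors the diff:

    ctx_size = 8192  # hypothetical caller-supplied value; None leaves the payload untouched
    data = {
        "model": "my-model",                                   # hypothetical
        "messages": [{"role": "user", "content": "Hello"}],    # hypothetical
        "stream": False,
        "temperature": 0.1,
        "max_tokens": 1024,
    }
    if ctx_size is not None:
        data["num_ctx"] = ctx_size  # same conditional injection as the diff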
lollms_client/llm_bindings/openai/__init__.py CHANGED
@@ -62,6 +62,7 @@ class OpenAIBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
         """
         Generate text based on the provided prompt and parameters.
lollms_client/llm_bindings/transformers/__init__.py CHANGED
@@ -11,12 +11,12 @@ from ascii_colors import ASCIIColors
 import pipmaster as pm
 if not pm.is_installed("torch"):
     ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)

 import torch
 if not torch.cuda.is_available():
     ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
     import torch

 if not pm.is_installed("transformers"):
@@ -26,6 +26,7 @@ BindingName = "TransformersBinding"

 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig
 from packaging import version
+import transformers

 class TransformersBinding(LollmsLLMBinding):
     """Transformers-specific binding implementation"""
@@ -35,7 +36,8 @@ class TransformersBinding(LollmsLLMBinding):
                  model_name: str = "",
                  service_key: str = None,
                  verify_ssl_certificate: bool = True,
-                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+                 prompt_template: Optional[str] = None):
         """
         Initialize the Transformers binding.

@@ -45,6 +47,7 @@ class TransformersBinding(LollmsLLMBinding):
             service_key (str): Authentication key for the service. Defaults to None.
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
             default_completion_format (ELF_COMPLETION_FORMAT): Default format for completions.
+            prompt_template (Optional[str]): Custom prompt template. If None, inferred from model.
         """
         super().__init__(
             host_address=host_address,
@@ -76,6 +79,9 @@ class TransformersBinding(LollmsLLMBinding):

         self.generation_config = GenerationConfig.from_pretrained(str(model_name))

+        # Infer or set prompt template
+        self.prompt_template = prompt_template if prompt_template else self._infer_prompt_template(model_name)
+
         # Display device information
         device = next(self.model.parameters()).device
         device_type = "CPU" if device.type == "cpu" else "GPU"
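
The new prompt_template parameter lets a caller pin an exact template instead of relying on name-based inference. An illustrative construction sketch (the model name and template values are hypothetical, and constructing the binding loads the model):

    binding = TransformersBinding(
        model_name="meta-llama/Llama-2-7b-chat-hf",  # hypothetical
        prompt_template="[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]",
    )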
@@ -86,26 +92,47 @@ class TransformersBinding(LollmsLLMBinding):
             [ASCIIColors.color_green, ASCIIColors.color_blue if device_type == "GPU" else ASCIIColors.color_red]
         )

+    def _infer_prompt_template(self, model_name: str) -> str:
+        """
+        Infer the prompt template based on the model name.
+
+        Args:
+            model_name (str): Name of the model.
+
+        Returns:
+            str: The inferred prompt template format string.
+        """
+        model_name = model_name.lower()
+        if "llama-2" in model_name or "llama" in model_name:
+            return "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
+        elif "gpt" in model_name:
+            return "{system_prompt}\n{user_prompt}"  # Simple concatenation for GPT-style models
+        else:
+            # Default to a basic chat format
+            ASCIIColors.yellow(f"Warning: No specific template found for {model_name}. Using default chat format.")
+            return "[INST] {system_prompt}\n{user_prompt} [/INST]"

     def generate_text(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.1,
+                      top_k: int = 50,
+                      top_p: float = 0.95,
+                      repeat_penalty: float = 0.8,
+                      repeat_last_n: int = 40,
+                      seed: Optional[int] = None,
+                      n_threads: int = 8,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, str], None]] = None,
+                      return_legacy_cache: bool = False,
+                      system_prompt: str = "You are a helpful assistant.") -> Union[str, dict]:
         """
         Generate text using the Transformers model, with optional image support.

         Args:
-            prompt (str): The input prompt for text generation.
+            prompt (str): The input prompt for text generation (user prompt).
             images (Optional[List[str]]): List of image file paths for multimodal generation.
             n_predict (Optional[int]): Maximum number of tokens to generate.
             stream (bool): Whether to stream the output. Defaults to False.
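
_infer_prompt_template is plain substring matching on the lowercased model name. Note that `"llama-2" in model_name or "llama" in model_name` is effectively just the `"llama"` check, since any name containing "llama-2" also contains "llama"; every llama-family model therefore gets the Llama-2 chat format. A standalone copy of the logic for experimentation:

    def infer_prompt_template(model_name: str) -> str:
        # Same branching as the _infer_prompt_template method added above,
        # minus the ASCIIColors warning on the fallback path.
        name = model_name.lower()
        if "llama-2" in name or "llama" in name:
            return "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
        elif "gpt" in name:
            return "{system_prompt}\n{user_prompt}"
        return "[INST] {system_prompt}\n{user_prompt} [/INST]"

    assert "SYS" in infer_prompt_template("Llama-2-7b-chat")
    assert infer_prompt_template("gpt2") == "{system_prompt}\n{user_prompt}"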
@@ -118,6 +145,7 @@ class TransformersBinding(LollmsLLMBinding):
             n_threads (int): Number of threads to use. Defaults to 8.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
             return_legacy_cache (bool): Whether to use legacy cache format (pre-v4.47). Defaults to False.
+            system_prompt (str): System prompt to set model behavior. Defaults to "You are a helpful assistant."

         Returns:
             Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
@@ -130,6 +158,12 @@ class TransformersBinding(LollmsLLMBinding):
         if seed is not None:
             torch.manual_seed(seed)

+        # Apply the prompt template
+        formatted_prompt = self.prompt_template.format(
+            system_prompt=system_prompt,
+            user_prompt=prompt
+        )
+
         # Prepare generation config
         self.generation_config.max_new_tokens = n_predict if n_predict else 2048
         self.generation_config.temperature = temperature
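
The template is consumed with ordinary str.format, so a custom prompt_template must expose exactly the {system_prompt} and {user_prompt} placeholders; a template containing any other placeholder raises KeyError at generation time. A quick round trip:

    template = "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
    formatted = template.format(
        system_prompt="You are a helpful assistant.",
        user_prompt="What does ctx_size do?",
    )
    # -> '[INST] <<SYS>> You are a helpful assistant. <</SYS>> What does ctx_size do? [/INST]'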
@@ -139,14 +173,14 @@ class TransformersBinding(LollmsLLMBinding):
         self.generation_config.pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id

         # Tokenize input with attention mask
-        inputs = self.tokenizer(
+        inputs = self.tokenizer(formatted_prompt, return_tensors="pt", padding=True)
         input_ids = inputs.input_ids.to(self.model.device)
         attention_mask = inputs.attention_mask.to(self.model.device)

         # Handle image input if provided (basic implementation)
         if images and len(images) > 0:
             ASCIIColors.yellow("Warning: Image processing not fully implemented in this binding")
-
+            formatted_prompt += "\n[Image content not processed]"

         # Check transformers version for cache handling
         use_legacy_cache = return_legacy_cache or version.parse(transformers.__version__) < version.parse("4.47.0")
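
The version gate mirrors the docstring's note that the legacy cache format is pre-v4.47: return_legacy_cache forces the old handling on any version, while older installs fall back to it automatically. The comparison itself is standard packaging.version usage:

    from packaging import version
    import transformers

    # True on any transformers release older than 4.47.0
    legacy = version.parse(transformers.__version__) < version.parse("4.47.0")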
@@ -169,10 +203,8 @@ class TransformersBinding(LollmsLLMBinding):
         ):
             # Handle different output formats based on version/cache setting
             if use_legacy_cache:
-                # Legacy format: tuple of (sequences, scores, ...)
                 sequences = output[0]
             else:
-                # New format: Cache instance
                 sequences = output.sequences

             # Decode the new tokens
@@ -212,70 +244,34 @@ class TransformersBinding(LollmsLLMBinding):
             error_msg = f"Error generating text: {str(e)}"
             ASCIIColors.red(error_msg)
             return {"status": "error", "error": error_msg}
-
-

     def tokenize(self, text: str) -> list:
-        """
-        Tokenize the input text into a list of characters.
-
-        Args:
-            text (str): The text to tokenize.
-
-        Returns:
-            list: List of individual characters.
-        """
+        """Tokenize the input text into a list of characters."""
         return list(text)

     def detokenize(self, tokens: list) -> str:
-        """
-        Convert a list of tokens back to text.
-
-        Args:
-            tokens (list): List of tokens (characters) to detokenize.
-
-        Returns:
-            str: Detokenized text.
-        """
+        """Convert a list of tokens back to text."""
         return "".join(tokens)
+
     def embed(self, text: str, **kwargs) -> list:
-        """
-
-
-        Args:
-            text (str or List[str]): Input text to embed
-            **kwargs: Additional arguments like model, truncate, options, keep_alive
-
-        Returns:
-            dict: Response containing embeddings
-        """
-        pass
+        """Get embeddings for the input text (placeholder)."""
+        pass
+
     def get_model_info(self) -> dict:
-        """
-        Return information about the current Ollama model.
-
-        Returns:
-            dict: Dictionary containing model name, version, and host address.
-        """
+        """Return information about the current model."""
         return {
-            "name": "
-            "version":
+            "name": "transformers",
+            "version": transformers.__version__,
             "host_address": self.host_address,
             "model_name": self.model_name
         }
+
     def listModels(self):
-        """
+        """Lists available models (placeholder)."""
         pass
+
     def load_model(self, model_name: str) -> bool:
-        """
-        Load a specific model into the Ollama binding.
-
-        Args:
-            model_name (str): Name of the model to load.
-
-        Returns:
-            bool: True if model loaded successfully.
-        """
+        """Load a specific model into the binding."""
         self.model = model_name
         self.model_name = model_name
-        return True
+        return True