lollms-client 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lollms_client/__init__.py CHANGED
@@ -4,5 +4,5 @@ from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_personality import LollmsPersonality
 from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper
-from lollms_client.lollms_tts import LollmsTTS
+from lollms_client.lollms_tts_binding import LollmsTTS
 from lollms_client.lollms_functions import FunctionCalling_Library
@@ -54,6 +54,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the LOLLMS service, with optional image support.
@@ -54,6 +54,7 @@ class OllamaBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
         """
         Generate text using the Ollama service, with optional image support.
@@ -111,8 +112,10 @@ class OllamaBinding(LollmsLLMBinding):
                 }],
                 "stream": stream,
                 "temperature": float(temperature),
-                "max_tokens": n_predict
+                "max_tokens": n_predict,
             }
+            if ctx_size is not None:
+                data["num_ctx"] = ctx_size
             url = f'{host_address}/api/chat'
         else:
             # Text-only generation using /api/generate endpoint
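
The added num_ctx field is only set when a caller passes ctx_size. Below is a minimal sketch of the resulting /api/chat payload; the field names mirror the hunk above, while the model, prompt, and sizes are illustrative and the messages structure is abbreviated:

# Illustrative payload mirroring the hunk above; model, prompt, and sizes are made up.
data = {
    "model": "llava",
    "messages": [{"role": "user", "content": "Describe this image"}],
    "stream": False,
    "temperature": 0.1,
    "max_tokens": 512,
}
ctx_size = 8192
if ctx_size is not None:
    data["num_ctx"] = ctx_size  # context window size forwarded to Ollama
# requests.post(f"{host_address}/api/chat", json=data, verify=verify_ssl_certificate)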
@@ -265,6 +268,7 @@ class OllamaBinding(LollmsLLMBinding):
         }
         response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
         try:
+            ASCIIColors.debug("Listing ollama models")
             data = response.json()
             model_info = []

@@ -62,6 +62,7 @@ class OpenAIBinding(LollmsLLMBinding):
                       repeat_last_n: int = 40,
                       seed: Optional[int] = None,
                       n_threads: int = 8,
+                      ctx_size: int | None = None,
                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
         """
         Generate text based on the provided prompt and parameters.
@@ -11,12 +11,12 @@ from ascii_colors import ASCIIColors
 import pipmaster as pm
 if not pm.is_installed("torch"):
     ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)

 import torch
 if not torch.cuda.is_available():
     ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
-    pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+    pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
     import torch

 if not pm.is_installed("transformers"):
@@ -26,6 +26,7 @@ BindingName = "TransformersBinding"

 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig
 from packaging import version
+import transformers

 class TransformersBinding(LollmsLLMBinding):
     """Transformers-specific binding implementation"""
@@ -35,7 +36,8 @@ class TransformersBinding(LollmsLLMBinding):
                  model_name: str = "",
                  service_key: str = None,
                  verify_ssl_certificate: bool = True,
-                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+                 prompt_template: Optional[str] = None):
         """
         Initialize the Transformers binding.

@@ -45,6 +47,7 @@ class TransformersBinding(LollmsLLMBinding):
             service_key (str): Authentication key for the service. Defaults to None.
             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
             default_completion_format (ELF_COMPLETION_FORMAT): Default format for completions.
+            prompt_template (Optional[str]): Custom prompt template. If None, inferred from model.
         """
         super().__init__(
             host_address=host_address,
@@ -76,6 +79,9 @@ class TransformersBinding(LollmsLLMBinding):

         self.generation_config = GenerationConfig.from_pretrained(str(model_name))

+        # Infer or set prompt template
+        self.prompt_template = prompt_template if prompt_template else self._infer_prompt_template(model_name)
+
         # Display device information
         device = next(self.model.parameters()).device
         device_type = "CPU" if device.type == "cpu" else "GPU"
@@ -86,26 +92,47 @@ class TransformersBinding(LollmsLLMBinding):
             [ASCIIColors.color_green, ASCIIColors.color_blue if device_type == "GPU" else ASCIIColors.color_red]
         )

+    def _infer_prompt_template(self, model_name: str) -> str:
+        """
+        Infer the prompt template based on the model name.
+
+        Args:
+            model_name (str): Name of the model.
+
+        Returns:
+            str: The inferred prompt template format string.
+        """
+        model_name = model_name.lower()
+        if "llama-2" in model_name or "llama" in model_name:
+            return "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
+        elif "gpt" in model_name:
+            return "{system_prompt}\n{user_prompt}"  # Simple concatenation for GPT-style models
+        else:
+            # Default to a basic chat format
+            ASCIIColors.yellow(f"Warning: No specific template found for {model_name}. Using default chat format.")
+            return "[INST] {system_prompt}\n{user_prompt} [/INST]"

     def generate_text(self,
-                      prompt: str,
-                      images: Optional[List[str]] = None,
-                      n_predict: Optional[int] = None,
-                      stream: bool = False,
-                      temperature: float = 0.1,
-                      top_k: int = 50,
-                      top_p: float = 0.95,
-                      repeat_penalty: float = 0.8,
-                      repeat_last_n: int = 40,
-                      seed: Optional[int] = None,
-                      n_threads: int = 8,
-                      streaming_callback: Optional[Callable[[str, str], None]] = None,
-                      return_legacy_cache: bool = False) -> Union[str, dict]:
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.1,
+                      top_k: int = 50,
+                      top_p: float = 0.95,
+                      repeat_penalty: float = 0.8,
+                      repeat_last_n: int = 40,
+                      seed: Optional[int] = None,
+                      n_threads: int = 8,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, str], None]] = None,
+                      return_legacy_cache: bool = False,
+                      system_prompt: str = "You are a helpful assistant.") -> Union[str, dict]:
         """
         Generate text using the Transformers model, with optional image support.

         Args:
-            prompt (str): The input prompt for text generation.
+            prompt (str): The input prompt for text generation (user prompt).
             images (Optional[List[str]]): List of image file paths for multimodal generation.
             n_predict (Optional[int]): Maximum number of tokens to generate.
             stream (bool): Whether to stream the output. Defaults to False.
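
Taken together with the constructor change above, the new keyword arguments can be exercised as in this usage sketch; it is not taken from the package docs, the model name and prompts are placeholders, and the remaining constructor arguments are assumed to keep their defaults:

# Hypothetical call showing the parameters added in 0.12.0; values are illustrative.
binding = TransformersBinding(model_name="meta-llama/Llama-2-7b-chat-hf")
result = binding.generate_text(
    prompt="Summarize the plot of Hamlet in two sentences.",
    system_prompt="You are a concise literary assistant.",  # new parameter
    ctx_size=4096,                                          # new parameter
    n_predict=128,
)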
@@ -118,6 +145,7 @@ class TransformersBinding(LollmsLLMBinding):
             n_threads (int): Number of threads to use. Defaults to 8.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
             return_legacy_cache (bool): Whether to use legacy cache format (pre-v4.47). Defaults to False.
+            system_prompt (str): System prompt to set model behavior. Defaults to "You are a helpful assistant."

         Returns:
             Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
@@ -130,6 +158,12 @@ class TransformersBinding(LollmsLLMBinding):
         if seed is not None:
             torch.manual_seed(seed)

+        # Apply the prompt template
+        formatted_prompt = self.prompt_template.format(
+            system_prompt=system_prompt,
+            user_prompt=prompt
+        )
+
         # Prepare generation config
         self.generation_config.max_new_tokens = n_predict if n_predict else 2048
         self.generation_config.temperature = temperature
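
Applied to the Llama-style template returned by _infer_prompt_template, this formatting step produces, for example (prompt values illustrative):

# Example rendering of the Llama template from the hunks above.
template = "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
formatted_prompt = template.format(
    system_prompt="You are a helpful assistant.",
    user_prompt="What is the capital of France?",
)
# "[INST] <<SYS>> You are a helpful assistant. <</SYS>> What is the capital of France? [/INST]"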
@@ -139,14 +173,14 @@ class TransformersBinding(LollmsLLMBinding):
         self.generation_config.pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id

         # Tokenize input with attention mask
-        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
+        inputs = self.tokenizer(formatted_prompt, return_tensors="pt", padding=True)
         input_ids = inputs.input_ids.to(self.model.device)
         attention_mask = inputs.attention_mask.to(self.model.device)

         # Handle image input if provided (basic implementation)
         if images and len(images) > 0:
             ASCIIColors.yellow("Warning: Image processing not fully implemented in this binding")
-            prompt += "\n[Image content not processed]"
+            formatted_prompt += "\n[Image content not processed]"

         # Check transformers version for cache handling
         use_legacy_cache = return_legacy_cache or version.parse(transformers.__version__) < version.parse("4.47.0")
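
The legacy-cache gate above depends only on the installed transformers version; a small standalone sketch of the comparison it performs:

from packaging import version

# Mirrors the gate in the hunk above: legacy tuple output for transformers < 4.47.0.
for installed in ("4.46.3", "4.47.0"):
    print(installed, "->", version.parse(installed) < version.parse("4.47.0"))
# 4.46.3 -> True   (use legacy cache format)
# 4.47.0 -> False  (use the output.sequences attribute)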
@@ -169,10 +203,8 @@ class TransformersBinding(LollmsLLMBinding):
         ):
             # Handle different output formats based on version/cache setting
             if use_legacy_cache:
-                # Legacy format: tuple of (sequences, scores, ...)
                 sequences = output[0]
             else:
-                # New format: Cache instance
                 sequences = output.sequences

             # Decode the new tokens
@@ -212,70 +244,34 @@ class TransformersBinding(LollmsLLMBinding):
             error_msg = f"Error generating text: {str(e)}"
             ASCIIColors.red(error_msg)
             return {"status": "error", "error": error_msg}
-
-

     def tokenize(self, text: str) -> list:
-        """
-        Tokenize the input text into a list of characters.
-
-        Args:
-            text (str): The text to tokenize.
-
-        Returns:
-            list: List of individual characters.
-        """
+        """Tokenize the input text into a list of characters."""
         return list(text)

     def detokenize(self, tokens: list) -> str:
-        """
-        Convert a list of tokens back to text.
-
-        Args:
-            tokens (list): List of tokens (characters) to detokenize.
-
-        Returns:
-            str: Detokenized text.
-        """
+        """Convert a list of tokens back to text."""
         return "".join(tokens)
+
     def embed(self, text: str, **kwargs) -> list:
-        """
-        Get embeddings for the input text using Ollama API
-
-        Args:
-            text (str or List[str]): Input text to embed
-            **kwargs: Additional arguments like model, truncate, options, keep_alive
-
-        Returns:
-            dict: Response containing embeddings
-        """
-        pass
+        """Get embeddings for the input text (placeholder)."""
+        pass
+
     def get_model_info(self) -> dict:
-        """
-        Return information about the current Ollama model.
-
-        Returns:
-            dict: Dictionary containing model name, version, and host address.
-        """
+        """Return information about the current model."""
         return {
-            "name": "ollama",
-            "version": "2.0",
+            "name": "transformers",
+            "version": transformers.__version__,
             "host_address": self.host_address,
             "model_name": self.model_name
         }
+
     def listModels(self):
-        """ Lists available models """
+        """Lists available models (placeholder)."""
         pass
+
     def load_model(self, model_name: str) -> bool:
-        """
-        Load a specific model into the Ollama binding.
-
-        Args:
-            model_name (str): Name of the model to load.
-
-        Returns:
-            bool: True if model loaded successfully.
-        """
+        """Load a specific model into the binding."""
         self.model = model_name
         self.model_name = model_name
-        return True
+        return True