lollms-client 1.5.6__py3-none-any.whl → 1.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +2 -2
  3. lollms_client/llm_bindings/claude/__init__.py +125 -34
  4. lollms_client/llm_bindings/gemini/__init__.py +261 -159
  5. lollms_client/llm_bindings/grok/__init__.py +52 -14
  6. lollms_client/llm_bindings/groq/__init__.py +2 -2
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +2 -2
  8. lollms_client/llm_bindings/litellm/__init__.py +1 -1
  9. lollms_client/llm_bindings/llamacpp/__init__.py +18 -11
  10. lollms_client/llm_bindings/lollms/__init__.py +76 -21
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +1 -1
  12. lollms_client/llm_bindings/mistral/__init__.py +2 -2
  13. lollms_client/llm_bindings/novita_ai/__init__.py +142 -6
  14. lollms_client/llm_bindings/ollama/__init__.py +307 -89
  15. lollms_client/llm_bindings/open_router/__init__.py +2 -2
  16. lollms_client/llm_bindings/openai/__init__.py +81 -20
  17. lollms_client/llm_bindings/openllm/__init__.py +362 -506
  18. lollms_client/llm_bindings/openwebui/__init__.py +333 -171
  19. lollms_client/llm_bindings/perplexity/__init__.py +2 -2
  20. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -3
  21. lollms_client/llm_bindings/tensor_rt/__init__.py +1 -1
  22. lollms_client/llm_bindings/transformers/__init__.py +428 -632
  23. lollms_client/llm_bindings/vllm/__init__.py +1 -1
  24. lollms_client/lollms_agentic.py +4 -2
  25. lollms_client/lollms_base_binding.py +61 -0
  26. lollms_client/lollms_core.py +512 -1890
  27. lollms_client/lollms_discussion.py +25 -11
  28. lollms_client/lollms_llm_binding.py +112 -261
  29. lollms_client/lollms_mcp_binding.py +34 -75
  30. lollms_client/lollms_stt_binding.py +85 -52
  31. lollms_client/lollms_tti_binding.py +23 -37
  32. lollms_client/lollms_ttm_binding.py +24 -42
  33. lollms_client/lollms_tts_binding.py +28 -17
  34. lollms_client/lollms_ttv_binding.py +24 -42
  35. lollms_client/lollms_types.py +4 -2
  36. lollms_client/stt_bindings/whisper/__init__.py +108 -23
  37. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  38. lollms_client/tti_bindings/diffusers/__init__.py +418 -810
  39. lollms_client/tti_bindings/diffusers/server/main.py +1051 -0
  40. lollms_client/tti_bindings/gemini/__init__.py +182 -239
  41. lollms_client/tti_bindings/leonardo_ai/__init__.py +6 -3
  42. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  43. lollms_client/tti_bindings/novita_ai/__init__.py +5 -2
  44. lollms_client/tti_bindings/openai/__init__.py +10 -11
  45. lollms_client/tti_bindings/stability_ai/__init__.py +5 -3
  46. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  47. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  48. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  49. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  50. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  51. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  52. lollms_client/tts_bindings/bark/__init__.py +7 -10
  53. lollms_client/tts_bindings/lollms/__init__.py +6 -1
  54. lollms_client/tts_bindings/piper_tts/__init__.py +8 -11
  55. lollms_client/tts_bindings/xtts/__init__.py +157 -74
  56. lollms_client/tts_bindings/xtts/server/main.py +241 -280
  57. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/METADATA +113 -5
  58. lollms_client-1.7.10.dist-info/RECORD +89 -0
  59. lollms_client-1.5.6.dist-info/RECORD +0 -87
  60. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/WHEEL +0 -0
  61. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/licenses/LICENSE +0 -0
  62. {lollms_client-1.5.6.dist-info → lollms_client-1.7.10.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "1.5.6" # Updated version
+__version__ = "1.7.10" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/llm_bindings/azure_openai/__init__.py CHANGED
@@ -238,7 +238,7 @@ class AzureOpenAIBinding(LollmsLLMBinding):
             "supports_vision": True, # Assume modern deployments support vision
         }
 
-    def listModels(self) -> List[Dict[str, str]]:
+    def list_models(self) -> List[Dict[str, str]]:
        """
        List Models is not supported via the Azure OpenAI API.
        Deployments are managed in the Azure Portal. This method returns an empty list.
@@ -280,7 +280,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models ---")
-    models = binding.listModels()
+    models = binding.list_models()
     if not models:
         ASCIIColors.green("Correctly returned an empty list for models, as expected for Azure.")
 
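Note that the `listModels` → `list_models` rename is applied across every binding in this release, so it is a breaking change for downstream callers. A minimal sketch of the updated call site (the `binding` construction is assumed):

```python
# Hypothetical call site after upgrading to 1.7.10: the camelCase
# listModels() is gone; use the snake_case list_models() instead.
models = binding.list_models()  # returns [] on Azure, where deployments are portal-managed
for m in models:
    print(m.get("model_name"), "-", m.get("display_name", ""))
```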
lollms_client/llm_bindings/claude/__init__.py CHANGED
@@ -1,3 +1,4 @@
+# bindings/claude/__init__.py
 import base64
 import os
 from io import BytesIO
@@ -14,7 +15,6 @@ from ascii_colors import ASCIIColors, trace_exception
 import pipmaster as pm
 
 # Ensure the required packages are installed
-# Added 'requests' for dynamic model listing
 pm.ensure_packages(["anthropic", "pillow", "tiktoken", "requests"])
 
 import anthropic
@@ -28,8 +28,9 @@ ANTHROPIC_API_BASE_URL = "https://api.anthropic.com/v1"
 
 # A hardcoded list to be used as a fallback if the API call fails
 _FALLBACK_MODELS = [
-    {'model_name': 'claude-3-opus-20240229', 'display_name': 'Claude 3 Opus', 'description': 'Most powerful model for highly complex tasks.', 'owned_by': 'Anthropic'},
+    {'model_name': 'claude-3-7-sonnet-20250219', 'display_name': 'Claude 3.7 Sonnet', 'description': 'Most intelligent model with extended thinking capabilities.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-5-sonnet-20240620', 'display_name': 'Claude 3.5 Sonnet', 'description': 'Our most intelligent model, a new industry standard.', 'owned_by': 'Anthropic'},
+    {'model_name': 'claude-3-opus-20240229', 'display_name': 'Claude 3 Opus', 'description': 'Most powerful model for highly complex tasks.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-sonnet-20240229', 'display_name': 'Claude 3 Sonnet', 'description': 'Ideal balance of intelligence and speed for enterprise workloads.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-3-haiku-20240307', 'display_name': 'Claude 3 Haiku', 'description': 'Fastest and most compact model for near-instant responsiveness.', 'owned_by': 'Anthropic'},
     {'model_name': 'claude-2.1', 'display_name': 'Claude 2.1', 'description': 'Legacy model with a 200K token context window.', 'owned_by': 'Anthropic'},
@@ -124,6 +125,9 @@ class ClaudeBinding(LollmsLLMBinding):
                       split:Optional[bool]=False, # Not used in this direct method
                       user_keyword:Optional[str]="!@>user:", # Not used
                       ai_keyword:Optional[str]="!@>assistant:", # Not used
+                      think: Optional[bool] = False,
+                      reasoning_effort: Optional[str] = "low", # low, medium, high
+                      reasoning_summary: Optional[bool] = False, # auto
                       ) -> Union[str, dict]:
        """
        Generate text using the Claude model.
@@ -131,8 +135,34 @@ class ClaudeBinding(LollmsLLMBinding):
         if not self.client:
             return {"status": False, "error": "Anthropic client not initialized."}
 
+        # Handling Thinking / Reasoning
+        thinking_config = None
+        if think:
+            # Map reasoning_effort to budget_tokens
+            budget = 1024 # default/low
+            if reasoning_effort == "medium":
+                budget = 8192
+            elif reasoning_effort == "high":
+                budget = 16000
+
+            # Constraint: max_tokens (n_predict) must be > budget_tokens.
+            # If the default n_predict (2048) is too low for reasoning, boost it.
+            required_min_tokens = budget + 2048 # buffer for the visible output
+            if n_predict is None or n_predict < required_min_tokens:
+                n_predict = required_min_tokens
+                ASCIIColors.info(f"Adjusting n_predict to {n_predict} to accommodate thinking budget of {budget}")
+
+            thinking_config = {"type": "enabled", "budget_tokens": budget}
+            # Note: some Anthropic documentation requires temperature to be unset (or 1.0)
+            # when extended thinking is enabled, but Claude 3.7 accepts it, so we pass it
+            # through and let the API validate.
+
         api_params = self._construct_parameters(temperature, top_p, top_k, n_predict)
+        if thinking_config:
+            api_params["thinking"] = thinking_config
+            # max_tokens is already set in api_params by _construct_parameters via n_predict
+
         message_content = []
         if prompt and prompt.strip():
             message_content.append({"type": "text", "text": prompt})
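In isolation, the new effort-to-budget logic distills to the sketch below (the names `EFFORT_BUDGETS` and `resolve_thinking` are illustrative, not package API; the values come from the diff). Anthropic requires `max_tokens > budget_tokens`, so `n_predict` is raised to `budget + 2048` when needed; e.g. `reasoning_effort="high"` forces `n_predict >= 18048`.

```python
from typing import Optional, Tuple

# Budget values as hardcoded in the binding above.
EFFORT_BUDGETS = {"low": 1024, "medium": 8192, "high": 16000}

def resolve_thinking(reasoning_effort: str, n_predict: Optional[int]) -> Tuple[dict, int]:
    budget = EFFORT_BUDGETS.get(reasoning_effort, 1024)  # unknown effort falls back to low
    required_min_tokens = budget + 2048  # buffer for the visible answer
    if n_predict is None or n_predict < required_min_tokens:
        n_predict = required_min_tokens
    return {"type": "enabled", "budget_tokens": budget}, n_predict
```

Since the same block now appears verbatim in both `generate_text` and `chat`, a helper along these lines would also remove the duplication.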
@@ -140,7 +170,6 @@ class ClaudeBinding(LollmsLLMBinding):
         if images:
             for image_data in images:
                 try:
-                    # ... (image processing code is unchanged)
                     if is_image_path(image_data):
                         with open(image_data, "rb") as image_file:
                             b64_data = base64.b64encode(image_file.read()).decode('utf-8')
@@ -166,8 +195,6 @@ class ClaudeBinding(LollmsLLMBinding):
         messages = [{"role": "user", "content": message_content}]
         full_response_text = ""
 
-        # ---- CHANGE START ----
-        # Conditionally build the request arguments to avoid sending an empty `system` parameter.
         request_args = {
             "model": self.model_name,
             "messages": messages,
@@ -175,22 +202,49 @@ class ClaudeBinding(LollmsLLMBinding):
         }
         if system_prompt and system_prompt.strip():
             request_args["system"] = system_prompt
-        # ---- CHANGE END ----
 
         try:
             if stream:
+                # Use raw stream iteration to catch thinking events
                 with self.client.messages.stream(**request_args) as stream_response:
-                    for chunk in stream_response.text_stream:
-                        full_response_text += chunk
-                        if streaming_callback:
-                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                    in_thinking_block = False
+                    for event in stream_response:
+                        if event.type == "content_block_start" and event.content_block.type == "thinking":
+                            full_response_text += "<think>\n"
+                            if streaming_callback:
+                                streaming_callback("<think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = True
+                        elif event.type == "content_block_delta" and event.delta.type == "thinking_delta":
+                            chunk = event.delta.thinking
+                            full_response_text += chunk
+                            if streaming_callback:
+                                streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                        elif event.type == "content_block_stop" and in_thinking_block:
+                            full_response_text += "\n</think>\n"
+                            if streaming_callback:
+                                streaming_callback("\n</think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = False
+                        elif event.type == "content_block_delta" and event.delta.type == "text_delta":
+                            chunk = event.delta.text
+                            full_response_text += chunk
+                            if streaming_callback:
+                                if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
                 return full_response_text
             else:
                 response = self.client.messages.create(**request_args)
                 if response.stop_reason == "error":
                     return {"status": False, "error": f"API returned an error: {response.stop_reason}"}
-                return response.content[0].text
+
+                # Reconstruct the full text, including the thinking block
+                output_parts = []
+                for block in response.content:
+                    if block.type == "thinking":
+                        output_parts.append(f"<think>\n{block.thinking}\n</think>\n")
+                    elif block.type == "text":
+                        output_parts.append(block.text)
+
+                return "".join(output_parts)
 
         except Exception as ex:
             error_message = f"An unexpected error occurred with Claude API: {str(ex)}"
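Outside the binding, the same event handling can be reproduced directly against the Anthropic SDK. A self-contained sketch (assumes `ANTHROPIC_API_KEY` is set in the environment and an SDK version recent enough to support extended thinking; the model and budget values are illustrative):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
with client.messages.stream(
    model="claude-3-7-sonnet-20250219",
    max_tokens=4096,  # must exceed the thinking budget
    thinking={"type": "enabled", "budget_tokens": 1024},
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
) as stream:
    for event in stream:
        if event.type == "content_block_delta":
            if event.delta.type == "thinking_delta":
                print(event.delta.thinking, end="")  # reasoning tokens
            elif event.delta.type == "text_delta":
                print(event.delta.text, end="")      # visible answer
```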
@@ -210,7 +264,10 @@ class ClaudeBinding(LollmsLLMBinding):
              seed: Optional[int] = None, # Not supported
              n_threads: Optional[int] = None, # Not supported
              ctx_size: Optional[int] = None, # Not supported
-             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             think: Optional[bool] = False,
+             reasoning_effort: Optional[str] = "low", # low, medium, high
+             reasoning_summary: Optional[bool] = False, # auto
              ) -> Union[str, dict]:
        """
        Conduct a chat session with the Claude model using a LollmsDiscussion object.
@@ -222,7 +279,6 @@ class ClaudeBinding(LollmsLLMBinding):
         messages = discussion.get_messages(branch_tip_id)
 
         history = []
-        # ... (history building code is unchanged)
         for msg in messages:
             role = 'user' if msg.sender_type == "user" else 'assistant'
             content_parts = []
@@ -252,11 +308,28 @@ class ClaudeBinding(LollmsLLMBinding):
         if not history:
             return {"status": "error", "message": "Cannot start chat with an empty discussion."}
 
+        # Handling Thinking / Reasoning
+        thinking_config = None
+        if think:
+            budget = 1024
+            if reasoning_effort == "medium":
+                budget = 8192
+            elif reasoning_effort == "high":
+                budget = 16000
+
+            required_min_tokens = budget + 2048
+            if n_predict is None or n_predict < required_min_tokens:
+                n_predict = required_min_tokens
+                ASCIIColors.info(f"Adjusting n_predict to {n_predict} for thinking budget {budget}")
+
+            thinking_config = {"type": "enabled", "budget_tokens": budget}
+
         api_params = self._construct_parameters(temperature, top_p, top_k, n_predict)
+        if thinking_config:
+            api_params["thinking"] = thinking_config
+
         full_response_text = ""
 
-        # ---- CHANGE START ----
-        # Conditionally build the request arguments to avoid sending an empty `system` parameter.
         request_args = {
             "model": self.model_name,
             "messages": history,
@@ -264,29 +337,49 @@ class ClaudeBinding(LollmsLLMBinding):
         }
         if system_prompt and system_prompt.strip():
             request_args["system"] = system_prompt
-        # ---- CHANGE END ----
 
         try:
             if stream:
                 with self.client.messages.stream(**request_args) as stream_response:
-                    for chunk in stream_response.text_stream:
-                        full_response_text += chunk
-                        if streaming_callback:
-                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
-                                break
+                    in_thinking_block = False
+                    for event in stream_response:
+                        if event.type == "content_block_start" and event.content_block.type == "thinking":
+                            full_response_text += "<think>\n"
+                            if streaming_callback: streaming_callback("<think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = True
+                        elif event.type == "content_block_delta" and event.delta.type == "thinking_delta":
+                            chunk = event.delta.thinking
+                            full_response_text += chunk
+                            if streaming_callback: streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                        elif event.type == "content_block_stop" and in_thinking_block:
+                            full_response_text += "\n</think>\n"
+                            if streaming_callback: streaming_callback("\n</think>\n", MSG_TYPE.MSG_TYPE_CHUNK)
+                            in_thinking_block = False
+                        elif event.type == "content_block_delta" and event.delta.type == "text_delta":
+                            chunk = event.delta.text
+                            full_response_text += chunk
+                            if streaming_callback:
+                                if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
                 return full_response_text
             else:
                 response = self.client.messages.create(**request_args)
                 if response.stop_reason == "error":
                     return {"status": "error", "message": f"API returned an error: {response.stop_reason}"}
-                return response.content[0].text
+
+                output_parts = []
+                for block in response.content:
+                    if block.type == "thinking":
+                        output_parts.append(f"<think>\n{block.thinking}\n</think>\n")
+                    elif block.type == "text":
+                        output_parts.append(block.text)
+                return "".join(output_parts)
 
         except Exception as ex:
             error_message = f"An unexpected error occurred with Claude API: {str(ex)}"
             trace_exception(ex)
             return {"status": "error", "message": error_message}
 
-    # ... (Rest of the file is unchanged) ...
     def tokenize(self, text: str) -> list:
        """
        Tokenize the input text.
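Since both `generate_text` and `chat` now inline the reasoning into the returned string as `<think>...</think>` blocks, callers that only want the final answer must strip the tags themselves. A hedged consumer-side sketch (the regex split is illustrative, not package API):

```python
import re

def split_thinking(response: str) -> tuple:
    """Separate <think> blocks, as emitted by this binding, from the answer."""
    thoughts = "\n".join(re.findall(r"<think>\n?(.*?)\n?</think>", response, re.DOTALL))
    answer = re.sub(r"<think>.*?</think>\n?", "", response, flags=re.DOTALL).strip()
    return thoughts, answer

reasoning, answer = split_thinking("<think>\nsteps...\n</think>\nFinal answer.")
# reasoning == "steps...", answer == "Final answer."
```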
@@ -329,7 +422,7 @@ class ClaudeBinding(LollmsLLMBinding):
                 model=self.model_name,
                 messages=[{"role": "user", "content": text}]
             )
-            return response.token_count # Updated to correct response attribute
+            return response.input_tokens # CountTokensResponse exposes 'input_tokens', not 'token_count'
         except Exception as e:
             trace_exception(e)
             ASCIIColors.error(f"Failed to count tokens with Claude API: {e}")
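The fix matches the SDK's token-counting endpoint: recent `anthropic` versions return a response object whose field is `input_tokens`. A minimal sketch (model name is illustrative; assumes `ANTHROPIC_API_KEY` is set):

```python
import anthropic

client = anthropic.Anthropic()
resp = client.messages.count_tokens(
    model="claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(resp.input_tokens)  # integer prompt-token count; there is no 'token_count' attribute
```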
@@ -354,7 +447,7 @@ class ClaudeBinding(LollmsLLMBinding):
             "supports_vision": "claude-3" in self.model_name,
         }
 
-    def listModels(self) -> List[Dict[str, str]]:
+    def list_models(self) -> List[Dict[str, str]]:
        """
        Lists available models from the Anthropic API.
        Caches the result to avoid repeated API calls.
@@ -368,8 +461,6 @@ class ClaudeBinding(LollmsLLMBinding):
             self._cached_models = _FALLBACK_MODELS
             return self._cached_models
 
-        # This part is complex and likely correct, leaving as is.
-        # It's good practice.
         headers = {
             "x-api-key": self.service_key,
             "anthropic-version": "2023-06-01",
@@ -437,8 +528,8 @@ if __name__ == '__main__':
     ASCIIColors.yellow("--- Testing ClaudeBinding ---")
 
     # --- Configuration ---
-    test_model_name = "claude-3-haiku-20240307" # Use Haiku for speed in testing
-    test_vision_model_name = "claude-3-sonnet-20240229"
+    test_model_name = "claude-3-7-sonnet-20250219" # Sonnet 3.7 supports extended thinking
+    test_vision_model_name = "claude-3-5-sonnet-20240620"
 
     full_streamed_text = ""
 
@@ -451,7 +542,7 @@ if __name__ == '__main__':
 
     # --- List Models ---
     ASCIIColors.cyan("\n--- Listing Models (dynamic) ---")
-    models = binding.listModels()
+    models = binding.list_models()
     if models:
         ASCIIColors.green(f"Found {len(models)} models.")
         for m in models:
@@ -472,7 +563,7 @@ if __name__ == '__main__':
     ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
     prompt_text = "Explain the importance of bees in one paragraph."
     ASCIIColors.info(f"Prompt: {prompt_text}")
-    generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt=" ")
+    generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False, system_prompt=" ", think=True)
     if isinstance(generated_text, str):
         ASCIIColors.green(f"Generated text:\n{generated_text}")
     else:
@@ -488,7 +579,7 @@ if __name__ == '__main__':
             return True
 
     ASCIIColors.info(f"Prompt: {prompt_text}")
-    result = binding.generate_text(prompt_text, n_predict=150, stream=True, streaming_callback=stream_callback)
+    result = binding.generate_text(prompt_text, n_predict=150, stream=True, streaming_callback=stream_callback, think=True)
     full_streamed_text = "".join(captured_chunks)
     print("\n--- End of Stream ---")
     ASCIIColors.green(f"Full streamed text (for verification): {result}")
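The test relies on a `stream_callback` whose body is only partly visible in this diff. A hedged reconstruction of a callback compatible with the binding's contract, where returning `False` aborts the stream loop (the `MSG_TYPE` import path is assumed from the package layout):

```python
from lollms_client.lollms_types import MSG_TYPE  # assumed import path

captured_chunks = []

def stream_callback(chunk: str, msg_type: MSG_TYPE) -> bool:
    print(chunk, end="", flush=True)  # echo each chunk as it arrives
    captured_chunks.append(chunk)
    return True  # keep streaming; returning False stops the stream early
```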