lollms-client 1.6.5__tar.gz → 1.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {lollms_client-1.6.5/src/lollms_client.egg-info → lollms_client-1.6.7}/PKG-INFO +1 -1
  2. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/__init__.py +1 -1
  3. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_core.py +156 -129
  4. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/diffusers/__init__.py +45 -22
  5. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/diffusers/server/main.py +164 -54
  6. lollms_client-1.6.7/src/lollms_client/tts_bindings/xtts/__init__.py +194 -0
  7. lollms_client-1.6.7/src/lollms_client/tts_bindings/xtts/server/main.py +275 -0
  8. {lollms_client-1.6.5 → lollms_client-1.6.7/src/lollms_client.egg-info}/PKG-INFO +1 -1
  9. lollms_client-1.6.5/src/lollms_client/tts_bindings/xtts/__init__.py +0 -170
  10. lollms_client-1.6.5/src/lollms_client/tts_bindings/xtts/server/main.py +0 -330
  11. {lollms_client-1.6.5 → lollms_client-1.6.7}/LICENSE +0 -0
  12. {lollms_client-1.6.5 → lollms_client-1.6.7}/README.md +0 -0
  13. {lollms_client-1.6.5 → lollms_client-1.6.7}/pyproject.toml +0 -0
  14. {lollms_client-1.6.5 → lollms_client-1.6.7}/setup.cfg +0 -0
  15. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/assets/models_ctx_sizes.json +0 -0
  16. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/__init__.py +0 -0
  17. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/azure_openai/__init__.py +0 -0
  18. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/claude/__init__.py +0 -0
  19. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/gemini/__init__.py +0 -0
  20. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/grok/__init__.py +0 -0
  21. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/groq/__init__.py +0 -0
  22. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +0 -0
  23. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/litellm/__init__.py +0 -0
  24. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/llamacpp/__init__.py +0 -0
  25. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/lollms/__init__.py +0 -0
  26. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/lollms_webui/__init__.py +0 -0
  27. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/mistral/__init__.py +0 -0
  28. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/novita_ai/__init__.py +0 -0
  29. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/ollama/__init__.py +0 -0
  30. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/open_router/__init__.py +0 -0
  31. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/openai/__init__.py +0 -0
  32. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/openllm/__init__.py +0 -0
  33. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/openwebui/__init__.py +0 -0
  34. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/perplexity/__init__.py +0 -0
  35. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/pythonllamacpp/__init__.py +0 -0
  36. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
  37. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/transformers/__init__.py +0 -0
  38. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/llm_bindings/vllm/__init__.py +0 -0
  39. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_agentic.py +0 -0
  40. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_config.py +0 -0
  41. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_discussion.py +0 -0
  42. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_js_analyzer.py +0 -0
  43. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_llm_binding.py +0 -0
  44. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_mcp_binding.py +0 -0
  45. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_mcp_security.py +0 -0
  46. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_personality.py +0 -0
  47. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_python_analyzer.py +0 -0
  48. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_stt_binding.py +0 -0
  49. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_tti_binding.py +0 -0
  50. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_ttm_binding.py +0 -0
  51. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_tts_binding.py +0 -0
  52. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_ttv_binding.py +0 -0
  53. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_types.py +0 -0
  54. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/lollms_utilities.py +0 -0
  55. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
  56. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
  57. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
  58. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
  59. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
  60. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/remote_mcp/__init__.py +0 -0
  61. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/mcp_bindings/standard_mcp/__init__.py +0 -0
  62. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/stt_bindings/__init__.py +0 -0
  63. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  64. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/stt_bindings/whisper/__init__.py +0 -0
  65. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
  66. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/__init__.py +0 -0
  67. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/gemini/__init__.py +0 -0
  68. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/leonardo_ai/__init__.py +0 -0
  69. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  70. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/novita_ai/__init__.py +0 -0
  71. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/openai/__init__.py +0 -0
  72. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tti_bindings/stability_ai/__init__.py +0 -0
  73. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/__init__.py +0 -0
  74. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
  75. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/beatoven_ai/__init__.py +0 -0
  76. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
  77. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/replicate/__init__.py +0 -0
  78. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/stability_ai/__init__.py +0 -0
  79. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttm_bindings/topmediai/__init__.py +0 -0
  80. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/__init__.py +0 -0
  81. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/bark/__init__.py +0 -0
  82. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/bark/server/install_bark.py +0 -0
  83. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/bark/server/main.py +0 -0
  84. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  85. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
  86. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/piper_tts/server/install_piper.py +0 -0
  87. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/piper_tts/server/main.py +0 -0
  88. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/piper_tts/server/setup_voices.py +0 -0
  89. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/tts_bindings/xtts/server/setup_voices.py +0 -0
  90. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttv_bindings/__init__.py +0 -0
  91. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  92. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client.egg-info/SOURCES.txt +0 -0
  93. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client.egg-info/dependency_links.txt +0 -0
  94. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client.egg-info/requires.txt +0 -0
  95. {lollms_client-1.6.5 → lollms_client-1.6.7}/src/lollms_client.egg-info/top_level.txt +0 -0
  96. {lollms_client-1.6.5 → lollms_client-1.6.7}/test/test_lollms_discussion.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 1.6.5
+ Version: 1.6.7
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache License
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
  from lollms_client.lollms_llm_binding import LollmsLLMBindingManager

- __version__ = "1.6.5" # Updated version
+ __version__ = "1.6.7" # Updated version

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
@@ -4307,16 +4307,17 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  contextual_prompt: Optional[str] = None,
  system_prompt: str | None = None,
  context_fill_percentage: float = 0.75,
- overlap_tokens: int = 150, # Added a default for better context continuity
+ overlap_tokens: int = 150,
  expected_generation_tokens: int = 1500,
+ max_scratchpad_tokens: int = 4000, # NEW: Hard limit for scratchpad
+ scratchpad_compression_threshold: int = 3000, # NEW: When to compress
  streaming_callback: Optional[Callable] = None,
  return_scratchpad_only: bool = False,
  debug: bool = True,
  **kwargs
  ) -> str:
  """
- Processes long text by breaking it down into chunks, analyzing each one incrementally,
- and synthesizing the results into a comprehensive final response based on a user-defined objective.
+ Processes long text with FIXED chunk sizing and managed scratchpad growth.
  """

  if debug:
@@ -4328,7 +4329,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi

  # Get context size
  try:
- context_size = self.llm.get_context_size() or 8192 # Using a more modern default
+ context_size = self.llm.get_context_size() or 8192
  except:
  context_size = 8192

@@ -4339,65 +4340,50 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  if not text_to_process:
  return ""

- # Use a simple word-based split for token estimation
+ # Use word-based split for token estimation
  tokens = text_to_process.split()
  if debug:
  print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")

- # Dynamic token budget calculation
- def calculate_token_budgets(scratchpad_content: str = "", step_num: int = 0) -> dict:
- # Generic prompt templates are more concise
- base_system_tokens = 150
- user_template_tokens = 250
- scratchpad_tokens = len(scratchpad_content.split()) * 1.3 if scratchpad_content else 0
-
- used_tokens = base_system_tokens + user_template_tokens + scratchpad_tokens + expected_generation_tokens
- total_budget = int(context_size * context_fill_percentage)
- available_for_chunk = max(500, int(total_budget - used_tokens)) # Ensure a reasonable minimum chunk size
-
- budget_info = {
- "total_budget": total_budget,
- "chunk_budget": available_for_chunk,
- "efficiency_ratio": available_for_chunk / total_budget if total_budget > 0 else 0,
- "scratchpad_tokens": int(scratchpad_tokens),
- "used_tokens": int(used_tokens)
- }
-
- if debug:
- print(f"🔧 DEBUG Step {step_num}: Budget = {available_for_chunk}/{total_budget} tokens, "
- f"Scratchpad = {int(scratchpad_tokens)} tokens")
-
- return budget_info
-
- # Initial budget calculation
- initial_budget = calculate_token_budgets()
- chunk_size_tokens = initial_budget["chunk_budget"]
-
+ # ========================================
+ # FIXED: Calculate chunk size ONCE upfront
+ # ========================================
+ base_system_tokens = 150
+ user_template_tokens = 250
+
+ # Reserve space for maximum expected scratchpad size
+ reserved_scratchpad_tokens = max_scratchpad_tokens
+
+ total_budget = int(context_size * context_fill_percentage)
+ used_tokens = base_system_tokens + user_template_tokens + reserved_scratchpad_tokens + expected_generation_tokens
+
+ # FIXED chunk size - never changes during processing
+ FIXED_CHUNK_SIZE = max(500, int(total_budget - used_tokens))
+
  if debug:
- print(f"🔧 DEBUG: Initial chunk size: {chunk_size_tokens} word tokens")
+ print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens (will not change)")
+ print(f"🔧 DEBUG: Reserved scratchpad space: {reserved_scratchpad_tokens} tokens")
+ print(f"🔧 DEBUG: Total budget: {total_budget} tokens")

  if streaming_callback:
  streaming_callback(
- f"Context Budget: {initial_budget['chunk_budget']:,}/{initial_budget['total_budget']:,} tokens "
- f"({initial_budget['efficiency_ratio']:.1%} efficiency)",
+ f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
  MSG_TYPE.MSG_TYPE_STEP,
- {"budget_info": initial_budget}
+ {"fixed_chunk_size": FIXED_CHUNK_SIZE, "total_budget": total_budget}
  )

  # Single pass for short content
- if len(tokens) <= chunk_size_tokens:
+ if len(tokens) <= FIXED_CHUNK_SIZE:
  if debug:
- print("🔧 DEBUG: Content is short enough for single-pass processing")
+ print("🔧 DEBUG: Content fits in single pass")

  if streaming_callback:
  streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})

- # Generic single-pass system prompt
  system_prompt = (
  "You are an expert AI assistant for text analysis and summarization. "
  "Your task is to carefully analyze the provided text and generate a comprehensive, "
- "accurate, and well-structured response that directly addresses the user's objective. "
- "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
+ "accurate, and well-structured response that directly addresses the user's objective."
  )

  prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
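
For orientation, a minimal worked example of the fixed-budget math introduced above (illustrative numbers only, assuming the default parameters and an 8192-token context):

```python
# Illustrative sketch of the fixed chunk budget; values are assumptions, not package defaults beyond the signature above.
context_size = 8192
context_fill_percentage = 0.75
expected_generation_tokens = 1500
max_scratchpad_tokens = 4000
overlap_tokens = 150

total_budget = int(context_size * context_fill_percentage)                    # 6144
used_tokens = 150 + 250 + max_scratchpad_tokens + expected_generation_tokens  # 5900
FIXED_CHUNK_SIZE = max(500, int(total_budget - used_tokens))                  # 244 -> the 500-token floor applies

# The step count can then be fixed up front with ceiling division:
num_word_tokens = 10_000                                                      # assumed document length
total_steps = -(-num_word_tokens // (FIXED_CHUNK_SIZE - overlap_tokens))      # ceil(10000 / 350) = 29
print(FIXED_CHUNK_SIZE, total_steps)                                          # 500 29
```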
@@ -4413,120 +4399,164 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  print(f"🔧 DEBUG: Single-pass processing failed: {e}")
  return f"Error in single-pass processing: {e}"

- # Multi-chunk processing for long content
+ # ========================================
+ # FIXED: Multi-chunk processing with static sizing
+ # ========================================
  if debug:
- print("🔧 DEBUG: Using multi-chunk processing for long content")
+ print("🔧 DEBUG: Using multi-chunk processing with FIXED chunk size")

  chunk_summaries = []
  current_position = 0
  step_number = 1
+
+ # Pre-calculate total steps (won't change since chunk size is fixed)
+ total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens)) # Ceiling division
+
+ if debug:
+ print(f"🔧 DEBUG: Total estimated steps: {total_steps}")
+
+ # ========================================
+ # NEW: Scratchpad compression helper
+ # ========================================
+ def compress_scratchpad(scratchpad_sections: list) -> list:
+ """Compress scratchpad when it gets too large"""
+ if len(scratchpad_sections) <= 2:
+ return scratchpad_sections
+
+ combined = "\n\n---\n\n".join(scratchpad_sections)
+ current_size = len(combined.split())
+
+ if current_size <= scratchpad_compression_threshold:
+ return scratchpad_sections
+
+ if debug:
+ print(f"🔧 DEBUG: Compressing scratchpad from {current_size} tokens")
+
+ compression_prompt = (
+ f"Consolidate the following analysis sections into a more concise summary. "
+ f"Retain all key facts, data points, and conclusions, but eliminate redundancy:\n\n"
+ f"{combined}"
+ )
+
+ try:
+ compressed = self.remove_thinking_blocks(
+ self.llm.generate_text(
+ compression_prompt,
+ system_prompt="You are a text consolidation expert. Create concise summaries that preserve all important information.",
+ **kwargs
+ )
+ )
+
+ if debug:
+ compressed_size = len(compressed.split())
+ print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
+
+ return [compressed]
+ except Exception as e:
+ if debug:
+ print(f"🔧 DEBUG: Compression failed: {e}, keeping last 3 sections")
+ # Fallback: keep only recent sections
+ return scratchpad_sections[-3:]

+ # Main processing loop with FIXED chunk size
  while current_position < len(tokens):
- # Recalculate budget for each step for dynamic adaptation
- current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- current_budget = calculate_token_budgets(current_scratchpad, step_number)
- adaptive_chunk_size = max(500, current_budget["chunk_budget"])
-
- # Extract the next chunk of text
- chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
+ # Extract chunk using FIXED size
+ chunk_end = min(current_position + FIXED_CHUNK_SIZE, len(tokens))
  chunk_tokens = tokens[current_position:chunk_end]
  chunk_text = " ".join(chunk_tokens)

  if debug:
- print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
- f"({len(chunk_tokens)} tokens)")
+ print(f"\n🔧 DEBUG Step {step_number}/{total_steps}: Processing chunk from {current_position} to {chunk_end} "
+ f"({len(chunk_tokens)} tokens)")

- # Progress calculation
- remaining_tokens = len(tokens) - current_position
- estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
- total_estimated_steps = step_number + estimated_remaining_steps -1
- progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
+ # Progress calculation (based on fixed steps)
+ progress = (step_number / total_steps) * 90

  if streaming_callback:
  streaming_callback(
- f"Processing chunk {step_number}/{total_estimated_steps} - "
- f"Budget: {adaptive_chunk_size:,} tokens",
+ f"Processing chunk {step_number}/{total_steps} - Fixed size: {FIXED_CHUNK_SIZE:,} tokens",
  MSG_TYPE.MSG_TYPE_STEP_START,
- {"step": step_number, "progress": progress}
+ {"step": step_number, "total_steps": total_steps, "progress": progress}
  )

+ # Check and compress scratchpad if needed
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(current_scratchpad.split())
+
+ if scratchpad_size > scratchpad_compression_threshold:
+ if debug:
+ print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
+ chunk_summaries = compress_scratchpad(chunk_summaries)
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(current_scratchpad.split())
+
  try:
- # Generic, state-aware system prompt
  system_prompt = (
- f"You are a component in a multi-step text processing pipeline. Your role is to analyze a chunk of text and extract key information relevant to a global objective.\n\n"
- f"**Current Status:** You are on step {step_number} of approximately {total_estimated_steps} steps. Progress is at {progress:.1f}%.\n\n"
- f"**Your Task:**\n"
- f"Analyze the 'New Text Chunk' provided below. Extract and summarize any information, data points, or key ideas that are relevant to the 'Global Objective'.\n"
- f"Review the 'Existing Scratchpad Content' to understand what has already been found. Your goal is to add *new* insights that are not already captured.\n\n"
- f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
+ f"You are a component in a multi-step text processing pipeline analyzing step {step_number} of {total_steps}.\n\n"
+ f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+ f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+ f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+ f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
  )

- # Generic, context-aware user prompt
- summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions from the text."
- scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
+ summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+ scratchpad_status = "First chunk analysis" if not chunk_summaries else f"{len(chunk_summaries)} sections completed, {scratchpad_size} tokens"

  user_prompt = (
  f"--- Global Objective ---\n{summarization_objective}\n\n"
- f"--- Current Progress ---\n"
- f"{scratchpad_status} (Step {step_number}/{total_estimated_steps})\n\n"
- f"--- Existing Scratchpad Content (for context) ---\n{current_scratchpad}\n\n"
- f"--- New Text Chunk to Analyze ---\n{chunk_text}\n\n"
- f"--- Your Instructions ---\n"
- f"Extract key information from the 'New Text Chunk' that aligns with the 'Global Objective'. "
- f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
- f"If no new relevant information is found, state that clearly."
+ f"--- Progress ---\nStep {step_number}/{total_steps} | {scratchpad_status}\n\n"
+ f"--- Existing Scratchpad (for context) ---\n{current_scratchpad}\n\n"
+ f"--- New Text Chunk ---\n{chunk_text}\n\n"
+ f"--- Instructions ---\n"
+ f"Extract NEW key information from this chunk that aligns with the objective. "
+ f"Be concise. Avoid repeating scratchpad content."
  )

  if debug:
- print(f"🔧 DEBUG: Sending {len(user_prompt)} char prompt to LLM")
+ print(f"🔧 DEBUG: Prompt size: {len(user_prompt)} chars, Scratchpad: {scratchpad_size} tokens")

  chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))

  if debug:
- print(f"🔧 DEBUG: Received {len(chunk_summary)} char response preview: {chunk_summary[:200]}...")
+ print(f"🔧 DEBUG: Received {len(chunk_summary)} char response")

- # Generic content filtering
+ # Filter logic
  filter_out = False
  filter_reason = "content accepted"

- # Check for explicit rejection signals
  if (chunk_summary.strip().lower().startswith('[no new') or
  chunk_summary.strip().lower().startswith('no new information')):
  filter_out = True
  filter_reason = "explicit rejection signal"
- # Check for overly short or generic refusal responses
  elif len(chunk_summary.strip()) < 25:
  filter_out = True
- filter_reason = "response too short to be useful"
- # Check for common error phrases
- elif any(error_phrase in chunk_summary.lower()[:150] for error_phrase in [
- 'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
+ filter_reason = "response too short"
+ elif any(error in chunk_summary.lower()[:150] for error in [
+ 'error', 'failed', 'cannot provide', 'unable to analyze']):
  filter_out = True
- filter_reason = "error or refusal response detected"
+ filter_reason = "error response"

  if not filter_out:
  chunk_summaries.append(chunk_summary.strip())
  content_added = True
  if debug:
- print(f"🔧 DEBUG: ✅ Content added to scratchpad (total sections: {len(chunk_summaries)})")
+ print(f"🔧 DEBUG: ✅ Content added (total sections: {len(chunk_summaries)})")
  else:
  content_added = False
  if debug:
- print(f"🔧 DEBUG: ❌ Content filtered out - {filter_reason}: {chunk_summary[:100]}...")
+ print(f"🔧 DEBUG: ❌ Filtered: {filter_reason}")

- # Update progress via callback
  if streaming_callback:
  updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  streaming_callback(
  updated_scratchpad,
  MSG_TYPE.MSG_TYPE_SCRATCHPAD,
- {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added, "filter_reason": filter_reason}
+ {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added}
  )
- progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
  streaming_callback(
  f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
  MSG_TYPE.MSG_TYPE_STEP_END,
- {"progress": progress_after}
+ {"progress": progress}
  )

  except Exception as e:
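
Because the loop above always advances by FIXED_CHUNK_SIZE - overlap_tokens, the chunk windows and the step count are known before the first LLM call. An illustrative stride sketch (toy values, not from the package):

```python
# Toy illustration of the fixed-size window with overlap.
tokens = [f"w{i}" for i in range(1200)]
FIXED_CHUNK_SIZE, overlap_tokens = 500, 150

pos = 0
while pos < len(tokens):
    chunk = tokens[pos:pos + FIXED_CHUNK_SIZE]
    print(f"tokens {pos}..{pos + len(chunk) - 1}")
    pos += max(1, FIXED_CHUNK_SIZE - overlap_tokens)   # constant 350-token stride
# -> tokens 0..499, 350..849, 700..1199, 1050..1199
```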
@@ -4536,82 +4566,79 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  self.trace_exception(e)
  if streaming_callback:
  streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
- chunk_summaries.append(f"[Error processing chunk at step {step_number}: {str(e)[:150]}]")
+ chunk_summaries.append(f"[Error at step {step_number}: {str(e)[:150]}]")

- # Move to the next chunk, allowing for overlap
- current_position += max(1, adaptive_chunk_size - overlap_tokens)
+ # Move to next chunk with FIXED size
+ current_position += max(1, FIXED_CHUNK_SIZE - overlap_tokens)
  step_number += 1

- # Safety break for excessively long documents
+ # Safety break
  if step_number > 200:
- if debug: print(f"🔧 DEBUG: Safety break after {step_number-1} steps.")
- chunk_summaries.append("[Processing halted due to exceeding maximum step limit.]")
+ if debug:
+ print(f"🔧 DEBUG: Safety break at step {step_number}")
+ chunk_summaries.append("[Processing halted: exceeded maximum steps]")
  break

  if debug:
- print(f"\n🔧 DEBUG: Chunk processing complete. Total sections gathered: {len(chunk_summaries)}")
+ print(f"\n🔧 DEBUG: Processing complete. Sections: {len(chunk_summaries)}")

- # Return only the scratchpad content if requested
+ # Return scratchpad only if requested
  if return_scratchpad_only:
  final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  if streaming_callback:
- streaming_callback("Returning scratchpad content as final output.", MSG_TYPE.MSG_TYPE_STEP, {})
+ streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
  return final_scratchpad.strip()

- # Final Synthesis Step
+ # Final synthesis
  if streaming_callback:
- streaming_callback("Synthesizing final comprehensive response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
+ streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})

  if not chunk_summaries:
- error_msg = "No content was successfully processed or extracted from the document. The input might be empty or an issue occurred during processing."
+ error_msg = "No content was successfully processed."
  if debug:
  print(f"🔧 DEBUG: ❌ {error_msg}")
  return error_msg

  combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis of the provided text."
+ synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."

  if debug:
- print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} char scratchpad with {len(chunk_summaries)} sections.")
+ print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {len(chunk_summaries)} sections")

- # Generic synthesis prompts
  synthesis_system_prompt = (
- "You are an expert AI assistant specializing in synthesizing information. "
- "Your task is to consolidate a series of text analysis sections from a scratchpad into a single, coherent, and well-structured final response. "
- "Eliminate redundancy, organize the content logically, and ensure the final output directly and comprehensively addresses the user's primary objective. "
- "Use markdown for clear formatting (e.g., headers, lists, bold text)."
+ "You are an expert at synthesizing information. "
+ "Consolidate the analysis sections into a coherent final response. "
+ "Eliminate redundancy, organize logically, and use markdown formatting."
  )

  synthesis_user_prompt = (
  f"--- Final Objective ---\n{synthesis_objective}\n\n"
- f"--- Collected Analysis Sections (Scratchpad) ---\n{combined_scratchpad}\n\n"
- f"--- Your Final Task ---\n"
- f"Synthesize all the information from the 'Collected Analysis Sections' into a single, high-quality, and comprehensive response. "
- f"Your response must directly address the 'Final Objective'. "
- f"Organize your answer logically with clear sections using markdown headers. "
- f"Ensure all key information is included, remove any repetitive statements, and produce a polished, final document."
+ f"--- Collected Analysis Sections ---\n{combined_scratchpad}\n\n"
+ f"--- Instructions ---\n"
+ f"Synthesize all information into a comprehensive response addressing the objective. "
+ f"Organize with markdown headers, remove repetition, create a polished final document."
  )

  try:
  final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
  if debug:
- print(f"🔧 DEBUG: Final synthesis generated: {len(final_answer):,} characters")
+ print(f"🔧 DEBUG: Final synthesis: {len(final_answer):,} characters")
  if streaming_callback:
- streaming_callback("Final synthesis complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
+ streaming_callback("Final synthesis complete", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
  return final_answer.strip()

  except Exception as e:
- error_msg = f"The final synthesis step failed: {str(e)}. Returning the organized scratchpad content as a fallback."
- if debug: print(f"🔧 DEBUG: ❌ {error_msg}")
+ error_msg = f"Synthesis failed: {str(e)}. Returning scratchpad."
+ if debug:
+ print(f"🔧 DEBUG: ❌ {error_msg}")

- # Fallback to returning the organized scratchpad
  organized_scratchpad = (
  f"# Analysis Summary\n\n"
- f"*Note: The final synthesis process encountered an error. The raw, organized analysis sections are provided below.*\n\n"
- f"## Collected Sections\n\n"
- f"{combined_scratchpad}"
+ f"*Note: Final synthesis failed. Raw analysis sections below.*\n\n"
+ f"## Collected Sections\n\n{combined_scratchpad}"
  )
  return organized_scratchpad
+


  def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
@@ -53,6 +53,7 @@ class DiffusersBinding(LollmsTTIBinding):
  self.server_dir = self.binding_root / "server"
  self.venv_dir = Path("./venv/tti_diffusers_venv")
  self.models_path = Path(kwargs.get("models_path", "./data/models/diffusers_models")).resolve()
+ self.extra_models_path = kwargs.get("extra_models_path")
  self.models_path.mkdir(exist_ok=True, parents=True)
  if self.auto_start_server:
  self.ensure_server_is_running()
@@ -68,36 +69,47 @@ class DiffusersBinding(LollmsTTIBinding):
  return False


- def ensure_server_is_running(self, continue_if_locked: bool = True):
+ def ensure_server_is_running(self):
  """
  Ensures the Diffusers server is running. If not, it attempts to start it
- in a process-safe manner using a file lock.
-
- Args:
- continue_if_locked (bool): If True, return immediately if another process
- already holds the lock.
+ in a process-safe manner using a file lock. This method is designed to
+ prevent race conditions in multi-worker environments.
  """
  self.server_dir.mkdir(exist_ok=True)
- lock_path = self.models_path / "diffusers_server.lock"
+ # Use a lock file in the binding's server directory for consistency across instances
+ lock_path = self.server_dir / "diffusers_server.lock"
  lock = FileLock(lock_path)

  ASCIIColors.info("Attempting to start or connect to the Diffusers server...")
+
+ # First, perform a quick check without the lock to avoid unnecessary waiting.
+ if self.is_server_running():
+ ASCIIColors.green("Diffusers Server is already running and responsive.")
+ return
+
  try:
- # Try to acquire lock immediately if continue_if_locked=True
- with lock.acquire(timeout=0 if continue_if_locked else 60):
+ # Try to acquire the lock with a timeout. If another process is starting
+ # the server, this will wait until it's finished.
+ with lock.acquire(timeout=60):
+ # After acquiring the lock, we MUST re-check if the server is running.
+ # Another process might have started it and released the lock while we were waiting.
  if not self.is_server_running():
  ASCIIColors.yellow("Lock acquired. Starting dedicated Diffusers server...")
  self.start_server()
+ # The process that starts the server is responsible for waiting for it to be ready
+ # BEFORE releasing the lock. This is the key to preventing race conditions.
+ self._wait_for_server()
  else:
- ASCIIColors.green("Server was started by another process. Connected successfully.")
+ ASCIIColors.green("Server was started by another process while we waited. Connected successfully.")
  except Timeout:
- if continue_if_locked:
- ASCIIColors.yellow("Lock held by another process. Skipping server startup and continuing execution.")
- return
- else:
- ASCIIColors.yellow("Could not acquire lock within timeout. Waiting for server to become available...")
+ # This happens if the process holding the lock takes more than 60 seconds to start the server.
+ # We don't try to start another one. We just wait for the existing one to be ready.
+ ASCIIColors.yellow("Could not acquire lock, another process is taking a long time to start the server. Waiting...")
+ self._wait_for_server(timeout=300) # Give it a longer timeout here just in case.

- self._wait_for_server()
+ # A final verification to ensure we are connected.
+ if not self.is_server_running():
+ raise RuntimeError("Failed to start or connect to the Diffusers server after all attempts.")

  def install_server_dependencies(self):
  """
@@ -191,6 +203,10 @@ class DiffusersBinding(LollmsTTIBinding):
  "--models-path", str(self.models_path.resolve()) # Pass models_path to server
  ]

+ if self.extra_models_path:
+ resolved_extra_path = Path(self.extra_models_path).resolve()
+ command.extend(["--extra-models-path", str(resolved_extra_path)])
+
  # Use DETACHED_PROCESS on Windows to allow the server to run independently of the parent process.
  # On Linux/macOS, the process will be daemonized enough to not be killed with the worker.
  creationflags = subprocess.DETACHED_PROCESS if sys.platform == "win32" else 0
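
How the server consumes the new flag is not shown in this diff; a hypothetical sketch of the receiving side, assuming a conventional argparse setup in the server's main.py:

```python
# Hypothetical receiving side for the forwarded flags (an assumption, not part of this diff).
import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--models-path", type=Path, required=True)
parser.add_argument("--extra-models-path", type=Path, default=None)
args = parser.parse_args()

# Models would then be searched in both locations, primary path first.
model_dirs = [args.models_path] + ([args.extra_models_path] if args.extra_models_path else [])
```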
@@ -273,11 +289,14 @@ class DiffusersBinding(LollmsTTIBinding):
  pass

  def generate_image(self, prompt: str, negative_prompt: str = "", **kwargs) -> bytes:
- # This is a pure JSON request
+ params = kwargs.copy()
+ if "model_name" not in params and self.config.get("model_name"):
+ params["model_name"] = self.config["model_name"]
+
  response = self._post_json_request("/generate_image", data={
  "prompt": prompt,
  "negative_prompt": negative_prompt,
- "params": kwargs
+ "params": params
  })
  return response.content

@@ -307,15 +326,19 @@ class DiffusersBinding(LollmsTTIBinding):
  raise ValueError(f"Unsupported image type in edit_image: {type(img)}")
  if not images_b64:
  raise ValueError("No valid images were provided to the edit_image function.")
+
+ params = kwargs.copy()
+ if "model_name" not in params and self.config.get("model_name"):
+ params["model_name"] = self.config["model_name"]

  # Translate "mask" to "mask_image" for server compatibility
- if "mask" in kwargs and kwargs["mask"]:
- kwargs["mask_image"] = kwargs.pop("mask")
+ if "mask" in params and params["mask"]:
+ params["mask_image"] = params.pop("mask")

  json_payload = {
  "prompt": prompt,
  "images_b64": images_b64,
- "params": kwargs
+ "params": params
  }
  response = self._post_json_request("/edit_image", data=json_payload)
  return response.content
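
Both generate_image and edit_image now resolve the model the same way: an explicit model_name in kwargs wins, otherwise the binding's configured model_name is filled in before the request is sent. A minimal sketch of that precedence (config value is an illustrative placeholder):

```python
# Illustrative precedence check; the config value is a placeholder, not a real model id.
def build_params(config: dict, **kwargs) -> dict:
    params = kwargs.copy()
    if "model_name" not in params and config.get("model_name"):
        params["model_name"] = config["model_name"]
    return params

config = {"model_name": "default-diffusers-model"}
print(build_params(config, width=1024))                   # model_name falls back to the config
print(build_params(config, model_name="per-call-model"))  # explicit kwarg takes precedence
```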
@@ -351,4 +374,4 @@ class DiffusersBinding(LollmsTTIBinding):
  def __del__(self):
  # The client destructor does not stop the server,
  # as it is a shared resource for all worker processes.
- pass
+ pass