lollms-client 1.6.7__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of lollms-client has been flagged as possibly problematic.

Files changed (33)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/lollms_agentic.py +4 -2
  3. lollms_client/lollms_core.py +263 -138
  4. lollms_client/lollms_discussion.py +15 -2
  5. lollms_client/lollms_stt_binding.py +59 -3
  6. lollms_client/lollms_tti_binding.py +3 -1
  7. lollms_client/lollms_ttm_binding.py +3 -1
  8. lollms_client/lollms_tts_binding.py +2 -2
  9. lollms_client/lollms_ttv_binding.py +3 -1
  10. lollms_client/stt_bindings/whisper/__init__.py +20 -12
  11. lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
  12. lollms_client/tti_bindings/diffusers/__init__.py +9 -10
  13. lollms_client/tti_bindings/diffusers/server/main.py +10 -59
  14. lollms_client/tti_bindings/gemini/__init__.py +4 -1
  15. lollms_client/tti_bindings/leonardo_ai/__init__.py +5 -2
  16. lollms_client/tti_bindings/lollms/__init__.py +4 -1
  17. lollms_client/tti_bindings/novita_ai/__init__.py +4 -1
  18. lollms_client/tti_bindings/openai/__init__.py +10 -11
  19. lollms_client/tti_bindings/stability_ai/__init__.py +4 -2
  20. lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
  21. lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
  22. lollms_client/ttm_bindings/lollms/__init__.py +4 -17
  23. lollms_client/ttm_bindings/replicate/__init__.py +7 -4
  24. lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
  25. lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
  26. lollms_client/tts_bindings/bark/__init__.py +7 -10
  27. lollms_client/tts_bindings/piper_tts/__init__.py +7 -10
  28. lollms_client/tts_bindings/xtts/__init__.py +8 -8
  29. {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/METADATA +1 -1
  30. {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/RECORD +33 -33
  31. {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/WHEEL +0 -0
  32. {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/licenses/LICENSE +0 -0
  33. {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py

@@ -91,21 +91,6 @@ class LollmsClient():
         stt_binding_config (Optional[Dict]): Additional config for the STT binding.
         ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
         ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
-        service_key (Optional[str]): Shared authentication key or client_id.
-        verify_ssl_certificate (bool): Whether to verify SSL certificates.
-        ctx_size (Optional[int]): Default context size for LLM.
-        n_predict (Optional[int]): Default max tokens for LLM.
-        stream (bool): Default streaming mode for LLM.
-        temperature (float): Default temperature for LLM.
-        top_k (int): Default top_k for LLM.
-        top_p (float): Default top_p for LLM.
-        repeat_penalty (float): Default repeat penalty for LLM.
-        repeat_last_n (int): Default repeat last n for LLM.
-        seed (Optional[int]): Default seed for LLM.
-        n_threads (int): Default threads for LLM.
-        streaming_callback (Optional[Callable]): Default streaming callback for LLM.
-        user_name (str): Default user name for prompts.
-        ai_name (str): Default AI name for prompts.
 
     Raises:
         ValueError: If the primary LLM binding cannot be created.
@@ -160,93 +145,119 @@ class LollmsClient():
             except Exception as e:
                 trace_exception(e)
                 ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
+                self.tts = None
 
         if tti_binding_name:
-            if tti_binding_config:
-                self.tti = self.tti_binding_manager.create_binding(
-                    binding_name=tti_binding_name,
-                    **{
-                        k: v
-                        for k, v in (tti_binding_config or {}).items()
-                        if k != "binding_name"
-                    }
-                )
-            else:
-                self.tti = self.tti_binding_manager.create_binding(
-                    binding_name=tti_binding_name
-                )
-            if self.tti is None:
-                ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
-
+            try:
+                if tti_binding_config:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name,
+                        **{
+                            k: v
+                            for k, v in (tti_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name
+                    )
+                if self.tti is None:
+                    ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTI binding: {str(e)}")
+                self.tti = None
+
         if stt_binding_name:
-            if stt_binding_config:
-                self.stt = self.stt_binding_manager.create_binding(
-                    binding_name=stt_binding_name,
-                    **{
-                        k: v
-                        for k, v in (stt_binding_config or {}).items()
-                        if k != "binding_name"
-                    }
-                )
+            try:
+                if stt_binding_config:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                        **{
+                            k: v
+                            for k, v in (stt_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
 
-            else:
-                self.stt = self.stt_binding_manager.create_binding(
-                    binding_name=stt_binding_name,
-                )
-            if self.stt is None:
-                ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+                else:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                    )
+                if self.stt is None:
+                    ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating STT binding: {str(e)}")
+                self.stt = None
+
         if ttv_binding_name:
-            if ttv_binding_config:
-                self.ttv = self.ttv_binding_manager.create_binding(
-                    binding_name=ttv_binding_name,
-                    **{
-                        k: v
-                        for k, v in ttv_binding_config.items()
-                        if k != "binding_name"
-                    }
-                )
+            try:
+                if ttv_binding_config:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name,
+                        **{
+                            k: v
+                            for k, v in ttv_binding_config.items()
+                            if k != "binding_name"
+                        }
+                    )
 
-            else:
-                self.ttv = self.ttv_binding_manager.create_binding(
-                    binding_name=ttv_binding_name
-                )
-            if self.ttv is None:
-                ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+                else:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name
+                    )
+                if self.ttv is None:
+                    ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTV binding: {str(e)}")
+                self.ttv = None
 
         if ttm_binding_name:
-            if ttm_binding_config:
-                self.ttm = self.ttm_binding_manager.create_binding(
-                    binding_name=ttm_binding_name,
-                    **{
-                        k: v
-                        for k, v in (ttm_binding_config or {}).items()
-                        if k != "binding_name"
-                    }
-                )
-            else:
-                self.ttm = self.ttm_binding_manager.create_binding(
-                    binding_name=ttm_binding_name
-                )
-            if self.ttm is None:
-                ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+            try:
+                if ttm_binding_config:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name,
+                        **{
+                            k: v
+                            for k, v in (ttm_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name
+                    )
+                if self.ttm is None:
+                    ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTM binding: {str(e)}")
+                self.ttm = None
 
         if mcp_binding_name:
-            if mcp_binding_config:
-                self.mcp = self.mcp_binding_manager.create_binding(
-                    binding_name=mcp_binding_name,
-                    **{
-                        k: v
-                        for k, v in (mcp_binding_config or {}).items()
-                        if k != "binding_name"
-                    }
-                )
-            else:
-                self.mcp = self.mcp_binding_manager.create_binding(
-                    mcp_binding_name
-                )
-            if self.mcp is None:
-                ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
-
+            try:
+                if mcp_binding_config:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        binding_name=mcp_binding_name,
+                        **{
+                            k: v
+                            for k, v in (mcp_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        mcp_binding_name
+                    )
+                if self.mcp is None:
+                    ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating MCP binding: {str(e)}")
+                self.mcp = None
         # --- Store Default Generation Parameters ---
 
         # --- Prompt Formatting Attributes ---
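Every optional binding (TTI, STT, TTV, TTM, MCP) now gets the same try/except treatment TTS already had: a binding that throws during construction is logged via trace_exception and its attribute is left as None instead of aborting the whole client. A minimal sketch of what this means for calling code; the binding names and arguments here are hypothetical placeholders, not confirmed API:

```python
from lollms_client import LollmsClient

# Sketch only: binding names are illustrative assumptions.
client = LollmsClient(
    llm_binding_name="ollama",     # primary LLM binding (assumed name)
    tti_binding_name="diffusers",  # may fail to load on a machine without GPU deps
)

# As of 1.7.0 a failing TTI binding logs a warning and leaves client.tti as None,
# so callers can feature-check instead of wrapping construction in try/except:
if client.tti is None:
    print("Image generation unavailable; continuing text-only.")
```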
@@ -1465,7 +1476,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             return "🎨 Creating an image based on your request"
 
         # Handle RAG (data store) tools by their pattern
-        elif "research::" in tool_name:
+        elif "rag::" in tool_name:
             # Extract the friendly name of the data source
             source_name = tool_name.split("::")[-1].replace("_", " ").title()
             return f"🔍 Searching {source_name} for relevant information"
@@ -1516,7 +1527,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             final_answer_temperature=0.7
         if rag_top_k is None:
             rag_top_k=5
-
+
+        tools_infos = []
         def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
             if not streaming_callback: return None
             is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
@@ -1543,38 +1555,44 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             mcp_tools = self.mcp.discover_tools(force_refresh=True)
             if isinstance(use_mcps, list):
                 filtered_tools = [t for t in mcp_tools if t["name"] in use_mcps]
+                tools_infos+=[f" 🛠️{f['name']}" for f in filtered_tools]
                 all_discovered_tools.extend(filtered_tools)
                 log_event(f" ✅ Loaded {len(filtered_tools)} specific MCP tools: {', '.join(use_mcps)}", MSG_TYPE.MSG_TYPE_INFO)
             elif use_mcps is True:
+                tools_infos+=[f" 🛠️{f['name']}" for f in mcp_tools]
                 all_discovered_tools.extend(mcp_tools)
                 log_event(f" ✅ Loaded {len(mcp_tools)} MCP tools", MSG_TYPE.MSG_TYPE_INFO)
 
         if use_data_store:
             log_event(f" 📚 Setting up {len(use_data_store)} knowledge bases...", MSG_TYPE.MSG_TYPE_INFO)
             for name, info in use_data_store.items():
-                tool_name, description, call_fn = f"research::{name}", f"Queries the '{name}' knowledge base.", None
+                ASCIIColors.info(f"use_data_store item:\n{name}\n{info}")
+                tool_name, description, call_fn = f"rag::{name}", f"Queries the '{name}' knowledge base.", None
                 if callable(info): call_fn = info
                 elif isinstance(info, dict):
                     if "callable" in info and callable(info["callable"]): call_fn = info["callable"]
-                    description = info.get("description", description)
+                    description = info.get("description", "This is a datastore with the following description: \n" + description)
                 if call_fn:
                     visible_tools.append({"name": tool_name, "description": description, "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
                     rag_registry[tool_name] = call_fn
                     rag_tool_specs[tool_name] = {"default_top_k": rag_top_k, "default_min_sim": rag_min_similarity_percent}
-                    log_event(f" 📖 Ready: {name}", MSG_TYPE.MSG_TYPE_INFO)
-
+                    tools_infos.append(f" 📖 {name}")
         visible_tools.extend(all_discovered_tools)
         built_in_tools = [
             {"name": "local_tools::final_answer", "description": "Provide the final answer directly to the user.", "input_schema": {}},
             {"name": "local_tools::request_clarification", "description": "Ask the user for more specific information when the request is ambiguous.", "input_schema": {"type": "object", "properties": {"question": {"type": "string"}}, "required": ["question"]}},
             {"name": "local_tools::revise_plan", "description": "Update the execution plan based on new discoveries or changing requirements.", "input_schema": {"type": "object", "properties": {"reason": {"type": "string"}, "new_plan": {"type": "array"}}, "required": ["reason", "new_plan"]}}
         ]
+        tools_infos+=[f" 🔨 final_answer"," 🔨 request_clarification"," 🔨 revise_plan"]
+
+
         if getattr(self, "tti", None):
             built_in_tools.append({"name": "local_tools::generate_image", "description": "Generate an image from a text description.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}})
 
         all_visible_tools = visible_tools + built_in_tools
         tool_summary = "\n".join([f"- **{t['name']}**: {t['description']}" for t in all_visible_tools[:20]])
 
+        log_event("\n".join(tools_infos), MSG_TYPE.MSG_TYPE_INFO)
         log_event(f"✅ Ready with {len(all_visible_tools)} total capabilities", MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id, meta={"tool_count": len(all_visible_tools), "mcp_tools": len(all_discovered_tools), "rag_tools": len(rag_registry)})
 
         # Enhanced triage with better prompting
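As this hunk shows, each use_data_store entry may be either a bare callable or a dict carrying a "callable" and an optional "description", and every registered source is exposed to the agent as a rag::<name> tool with a single query string argument. A sketch of the expected shape; the retrieval function and its return value are illustrative assumptions:

```python
def search_my_docs(query: str, top_k: int = 5, min_similarity_percent: float = 0.0):
    # Hypothetical retrieval function: return your vector store's hits for the query.
    return [{"text": f"stub chunk matching {query!r}", "score": 0.92}]

use_data_store = {
    # Registered as the agent-visible tool "rag::my_docs"
    "my_docs": {
        "callable": search_my_docs,
        "description": "Internal project documentation.",
    },
    # A bare callable also works; a default description is synthesized.
    "scratch_notes": search_my_docs,
}
```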
@@ -1594,7 +1612,7 @@ AVAILABLE CAPABILITIES:
 Based on the request complexity and available tools, choose the optimal strategy:
 
 1. **DIRECT_ANSWER**: For simple greetings, basic questions, or requests that don't require any tools
-   - Use when: The request can be fully answered with your existing knowledge
+   - Use when: The request can be fully answered with your existing knowledge with confidence, and no tool seems to add any significant value to the answer
    - Example: "Hello", "What is Python?", "Explain quantum physics"
 
 2. **REQUEST_CLARIFICATION**: When the request is too vague or ambiguous
@@ -1612,16 +1630,14 @@ Based on the request complexity and available tools, choose the optimal strategy
 Provide your analysis in JSON format:
 {{"thought": "Detailed reasoning about the request complexity and requirements", "strategy": "ONE_OF_THE_FOUR_OPTIONS", "confidence": 0.8, "text_output": "Direct answer or clarification question if applicable", "required_tool_name": "specific tool name if SINGLE_TOOL strategy", "estimated_steps": 3}}"""
 
-        log_prompt("Triage Prompt", triage_prompt)
-
         triage_schema = {
             "thought": "string", "strategy": "string", "confidence": "number",
             "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
         }
-        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, **llm_generation_kwargs)
+        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, system_prompt=system_prompt, **llm_generation_kwargs)
         strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"
 
-        log_event(f"Strategy analysis complete", MSG_TYPE.MSG_TYPE_INFO, meta={
+        log_event(f"Strategy analysis complete.\n**confidence**: {strategy_data.get('confidence', 0.5)}\n**reasoning**: {strategy_data.get('thought', 'None')}", MSG_TYPE.MSG_TYPE_INFO, meta={
             "strategy": strategy,
             "confidence": strategy_data.get("confidence", 0.5),
             "estimated_steps": strategy_data.get("estimated_steps", 1),
@@ -1760,7 +1776,7 @@ RESPONSE:"""
         }
         if tool_name in descriptions:
             return descriptions[tool_name]
-        if "research::" in tool_name:
+        if "rag::" in tool_name:
             return f"🔍 Searching {tool_name.split('::')[-1]} knowledge base"
         if requires_code:
             return "💻 Processing code"
@@ -1829,7 +1845,7 @@ RESPONSE:"""
 
         # Enhanced planning phase
         planning_step_id = log_event_fn("📋 Creating adaptive execution plan...", MSG_TYPE.MSG_TYPE_STEP_START)
-        execution_plan = planner.decompose_task(original_user_prompt, context or "")
+        execution_plan = planner.decompose_task(original_user_prompt, context or "", "\n".join([f"{tool['name']}:{tool['description']}" for tool in all_visible_tools]))
         current_plan_version = 1
 
         log_event_fn(f"Initial plan created with {len(execution_plan.tasks)} tasks", MSG_TYPE.MSG_TYPE_INFO, meta={
@@ -4309,27 +4325,29 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         context_fill_percentage: float = 0.75,
         overlap_tokens: int = 150,
         expected_generation_tokens: int = 1500,
-        max_scratchpad_tokens: int = 4000,  # NEW: Hard limit for scratchpad
-        scratchpad_compression_threshold: int = 3000,  # NEW: When to compress
+        max_scratchpad_tokens: int = 4000,
+        scratchpad_compression_threshold: int = 3000,
         streaming_callback: Optional[Callable] = None,
         return_scratchpad_only: bool = False,
         debug: bool = True,
+        ctx_size=None,
         **kwargs
     ) -> str:
         """
         Processes long text with FIXED chunk sizing and managed scratchpad growth.
+        Now uses dynamic token calculation based on actual model tokenizer.
         """
 
         if debug:
             print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")
 
         # Validate context fill percentage
-        if not (0.1 <= context_fill_percentage <= 0.9):
-            raise ValueError(f"context_fill_percentage must be between 0.1 and 0.9, got {context_fill_percentage}")
+        if not (0.1 <= context_fill_percentage <= 1.0):
+            raise ValueError(f"context_fill_percentage must be between 0.1 and 1.0, got {context_fill_percentage}")
 
         # Get context size
         try:
-            context_size = self.llm.get_context_size() or 8192
+            context_size = ctx_size or self.llm.default_ctx_size or self.llm.get_context_size() or 8192
         except:
             context_size = 8192
 
@@ -4346,26 +4364,97 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")
 
         # ========================================
-        # FIXED: Calculate chunk size ONCE upfront
+        # ENHANCED: Dynamically calculate token sizes using actual tokenizer
         # ========================================
-        base_system_tokens = 150
-        user_template_tokens = 250
-
+
+        # Create template system prompt to measure its token size
+        template_system_prompt = (
+            f"You are a component in a multi-step text processing pipeline analyzing step 1 of 100.\n\n"
+            f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+            f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+            f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+            f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
+        )
+        base_system_tokens = len(self.tokenize(template_system_prompt))
+
+        # Create MINIMAL template user prompt (structure only, without content placeholders)
+        summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+
+        # Measure only the structural overhead (headers, formatting, instructions)
+        template_structure = (
+            f"--- Global Objective ---\n{summarization_objective}\n\n"
+            f"--- Progress ---\nStep 100/100 | 10 sections completed, 4000 tokens\n\n"  # Worst-case progress text
+            f"--- Existing Scratchpad (for context) ---\n"
+            f"--- New Text Chunk ---\n"
+            f"--- Instructions ---\n"
+            f"Extract NEW key information from this chunk that aligns with the objective. "
+            f"Be concise. Avoid repeating scratchpad content."
+        )
+        user_template_overhead = len(self.tokenize(template_structure))
+
+        if debug:
+            print(f"🔧 DEBUG: Computed system prompt tokens: {base_system_tokens}")
+            print(f"🔧 DEBUG: Computed user template overhead: {user_template_overhead}")
+            print(f"🔧 DEBUG: (Note: Scratchpad and chunk content allocated separately)")
+
         # Reserve space for maximum expected scratchpad size
         reserved_scratchpad_tokens = max_scratchpad_tokens
-
+
         total_budget = int(context_size * context_fill_percentage)
-        used_tokens = base_system_tokens + user_template_tokens + reserved_scratchpad_tokens + expected_generation_tokens
-
+        # Only count overhead, not the actual chunk/scratchpad content (that's reserved separately)
+        used_tokens = base_system_tokens + user_template_overhead + reserved_scratchpad_tokens + expected_generation_tokens
+
         # FIXED chunk size - never changes during processing
-        FIXED_CHUNK_SIZE = max(500, int(total_budget - used_tokens))
+        FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))
+
 
         if debug:
-            print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens (will not change)")
-            print(f"🔧 DEBUG: Reserved scratchpad space: {reserved_scratchpad_tokens} tokens")
-            print(f"🔧 DEBUG: Total budget: {total_budget} tokens")
+            print(f"\n🔧 DEBUG: Token budget breakdown:")
+            print(f"  - Context size: {context_size} tokens")
+            print(f"  - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)")
+            print(f"  - Total budget: {total_budget} tokens")
+            print(f"  - System prompt: {base_system_tokens} tokens")
+            print(f"  - User template overhead: {user_template_overhead} tokens")
+            print(f"  - Reserved scratchpad: {reserved_scratchpad_tokens} tokens")
+            print(f"  - Expected generation: {expected_generation_tokens} tokens")
+            print(f"  - Total overhead: {used_tokens} tokens")
+            print(f"  - Remaining for chunks: {total_budget - used_tokens} tokens")
+            print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens")
+
+        # Safety check
+        if FIXED_CHUNK_SIZE == 1024:
+            print(f"⚠️ WARNING: Chunk size is at minimum (1024)!")
+            print(f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available")
+            print(f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens")
 
         if streaming_callback:
+            streaming_callback(
+                "\n".join([
+                    f"\n🔧 DEBUG: Token budget breakdown:",
+                    f"  - Context size: {context_size} tokens",
+                    f"  - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)",
+                    f"  - Total budget: {total_budget} tokens",
+                    f"  - System prompt: {base_system_tokens} tokens",
+                    f"  - User template overhead: {user_template_overhead} tokens",
+                    f"  - Reserved scratchpad: {reserved_scratchpad_tokens} tokens",
+                    f"  - Expected generation: {expected_generation_tokens} tokens",
+                    f"  - Total overhead: {used_tokens} tokens",
+                    f"  - Remaining for chunks: {total_budget - used_tokens} tokens",
+                    f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens"
+                ]),
+                MSG_TYPE.MSG_TYPE_STEP
+            )
+            if FIXED_CHUNK_SIZE == 1024:
+                streaming_callback(
+                    "\n".join([
+                        f"⚠️ WARNING: Chunk size is at minimum (1024)!",
+                        f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available",
+                        f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens"
+                    ]),
+                    MSG_TYPE.MSG_TYPE_STEP
+                )
             streaming_callback(
                 f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
                 MSG_TYPE.MSG_TYPE_STEP,
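To make the new budget arithmetic concrete, here is a worked example with the default parameters, an 8192-token context, and illustrative measured overheads (the real values come from self.tokenize at runtime):

```python
context_size = 8192
context_fill_percentage = 0.75
total_budget = int(context_size * context_fill_percentage)   # 6144

# Illustrative measurements; actual counts depend on the model's tokenizer.
base_system_tokens = 90
user_template_overhead = 60
reserved_scratchpad_tokens = 4000    # max_scratchpad_tokens default
expected_generation_tokens = 1500

used_tokens = (base_system_tokens + user_template_overhead
               + reserved_scratchpad_tokens + expected_generation_tokens)   # 5650

FIXED_CHUNK_SIZE = max(1024, total_budget - used_tokens)   # max(1024, 494) -> 1024
```

With these defaults the chunk size clamps to the new 1024-token floor, which is exactly the condition the warning path above reports; passing a larger ctx_size or lowering max_scratchpad_tokens frees budget for bigger chunks.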
@@ -4416,7 +4505,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             print(f"🔧 DEBUG: Total estimated steps: {total_steps}")
 
         # ========================================
-        # NEW: Scratchpad compression helper
+        # NEW: Scratchpad compression helper with dynamic token counting
         # ========================================
         def compress_scratchpad(scratchpad_sections: list) -> list:
             """Compress scratchpad when it gets too large"""
@@ -4424,7 +4513,8 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                 return scratchpad_sections
 
             combined = "\n\n---\n\n".join(scratchpad_sections)
-            current_size = len(combined.split())
+            # ENHANCED: Use actual tokenizer to count
+            current_size = len(self.tokenize(combined))
 
             if current_size <= scratchpad_compression_threshold:
                 return scratchpad_sections
@@ -4448,7 +4538,8 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             )
 
             if debug:
-                compressed_size = len(compressed.split())
+                # ENHANCED: Use actual tokenizer
+                compressed_size = len(self.tokenize(compressed))
                 print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
 
             return [compressed]
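Replacing len(text.split()) with len(self.tokenize(text)) matters because whitespace word counts systematically undercount model tokens (subwords, punctuation, markdown markers), so the old compression threshold fired late. A small illustration of the gap, using a stand-in subword tokenizer since the real one is model-specific:

```python
import re

def word_count(text: str) -> int:
    return len(text.split())

def toy_tokenize(text: str) -> list:
    # Stand-in for self.tokenize: splits punctuation and long words the way a
    # subword tokenizer would. Illustrative only.
    return re.findall(r"\w{1,4}|[^\w\s]", text)

sample = "Scratchpad-section #3: consolidated findings (2024-01-15)."
print(word_count(sample))         # 5 "words"
print(len(toy_tokenize(sample)))  # 22 tokens with this toy scheme
```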
@@ -4479,16 +4570,16 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                 {"step": step_number, "total_steps": total_steps, "progress": progress}
             )
 
-        # Check and compress scratchpad if needed
+        # ENHANCED: Check and compress scratchpad with actual token counting
         current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
-        scratchpad_size = len(current_scratchpad.split())
+        scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
 
         if scratchpad_size > scratchpad_compression_threshold:
             if debug:
                 print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
             chunk_summaries = compress_scratchpad(chunk_summaries)
             current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
-            scratchpad_size = len(current_scratchpad.split())
+            scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
 
         try:
             system_prompt = (
@@ -4512,8 +4603,15 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                 f"Be concise. Avoid repeating scratchpad content."
             )
 
+            # ENHANCED: Compute actual prompt size
+            actual_prompt_tokens = len(self.tokenize(user_prompt))
+            actual_system_tokens = len(self.tokenize(system_prompt))
+
             if debug:
-                print(f"🔧 DEBUG: Prompt size: {len(user_prompt)} chars, Scratchpad: {scratchpad_size} tokens")
+                print(f"🔧 DEBUG: Actual prompt tokens: {actual_prompt_tokens}")
+                print(f"🔧 DEBUG: Actual system tokens: {actual_system_tokens}")
+                print(f"🔧 DEBUG: Total input tokens: {actual_prompt_tokens + actual_system_tokens}")
+                print(f"🔧 DEBUG: Scratchpad: {scratchpad_size} tokens")
 
             chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))
@@ -4589,7 +4687,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
             return final_scratchpad.strip()
 
-        # Final synthesis
+        # Final synthesis with STRONG objective reinforcement
         if streaming_callback:
             streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
 
@@ -4603,20 +4701,47 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."
 
         if debug:
-            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {len(chunk_summaries)} sections")
+            final_scratchpad_tokens = len(self.tokenize(combined_scratchpad))
+            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {final_scratchpad_tokens} tokens, {len(chunk_summaries)} sections")
 
+        # ENHANCED: Strong objective-focused synthesis
         synthesis_system_prompt = (
-            "You are an expert at synthesizing information. "
-            "Consolidate the analysis sections into a coherent final response. "
-            "Eliminate redundancy, organize logically, and use markdown formatting."
+            f"You are completing a multi-step text processing task. "
+            f"Your role is to take analysis sections and produce the FINAL OUTPUT that directly fulfills the user's original objective.\n\n"
+            f"**CRITICAL:** Your output must DIRECTLY ADDRESS the user's objective, NOT just summarize the sections. "
+            f"The sections are intermediate work - transform them into the final deliverable the user requested."
         )
 
+        # ENHANCED: Explicit task reinforcement with examples of what NOT to do
+        task_type_hint = ""
+        if contextual_prompt:
+            lower_prompt = contextual_prompt.lower()
+            if any(word in lower_prompt for word in ['extract', 'list', 'identify', 'find']):
+                task_type_hint = "\n**Task Type:** This is an EXTRACTION/IDENTIFICATION task. Provide a structured list or catalog of items found, NOT a narrative summary."
+            elif any(word in lower_prompt for word in ['analyze', 'evaluate', 'assess', 'examine']):
+                task_type_hint = "\n**Task Type:** This is an ANALYSIS task. Provide insights, patterns, and evaluations, NOT just a description of content."
+            elif any(word in lower_prompt for word in ['compare', 'contrast', 'difference']):
+                task_type_hint = "\n**Task Type:** This is a COMPARISON task. Highlight similarities and differences, NOT separate summaries."
+            elif any(word in lower_prompt for word in ['answer', 'question', 'explain why', 'how does']):
+                task_type_hint = "\n**Task Type:** This is a QUESTION-ANSWERING task. Provide a direct answer, NOT a general overview."
+
         synthesis_user_prompt = (
-            f"--- Final Objective ---\n{synthesis_objective}\n\n"
-            f"--- Collected Analysis Sections ---\n{combined_scratchpad}\n\n"
-            f"--- Instructions ---\n"
-            f"Synthesize all information into a comprehensive response addressing the objective. "
-            f"Organize with markdown headers, remove repetition, create a polished final document."
+            f"=== ORIGINAL USER OBJECTIVE (MOST IMPORTANT) ===\n{synthesis_objective}\n"
+            f"{task_type_hint}\n\n"
+            f"=== ANALYSIS SECTIONS (Raw Working Material) ===\n{combined_scratchpad}\n\n"
+            f"=== YOUR TASK ===\n"
+            f"Transform the analysis sections above into a final output that DIRECTLY FULFILLS the original objective.\n\n"
+            f"**DO:**\n"
+            f"- Focus exclusively on satisfying the user's original objective stated above\n"
+            f"- Organize information in whatever format best serves that objective\n"
+            f"- Remove redundancy and consolidate related points\n"
+            f"- Use markdown formatting for clarity\n\n"
+            f"**DO NOT:**\n"
+            f"- Provide a generic summary of the sections\n"
+            f"- Describe what the sections contain\n"
+            f"- Create an overview of the analysis process\n"
+            f"- Change the task into something different\n\n"
+            f"Remember: The user asked for '{synthesis_objective}' - deliver exactly that."
         )
 
         try:
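The task_type_hint is a plain first-match keyword scan over the lowercased contextual prompt, checked in the order extraction, analysis, comparison, question answering. Pulled out as a standalone function to make the matching order testable (behavior mirrors the hunk above):

```python
def classify_task(contextual_prompt: str) -> str:
    """Return a coarse task type from keyword matches, or 'generic'."""
    lower_prompt = contextual_prompt.lower()
    if any(w in lower_prompt for w in ('extract', 'list', 'identify', 'find')):
        return "extraction"
    if any(w in lower_prompt for w in ('analyze', 'evaluate', 'assess', 'examine')):
        return "analysis"
    if any(w in lower_prompt for w in ('compare', 'contrast', 'difference')):
        return "comparison"
    if any(w in lower_prompt for w in ('answer', 'question', 'explain why', 'how does')):
        return "question-answering"
    return "generic"

assert classify_task("Extract all dates mentioned") == "extraction"
assert classify_task("Compare the two proposals") == "comparison"
# Order matters: 'list' is checked before 'analyze', so mixed prompts
# resolve to the extraction branch.
assert classify_task("Analyze and list the risks") == "extraction"
```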
lollms_client/lollms_discussion.py

@@ -1208,14 +1208,27 @@ class LollmsDiscussion:
         prompt_for_agent = self.export("markdown", branch_tip_id if branch_tip_id else self.active_branch_id)
         if debug:
             ASCIIColors.cyan("\n" + "="*50 + "\n--- DEBUG: AGENTIC TURN TRIGGERED ---\n" + f"--- PROMPT FOR AGENT (from discussion history) ---\n{prompt_for_agent}\n" + "="*50 + "\n")
-
+
+
+        # Combine system prompt and data zones
+        system_prompt_part = (self._system_prompt or "").strip()
+        data_zone_part = self.get_full_data_zone() # This now returns a clean, multi-part block or an empty string
+        full_system_prompt = ""
+
+        # Combine them intelligently
+        if system_prompt_part and data_zone_part:
+            full_system_prompt = f"{system_prompt_part}\n\n{data_zone_part}"
+        elif system_prompt_part:
+            full_system_prompt = system_prompt_part
+        else:
+            full_system_prompt = data_zone_part
         agent_result = self.lollmsClient.generate_with_mcp_rag(
             prompt=prompt_for_agent,
             use_mcps=effective_use_mcps,
             use_data_store=use_data_store,
             max_reasoning_steps=max_reasoning_steps,
             images=images,
-            system_prompt = self._system_prompt,
+            system_prompt = full_system_prompt,
             debug=debug,
             **kwargs
         )
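The three-way branch in this hunk reduces to joining the non-empty parts with a blank line; a compact equivalent is handy when reasoning about edge cases (illustrative, not a proposed patch):

```python
from typing import Optional

def combine_system_prompt(system_prompt: Optional[str], data_zone: str) -> str:
    """Join the persona prompt and the data zone, skipping empty parts."""
    parts = [(system_prompt or "").strip(), data_zone]
    return "\n\n".join(p for p in parts if p)

assert combine_system_prompt("Be terse.", "-- data --") == "Be terse.\n\n-- data --"
assert combine_system_prompt(None, "-- data --") == "-- data --"
assert combine_system_prompt("Be terse.", "") == "Be terse."
```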