lollms-client 1.6.6__py3-none-any.whl → 1.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -91,21 +91,6 @@ class LollmsClient():
  stt_binding_config (Optional[Dict]): Additional config for the STT binding.
  ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
  ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
- service_key (Optional[str]): Shared authentication key or client_id.
- verify_ssl_certificate (bool): Whether to verify SSL certificates.
- ctx_size (Optional[int]): Default context size for LLM.
- n_predict (Optional[int]): Default max tokens for LLM.
- stream (bool): Default streaming mode for LLM.
- temperature (float): Default temperature for LLM.
- top_k (int): Default top_k for LLM.
- top_p (float): Default top_p for LLM.
- repeat_penalty (float): Default repeat penalty for LLM.
- repeat_last_n (int): Default repeat last n for LLM.
- seed (Optional[int]): Default seed for LLM.
- n_threads (int): Default threads for LLM.
- streaming_callback (Optional[Callable]): Default streaming callback for LLM.
- user_name (str): Default user name for prompts.
- ai_name (str): Default AI name for prompts.

  Raises:
  ValueError: If the primary LLM binding cannot be created.
@@ -160,93 +145,119 @@ class LollmsClient():
  except Exception as e:
  trace_exception(e)
  ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
+ self.tts = None

  if tti_binding_name:
- if tti_binding_config:
- self.tti = self.tti_binding_manager.create_binding(
- binding_name=tti_binding_name,
- **{
- k: v
- for k, v in (tti_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.tti = self.tti_binding_manager.create_binding(
- binding_name=tti_binding_name
- )
- if self.tti is None:
- ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
-
+ try:
+ if tti_binding_config:
+ self.tti = self.tti_binding_manager.create_binding(
+ binding_name=tti_binding_name,
+ **{
+ k: v
+ for k, v in (tti_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.tti = self.tti_binding_manager.create_binding(
+ binding_name=tti_binding_name
+ )
+ if self.tti is None:
+ ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTI binding: {str(e)}")
+ self.tti = None
+

  if stt_binding_name:
- if stt_binding_config:
- self.stt = self.stt_binding_manager.create_binding(
- binding_name=stt_binding_name,
- **{
- k: v
- for k, v in (stt_binding_config or {}).items()
- if k != "binding_name"
- }
- )
+ try:
+ if stt_binding_config:
+ self.stt = self.stt_binding_manager.create_binding(
+ binding_name=stt_binding_name,
+ **{
+ k: v
+ for k, v in (stt_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )

- else:
- self.stt = self.stt_binding_manager.create_binding(
- binding_name=stt_binding_name,
- )
- if self.stt is None:
- ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+ else:
+ self.stt = self.stt_binding_manager.create_binding(
+ binding_name=stt_binding_name,
+ )
+ if self.stt is None:
+ ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating STT binding: {str(e)}")
+ self.stt = None
+

  if ttv_binding_name:
- if ttv_binding_config:
- self.ttv = self.ttv_binding_manager.create_binding(
- binding_name=ttv_binding_name,
- **{
- k: v
- for k, v in ttv_binding_config.items()
- if k != "binding_name"
- }
- )
+ try:
+ if ttv_binding_config:
+ self.ttv = self.ttv_binding_manager.create_binding(
+ binding_name=ttv_binding_name,
+ **{
+ k: v
+ for k, v in ttv_binding_config.items()
+ if k != "binding_name"
+ }
+ )

- else:
- self.ttv = self.ttv_binding_manager.create_binding(
- binding_name=ttv_binding_name
- )
- if self.ttv is None:
- ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+ else:
+ self.ttv = self.ttv_binding_manager.create_binding(
+ binding_name=ttv_binding_name
+ )
+ if self.ttv is None:
+ ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTV binding: {str(e)}")
+ self.ttv = None

  if ttm_binding_name:
- if ttm_binding_config:
- self.ttm = self.ttm_binding_manager.create_binding(
- binding_name=ttm_binding_name,
- **{
- k: v
- for k, v in (ttm_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.ttm = self.ttm_binding_manager.create_binding(
- binding_name=ttm_binding_name
- )
- if self.ttm is None:
- ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+ try:
+ if ttm_binding_config:
+ self.ttm = self.ttm_binding_manager.create_binding(
+ binding_name=ttm_binding_name,
+ **{
+ k: v
+ for k, v in (ttm_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.ttm = self.ttm_binding_manager.create_binding(
+ binding_name=ttm_binding_name
+ )
+ if self.ttm is None:
+ ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTM binding: {str(e)}")
+ self.ttm = None

  if mcp_binding_name:
- if mcp_binding_config:
- self.mcp = self.mcp_binding_manager.create_binding(
- binding_name=mcp_binding_name,
- **{
- k: v
- for k, v in (mcp_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.mcp = self.mcp_binding_manager.create_binding(
- mcp_binding_name
- )
- if self.mcp is None:
- ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
-
+ try:
+ if mcp_binding_config:
+ self.mcp = self.mcp_binding_manager.create_binding(
+ binding_name=mcp_binding_name,
+ **{
+ k: v
+ for k, v in (mcp_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.mcp = self.mcp_binding_manager.create_binding(
+ mcp_binding_name
+ )
+ if self.mcp is None:
+ ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating MCP binding: {str(e)}")
+ self.mcp = None
  # --- Store Default Generation Parameters ---

  # --- Prompt Formatting Attributes ---
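This hunk applies the same defensive pattern to every optional binding (TTS, TTI, STT, TTV, TTM, MCP): creation failures are logged and the attribute falls back to None instead of aborting client construction. A minimal sketch of that pattern follows; `safe_create_binding` is a hypothetical helper, not part of the library, while the `create_binding`/`get_available_bindings` manager interface is taken from the diff.

```python
from typing import Any, Dict, Optional

def safe_create_binding(manager: Any,
                        binding_name: str,
                        binding_config: Optional[Dict] = None) -> Optional[Any]:
    """Create a binding through its manager; log and return None on any failure."""
    try:
        # Drop a stray "binding_name" key so it is not passed twice.
        kwargs = {k: v for k, v in (binding_config or {}).items() if k != "binding_name"}
        binding = manager.create_binding(binding_name=binding_name, **kwargs)
        if binding is None:
            print(f"Failed to create binding: {binding_name}. "
                  f"Available: {manager.get_available_bindings()}")
        return binding
    except Exception as exc:
        print(f"Exception occurred while creating binding '{binding_name}': {exc}")
        return None
```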
@@ -1465,7 +1476,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  return "🎨 Creating an image based on your request"

  # Handle RAG (data store) tools by their pattern
- elif "research::" in tool_name:
+ elif "rag::" in tool_name:
  # Extract the friendly name of the data source
  source_name = tool_name.split("::")[-1].replace("_", " ").title()
  return f"🔍 Searching {source_name} for relevant information"
@@ -1516,7 +1527,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  final_answer_temperature=0.7
  if rag_top_k is None:
  rag_top_k=5
-
+
+ tools_infos = []
  def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
  if not streaming_callback: return None
  is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
@@ -1543,38 +1555,44 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  mcp_tools = self.mcp.discover_tools(force_refresh=True)
  if isinstance(use_mcps, list):
  filtered_tools = [t for t in mcp_tools if t["name"] in use_mcps]
+ tools_infos+=[f" 🛠️{f['name']}" for f in filtered_tools]
  all_discovered_tools.extend(filtered_tools)
  log_event(f" ✅ Loaded {len(filtered_tools)} specific MCP tools: {', '.join(use_mcps)}", MSG_TYPE.MSG_TYPE_INFO)
  elif use_mcps is True:
+ tools_infos+=[f" 🛠️{f['name']}" for f in mcp_tools]
  all_discovered_tools.extend(mcp_tools)
  log_event(f" ✅ Loaded {len(mcp_tools)} MCP tools", MSG_TYPE.MSG_TYPE_INFO)

  if use_data_store:
  log_event(f" 📚 Setting up {len(use_data_store)} knowledge bases...", MSG_TYPE.MSG_TYPE_INFO)
  for name, info in use_data_store.items():
- tool_name, description, call_fn = f"research::{name}", f"Queries the '{name}' knowledge base.", None
+ ASCIIColors.info(f"use_data_store item:\n{name}\n{info}")
+ tool_name, description, call_fn = f"rag::{name}", f"Queries the '{name}' knowledge base.", None
  if callable(info): call_fn = info
  elif isinstance(info, dict):
  if "callable" in info and callable(info["callable"]): call_fn = info["callable"]
- description = info.get("description", description)
+ description = info.get("description", "This is a datastore with the following description: \n" + description)
  if call_fn:
  visible_tools.append({"name": tool_name, "description": description, "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
  rag_registry[tool_name] = call_fn
  rag_tool_specs[tool_name] = {"default_top_k": rag_top_k, "default_min_sim": rag_min_similarity_percent}
- log_event(f" 📖 Ready: {name}", MSG_TYPE.MSG_TYPE_INFO)
-
+ tools_infos.append(f" 📖 {name}")
  visible_tools.extend(all_discovered_tools)
  built_in_tools = [
  {"name": "local_tools::final_answer", "description": "Provide the final answer directly to the user.", "input_schema": {}},
  {"name": "local_tools::request_clarification", "description": "Ask the user for more specific information when the request is ambiguous.", "input_schema": {"type": "object", "properties": {"question": {"type": "string"}}, "required": ["question"]}},
  {"name": "local_tools::revise_plan", "description": "Update the execution plan based on new discoveries or changing requirements.", "input_schema": {"type": "object", "properties": {"reason": {"type": "string"}, "new_plan": {"type": "array"}}, "required": ["reason", "new_plan"]}}
  ]
+ tools_infos+=[f" 🔨 final_answer"," 🔨 request_clarification"," 🔨 revise_plan"]
+
+
  if getattr(self, "tti", None):
  built_in_tools.append({"name": "local_tools::generate_image", "description": "Generate an image from a text description.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}})

  all_visible_tools = visible_tools + built_in_tools
  tool_summary = "\n".join([f"- **{t['name']}**: {t['description']}" for t in all_visible_tools[:20]])

+ log_event("\n".join(tools_infos), MSG_TYPE.MSG_TYPE_INFO)
  log_event(f"✅ Ready with {len(all_visible_tools)} total capabilities", MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id, meta={"tool_count": len(all_visible_tools), "mcp_tools": len(all_discovered_tools), "rag_tools": len(rag_registry)})

  # Enhanced triage with better prompting
@@ -1594,7 +1612,7 @@ AVAILABLE CAPABILITIES:
  Based on the request complexity and available tools, choose the optimal strategy:

  1. **DIRECT_ANSWER**: For simple greetings, basic questions, or requests that don't require any tools
- - Use when: The request can be fully answered with your existing knowledge
+ - Use when: The request can be fully answered with your existing knowledge with confidence, and no tool seems to add any significant value to the answer
  - Example: "Hello", "What is Python?", "Explain quantum physics"

  2. **REQUEST_CLARIFICATION**: When the request is too vague or ambiguous
@@ -1612,16 +1630,14 @@ Based on the request complexity and available strategy
  Provide your analysis in JSON format:
  {{"thought": "Detailed reasoning about the request complexity and requirements", "strategy": "ONE_OF_THE_FOUR_OPTIONS", "confidence": 0.8, "text_output": "Direct answer or clarification question if applicable", "required_tool_name": "specific tool name if SINGLE_TOOL strategy", "estimated_steps": 3}}"""

- log_prompt("Triage Prompt", triage_prompt)
-
  triage_schema = {
  "thought": "string", "strategy": "string", "confidence": "number",
  "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
  }
- strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, **llm_generation_kwargs)
+ strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, system_prompt=system_prompt, **llm_generation_kwargs)
  strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"

- log_event(f"Strategy analysis complete", MSG_TYPE.MSG_TYPE_INFO, meta={
+ log_event(f"Strategy analysis complete.\n**confidence**: {strategy_data.get('confidence', 0.5)}\n**reasoning**: {strategy_data.get('thought', 'None')}", MSG_TYPE.MSG_TYPE_INFO, meta={
  "strategy": strategy,
  "confidence": strategy_data.get("confidence", 0.5),
  "estimated_steps": strategy_data.get("estimated_steps", 1),
@@ -1760,7 +1776,7 @@ RESPONSE:"""
  }
  if tool_name in descriptions:
  return descriptions[tool_name]
- if "research::" in tool_name:
+ if "rag::" in tool_name:
  return f"🔍 Searching {tool_name.split('::')[-1]} knowledge base"
  if requires_code:
  return "💻 Processing code"
@@ -1829,7 +1845,7 @@ RESPONSE:"""

  # Enhanced planning phase
  planning_step_id = log_event_fn("📋 Creating adaptive execution plan...", MSG_TYPE.MSG_TYPE_STEP_START)
- execution_plan = planner.decompose_task(original_user_prompt, context or "")
+ execution_plan = planner.decompose_task(original_user_prompt, context or "", "\n".join([f"{tool['name']}:{tool['description']}" for tool in all_visible_tools]))
  current_plan_version = 1

  log_event_fn(f"Initial plan created with {len(execution_plan.tasks)} tasks", MSG_TYPE.MSG_TYPE_INFO, meta={
@@ -4307,28 +4323,31 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  contextual_prompt: Optional[str] = None,
  system_prompt: str | None = None,
  context_fill_percentage: float = 0.75,
- overlap_tokens: int = 150, # Added a default for better context continuity
+ overlap_tokens: int = 150,
  expected_generation_tokens: int = 1500,
+ max_scratchpad_tokens: int = 4000,
+ scratchpad_compression_threshold: int = 3000,
  streaming_callback: Optional[Callable] = None,
  return_scratchpad_only: bool = False,
  debug: bool = True,
+ ctx_size=None,
  **kwargs
  ) -> str:
  """
- Processes long text by breaking it down into chunks, analyzing each one incrementally,
- and synthesizing the results into a comprehensive final response based on a user-defined objective.
+ Processes long text with FIXED chunk sizing and managed scratchpad growth.
+ Now uses dynamic token calculation based on actual model tokenizer.
  """

  if debug:
  print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")

  # Validate context fill percentage
- if not (0.1 <= context_fill_percentage <= 0.9):
- raise ValueError(f"context_fill_percentage must be between 0.1 and 0.9, got {context_fill_percentage}")
+ if not (0.1 <= context_fill_percentage <= 1.0):
+ raise ValueError(f"context_fill_percentage must be between 0.1 and 1.0, got {context_fill_percentage}")

  # Get context size
  try:
- context_size = self.llm.get_context_size() or 8192 # Using a more modern default
+ context_size = ctx_size or self.llm.default_ctx_size or self.llm.get_context_size() or 8192
  except:
  context_size = 8192

@@ -4339,65 +4358,121 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  if not text_to_process:
  return ""

- # Use a simple word-based split for token estimation
+ # Use word-based split for token estimation
  tokens = text_to_process.split()
  if debug:
  print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")

- # Dynamic token budget calculation
- def calculate_token_budgets(scratchpad_content: str = "", step_num: int = 0) -> dict:
- # Generic prompt templates are more concise
- base_system_tokens = 150
- user_template_tokens = 250
- scratchpad_tokens = len(scratchpad_content.split()) * 1.3 if scratchpad_content else 0
-
- used_tokens = base_system_tokens + user_template_tokens + scratchpad_tokens + expected_generation_tokens
- total_budget = int(context_size * context_fill_percentage)
- available_for_chunk = max(500, int(total_budget - used_tokens)) # Ensure a reasonable minimum chunk size
-
- budget_info = {
- "total_budget": total_budget,
- "chunk_budget": available_for_chunk,
- "efficiency_ratio": available_for_chunk / total_budget if total_budget > 0 else 0,
- "scratchpad_tokens": int(scratchpad_tokens),
- "used_tokens": int(used_tokens)
- }
+ # ========================================
+ # ENHANCED: Dynamically calculate token sizes using actual tokenizer
+ # ========================================
+
+ # Create template system prompt to measure its token size
+ template_system_prompt = (
+ f"You are a component in a multi-step text processing pipeline analyzing step 1 of 100.\n\n"
+ f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+ f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+ f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+ f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
+ )
+ base_system_tokens = len(self.tokenize(template_system_prompt))
+
+ # Create MINIMAL template user prompt (structure only, without content placeholders)
+ summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+
+ # Measure only the structural overhead (headers, formatting, instructions)
+ template_structure = (
+ f"--- Global Objective ---\n{summarization_objective}\n\n"
+ f"--- Progress ---\nStep 100/100 | 10 sections completed, 4000 tokens\n\n" # Worst-case progress text
+ f"--- Existing Scratchpad (for context) ---\n"
+ f"--- New Text Chunk ---\n"
+ f"--- Instructions ---\n"
+ f"Extract NEW key information from this chunk that aligns with the objective. "
+ f"Be concise. Avoid repeating scratchpad content."
+ )
+ user_template_overhead = len(self.tokenize(template_structure))

- if debug:
- print(f"🔧 DEBUG Step {step_num}: Budget = {available_for_chunk}/{total_budget} tokens, "
- f"Scratchpad = {int(scratchpad_tokens)} tokens")
+ if debug:
+ print(f"🔧 DEBUG: Computed system prompt tokens: {base_system_tokens}")
+ print(f"🔧 DEBUG: Computed user template overhead: {user_template_overhead}")
+ print(f"🔧 DEBUG: (Note: Scratchpad and chunk content allocated separately)")

- return budget_info
+ # Reserve space for maximum expected scratchpad size
+ reserved_scratchpad_tokens = max_scratchpad_tokens

- # Initial budget calculation
- initial_budget = calculate_token_budgets()
- chunk_size_tokens = initial_budget["chunk_budget"]
+ total_budget = int(context_size * context_fill_percentage)
+ # Only count overhead, not the actual chunk/scratchpad content (that's reserved separately)
+ used_tokens = base_system_tokens + user_template_overhead + reserved_scratchpad_tokens + expected_generation_tokens

+ # FIXED chunk size - never changes during processing
+ FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))
+
+
  if debug:
- print(f"🔧 DEBUG: Initial chunk size: {chunk_size_tokens} word tokens")
+ print(f"\n🔧 DEBUG: Token budget breakdown:")
+ print(f" - Context size: {context_size} tokens")
+ print(f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)")
+ print(f" - Total budget: {total_budget} tokens")
+ print(f" - System prompt: {base_system_tokens} tokens")
+ print(f" - User template overhead: {user_template_overhead} tokens")
+ print(f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens")
+ print(f" - Expected generation: {expected_generation_tokens} tokens")
+ print(f" - Total overhead: {used_tokens} tokens")
+ print(f" - Remaining for chunks: {total_budget - used_tokens} tokens")
+ print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens")
+
+ # Safety check
+ if FIXED_CHUNK_SIZE == 1024:
+ print(f"⚠️ WARNING: Chunk size is at minimum (1024)!")
+ print(f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available")
+ print(f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens")

  if streaming_callback:
  streaming_callback(
- f"Context Budget: {initial_budget['chunk_budget']:,}/{initial_budget['total_budget']:,} tokens "
- f"({initial_budget['efficiency_ratio']:.1%} efficiency)",
+ "\n".join([
+ f"\n🔧 DEBUG: Token budget breakdown:",
+ f" - Context size: {context_size} tokens",
+ f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)",
+ f" - Total budget: {total_budget} tokens",
+ f" - System prompt: {base_system_tokens} tokens",
+ f" - User template overhead: {user_template_overhead} tokens",
+ f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens",
+ f" - Expected generation: {expected_generation_tokens} tokens",
+ f" - Total overhead: {used_tokens} tokens",
+ f" - Remaining for chunks: {total_budget - used_tokens} tokens",
+ f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens"
+ ]
+ ),
+ MSG_TYPE.MSG_TYPE_STEP
+ )
+ if FIXED_CHUNK_SIZE == 1024:
+ streaming_callback(
+ "\n".join([
+ f"⚠️ WARNING: Chunk size is at minimum (1024)!",
+ f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available",
+ f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens"
+ ]
+ ),
+ MSG_TYPE.MSG_TYPE_STEP
+ )
+ streaming_callback(
+ f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
  MSG_TYPE.MSG_TYPE_STEP,
- {"budget_info": initial_budget}
+ {"fixed_chunk_size": FIXED_CHUNK_SIZE, "total_budget": total_budget}
  )

  # Single pass for short content
- if len(tokens) <= chunk_size_tokens:
+ if len(tokens) <= FIXED_CHUNK_SIZE:
  if debug:
- print("🔧 DEBUG: Content is short enough for single-pass processing")
+ print("🔧 DEBUG: Content fits in single pass")

  if streaming_callback:
  streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})

- # Generic single-pass system prompt
  system_prompt = (
  "You are an expert AI assistant for text analysis and summarization. "
  "Your task is to carefully analyze the provided text and generate a comprehensive, "
- "accurate, and well-structured response that directly addresses the user's objective. "
- "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
+ "accurate, and well-structured response that directly addresses the user's objective."
  )

  prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
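The multi-chunk path now derives one fixed chunk size from the context budget up front instead of recomputing it per step. A minimal sketch of the arithmetic; the 120/90-token overhead figures are assumed values for illustration, whereas the real code measures them with `self.tokenize`.

```python
def compute_fixed_chunk_size(context_size: int,
                             context_fill_percentage: float,
                             base_system_tokens: int,
                             user_template_overhead: int,
                             max_scratchpad_tokens: int,
                             expected_generation_tokens: int) -> int:
    """Return the per-chunk token budget; computed once, never shrinks later."""
    total_budget = int(context_size * context_fill_percentage)
    used_tokens = (base_system_tokens + user_template_overhead
                   + max_scratchpad_tokens + expected_generation_tokens)
    return max(1024, total_budget - used_tokens)

# With an 8192-token context, 0.75 fill and the default reservations
# (4000 scratchpad + 1500 generation, plus ~210 tokens of assumed prompt
# overhead), the budget collapses to the 1024 floor -- exactly the case
# the new warning reports. A larger context leaves real headroom:
print(compute_fixed_chunk_size(8192, 0.75, 120, 90, 4000, 1500))   # 1024
print(compute_fixed_chunk_size(32768, 0.75, 120, 90, 4000, 1500))  # 18866
```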
@@ -4413,120 +4488,173 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  print(f"🔧 DEBUG: Single-pass processing failed: {e}")
  return f"Error in single-pass processing: {e}"

- # Multi-chunk processing for long content
+ # ========================================
+ # FIXED: Multi-chunk processing with static sizing
+ # ========================================
  if debug:
- print("🔧 DEBUG: Using multi-chunk processing for long content")
+ print("🔧 DEBUG: Using multi-chunk processing with FIXED chunk size")

  chunk_summaries = []
  current_position = 0
  step_number = 1
+
+ # Pre-calculate total steps (won't change since chunk size is fixed)
+ total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens)) # Ceiling division
+
+ if debug:
+ print(f"🔧 DEBUG: Total estimated steps: {total_steps}")
+
+ # ========================================
+ # NEW: Scratchpad compression helper with dynamic token counting
+ # ========================================
+ def compress_scratchpad(scratchpad_sections: list) -> list:
+ """Compress scratchpad when it gets too large"""
+ if len(scratchpad_sections) <= 2:
+ return scratchpad_sections
+
+ combined = "\n\n---\n\n".join(scratchpad_sections)
+ # ENHANCED: Use actual tokenizer to count
+ current_size = len(self.tokenize(combined))
+
+ if current_size <= scratchpad_compression_threshold:
+ return scratchpad_sections
+
+ if debug:
+ print(f"🔧 DEBUG: Compressing scratchpad from {current_size} tokens")
+
+ compression_prompt = (
+ f"Consolidate the following analysis sections into a more concise summary. "
+ f"Retain all key facts, data points, and conclusions, but eliminate redundancy:\n\n"
+ f"{combined}"
+ )
+
+ try:
+ compressed = self.remove_thinking_blocks(
+ self.llm.generate_text(
+ compression_prompt,
+ system_prompt="You are a text consolidation expert. Create concise summaries that preserve all important information.",
+ **kwargs
+ )
+ )
+
+ if debug:
+ # ENHANCED: Use actual tokenizer
+ compressed_size = len(self.tokenize(compressed))
+ print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
+
+ return [compressed]
+ except Exception as e:
+ if debug:
+ print(f"🔧 DEBUG: Compression failed: {e}, keeping last 3 sections")
+ # Fallback: keep only recent sections
+ return scratchpad_sections[-3:]

+ # Main processing loop with FIXED chunk size
  while current_position < len(tokens):
- # Recalculate budget for each step for dynamic adaptation
- current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- current_budget = calculate_token_budgets(current_scratchpad, step_number)
- adaptive_chunk_size = max(500, current_budget["chunk_budget"])
-
- # Extract the next chunk of text
- chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
+ # Extract chunk using FIXED size
+ chunk_end = min(current_position + FIXED_CHUNK_SIZE, len(tokens))
  chunk_tokens = tokens[current_position:chunk_end]
  chunk_text = " ".join(chunk_tokens)

  if debug:
- print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
- f"({len(chunk_tokens)} tokens)")
+ print(f"\n🔧 DEBUG Step {step_number}/{total_steps}: Processing chunk from {current_position} to {chunk_end} "
+ f"({len(chunk_tokens)} tokens)")

- # Progress calculation
- remaining_tokens = len(tokens) - current_position
- estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
- total_estimated_steps = step_number + estimated_remaining_steps -1
- progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
+ # Progress calculation (based on fixed steps)
+ progress = (step_number / total_steps) * 90

  if streaming_callback:
  streaming_callback(
- f"Processing chunk {step_number}/{total_estimated_steps} - "
- f"Budget: {adaptive_chunk_size:,} tokens",
+ f"Processing chunk {step_number}/{total_steps} - Fixed size: {FIXED_CHUNK_SIZE:,} tokens",
  MSG_TYPE.MSG_TYPE_STEP_START,
- {"step": step_number, "progress": progress}
+ {"step": step_number, "total_steps": total_steps, "progress": progress}
  )

+ # ENHANCED: Check and compress scratchpad with actual token counting
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
+ if scratchpad_size > scratchpad_compression_threshold:
+ if debug:
+ print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
+ chunk_summaries = compress_scratchpad(chunk_summaries)
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
  try:
- # Generic, state-aware system prompt
  system_prompt = (
- f"You are a component in a multi-step text processing pipeline. Your role is to analyze a chunk of text and extract key information relevant to a global objective.\n\n"
- f"**Current Status:** You are on step {step_number} of approximately {total_estimated_steps} steps. Progress is at {progress:.1f}%.\n\n"
- f"**Your Task:**\n"
- f"Analyze the 'New Text Chunk' provided below. Extract and summarize any information, data points, or key ideas that are relevant to the 'Global Objective'.\n"
- f"Review the 'Existing Scratchpad Content' to understand what has already been found. Your goal is to add *new* insights that are not already captured.\n\n"
- f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
+ f"You are a component in a multi-step text processing pipeline analyzing step {step_number} of {total_steps}.\n\n"
+ f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+ f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+ f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+ f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
  )

- # Generic, context-aware user prompt
- summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions from the text."
- scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
+ summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+ scratchpad_status = "First chunk analysis" if not chunk_summaries else f"{len(chunk_summaries)} sections completed, {scratchpad_size} tokens"

  user_prompt = (
  f"--- Global Objective ---\n{summarization_objective}\n\n"
- f"--- Current Progress ---\n"
- f"{scratchpad_status} (Step {step_number}/{total_estimated_steps})\n\n"
- f"--- Existing Scratchpad Content (for context) ---\n{current_scratchpad}\n\n"
- f"--- New Text Chunk to Analyze ---\n{chunk_text}\n\n"
- f"--- Your Instructions ---\n"
- f"Extract key information from the 'New Text Chunk' that aligns with the 'Global Objective'. "
- f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
- f"If no new relevant information is found, state that clearly."
+ f"--- Progress ---\nStep {step_number}/{total_steps} | {scratchpad_status}\n\n"
+ f"--- Existing Scratchpad (for context) ---\n{current_scratchpad}\n\n"
+ f"--- New Text Chunk ---\n{chunk_text}\n\n"
+ f"--- Instructions ---\n"
+ f"Extract NEW key information from this chunk that aligns with the objective. "
+ f"Be concise. Avoid repeating scratchpad content."
  )

+ # ENHANCED: Compute actual prompt size
+ actual_prompt_tokens = len(self.tokenize(user_prompt))
+ actual_system_tokens = len(self.tokenize(system_prompt))
+
  if debug:
- print(f"🔧 DEBUG: Sending {len(user_prompt)} char prompt to LLM")
+ print(f"🔧 DEBUG: Actual prompt tokens: {actual_prompt_tokens}")
+ print(f"🔧 DEBUG: Actual system tokens: {actual_system_tokens}")
+ print(f"🔧 DEBUG: Total input tokens: {actual_prompt_tokens + actual_system_tokens}")
+ print(f"🔧 DEBUG: Scratchpad: {scratchpad_size} tokens")

  chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))

  if debug:
- print(f"🔧 DEBUG: Received {len(chunk_summary)} char response preview: {chunk_summary[:200]}...")
+ print(f"🔧 DEBUG: Received {len(chunk_summary)} char response")

- # Generic content filtering
+ # Filter logic
  filter_out = False
  filter_reason = "content accepted"

- # Check for explicit rejection signals
  if (chunk_summary.strip().lower().startswith('[no new') or
  chunk_summary.strip().lower().startswith('no new information')):
  filter_out = True
  filter_reason = "explicit rejection signal"
- # Check for overly short or generic refusal responses
  elif len(chunk_summary.strip()) < 25:
  filter_out = True
- filter_reason = "response too short to be useful"
- # Check for common error phrases
- elif any(error_phrase in chunk_summary.lower()[:150] for error_phrase in [
- 'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
+ filter_reason = "response too short"
+ elif any(error in chunk_summary.lower()[:150] for error in [
+ 'error', 'failed', 'cannot provide', 'unable to analyze']):
  filter_out = True
- filter_reason = "error or refusal response detected"
+ filter_reason = "error response"

  if not filter_out:
  chunk_summaries.append(chunk_summary.strip())
  content_added = True
  if debug:
- print(f"🔧 DEBUG: ✅ Content added to scratchpad (total sections: {len(chunk_summaries)})")
+ print(f"🔧 DEBUG: ✅ Content added (total sections: {len(chunk_summaries)})")
  else:
  content_added = False
  if debug:
- print(f"🔧 DEBUG: ❌ Content filtered out - {filter_reason}: {chunk_summary[:100]}...")
+ print(f"🔧 DEBUG: ❌ Filtered: {filter_reason}")

- # Update progress via callback
  if streaming_callback:
  updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  streaming_callback(
  updated_scratchpad,
  MSG_TYPE.MSG_TYPE_SCRATCHPAD,
- {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added, "filter_reason": filter_reason}
+ {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added}
  )
- progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
  streaming_callback(
  f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
  MSG_TYPE.MSG_TYPE_STEP_END,
- {"progress": progress_after}
+ {"progress": progress}
  )

  except Exception as e:
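Two loop invariants are introduced in this hunk: the total step count is fixed up front by ceiling division, and the scratchpad is compressed whenever its token count crosses `scratchpad_compression_threshold`. A small self-contained sketch; the word-count tokenizer and the trivial `compress` stand in for `self.tokenize` and the LLM-backed `compress_scratchpad` from the diff.

```python
FIXED_CHUNK_SIZE, overlap_tokens = 3000, 150
tokens = ["w"] * 10_000
total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens))  # ceil(10000/2850) == 4

scratchpad_compression_threshold = 3000

def count_tokens(text: str) -> int:
    return len(text.split())          # stand-in for self.tokenize(text)

def compress(sections: list) -> list:
    return sections[-3:]              # stand-in for the LLM consolidation step

def maybe_compress(chunk_summaries: list) -> list:
    """Compress the scratchpad once it exceeds the configured threshold."""
    joined = "\n\n---\n\n".join(chunk_summaries)
    if count_tokens(joined) > scratchpad_compression_threshold:
        return compress(chunk_summaries)
    return chunk_summaries
```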
@@ -4536,82 +4664,106 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  self.trace_exception(e)
  if streaming_callback:
  streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
- chunk_summaries.append(f"[Error processing chunk at step {step_number}: {str(e)[:150]}]")
+ chunk_summaries.append(f"[Error at step {step_number}: {str(e)[:150]}]")

- # Move to the next chunk, allowing for overlap
- current_position += max(1, adaptive_chunk_size - overlap_tokens)
+ # Move to next chunk with FIXED size
+ current_position += max(1, FIXED_CHUNK_SIZE - overlap_tokens)
  step_number += 1

- # Safety break for excessively long documents
+ # Safety break
  if step_number > 200:
- if debug: print(f"🔧 DEBUG: Safety break after {step_number-1} steps.")
- chunk_summaries.append("[Processing halted due to exceeding maximum step limit.]")
+ if debug:
+ print(f"🔧 DEBUG: Safety break at step {step_number}")
+ chunk_summaries.append("[Processing halted: exceeded maximum steps]")
  break

  if debug:
- print(f"\n🔧 DEBUG: Chunk processing complete. Total sections gathered: {len(chunk_summaries)}")
+ print(f"\n🔧 DEBUG: Processing complete. Sections: {len(chunk_summaries)}")

- # Return only the scratchpad content if requested
+ # Return scratchpad only if requested
  if return_scratchpad_only:
  final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  if streaming_callback:
- streaming_callback("Returning scratchpad content as final output.", MSG_TYPE.MSG_TYPE_STEP, {})
+ streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
  return final_scratchpad.strip()

- # Final Synthesis Step
+ # Final synthesis with STRONG objective reinforcement
  if streaming_callback:
- streaming_callback("Synthesizing final comprehensive response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
+ streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})

  if not chunk_summaries:
- error_msg = "No content was successfully processed or extracted from the document. The input might be empty or an issue occurred during processing."
+ error_msg = "No content was successfully processed."
  if debug:
  print(f"🔧 DEBUG: ❌ {error_msg}")
  return error_msg

  combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis of the provided text."
+ synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."

  if debug:
- print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} char scratchpad with {len(chunk_summaries)} sections.")
+ final_scratchpad_tokens = len(self.tokenize(combined_scratchpad))
+ print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {final_scratchpad_tokens} tokens, {len(chunk_summaries)} sections")

- # Generic synthesis prompts
+ # ENHANCED: Strong objective-focused synthesis
  synthesis_system_prompt = (
- "You are an expert AI assistant specializing in synthesizing information. "
- "Your task is to consolidate a series of text analysis sections from a scratchpad into a single, coherent, and well-structured final response. "
- "Eliminate redundancy, organize the content logically, and ensure the final output directly and comprehensively addresses the user's primary objective. "
- "Use markdown for clear formatting (e.g., headers, lists, bold text)."
+ f"You are completing a multi-step text processing task. "
+ f"Your role is to take analysis sections and produce the FINAL OUTPUT that directly fulfills the user's original objective.\n\n"
+ f"**CRITICAL:** Your output must DIRECTLY ADDRESS the user's objective, NOT just summarize the sections. "
+ f"The sections are intermediate work - transform them into the final deliverable the user requested."
  )

+ # ENHANCED: Explicit task reinforcement with examples of what NOT to do
+ task_type_hint = ""
+ if contextual_prompt:
+ lower_prompt = contextual_prompt.lower()
+ if any(word in lower_prompt for word in ['extract', 'list', 'identify', 'find']):
+ task_type_hint = "\n**Task Type:** This is an EXTRACTION/IDENTIFICATION task. Provide a structured list or catalog of items found, NOT a narrative summary."
+ elif any(word in lower_prompt for word in ['analyze', 'evaluate', 'assess', 'examine']):
+ task_type_hint = "\n**Task Type:** This is an ANALYSIS task. Provide insights, patterns, and evaluations, NOT just a description of content."
+ elif any(word in lower_prompt for word in ['compare', 'contrast', 'difference']):
+ task_type_hint = "\n**Task Type:** This is a COMPARISON task. Highlight similarities and differences, NOT separate summaries."
+ elif any(word in lower_prompt for word in ['answer', 'question', 'explain why', 'how does']):
+ task_type_hint = "\n**Task Type:** This is a QUESTION-ANSWERING task. Provide a direct answer, NOT a general overview."
+
  synthesis_user_prompt = (
- f"--- Final Objective ---\n{synthesis_objective}\n\n"
- f"--- Collected Analysis Sections (Scratchpad) ---\n{combined_scratchpad}\n\n"
- f"--- Your Final Task ---\n"
- f"Synthesize all the information from the 'Collected Analysis Sections' into a single, high-quality, and comprehensive response. "
- f"Your response must directly address the 'Final Objective'. "
- f"Organize your answer logically with clear sections using markdown headers. "
- f"Ensure all key information is included, remove any repetitive statements, and produce a polished, final document."
+ f"=== ORIGINAL USER OBJECTIVE (MOST IMPORTANT) ===\n{synthesis_objective}\n"
+ f"{task_type_hint}\n\n"
+ f"=== ANALYSIS SECTIONS (Raw Working Material) ===\n{combined_scratchpad}\n\n"
+ f"=== YOUR TASK ===\n"
+ f"Transform the analysis sections above into a final output that DIRECTLY FULFILLS the original objective.\n\n"
+ f"**DO:**\n"
+ f"- Focus exclusively on satisfying the user's original objective stated above\n"
+ f"- Organize information in whatever format best serves that objective\n"
+ f"- Remove redundancy and consolidate related points\n"
+ f"- Use markdown formatting for clarity\n\n"
+ f"**DO NOT:**\n"
+ f"- Provide a generic summary of the sections\n"
+ f"- Describe what the sections contain\n"
+ f"- Create an overview of the analysis process\n"
+ f"- Change the task into something different\n\n"
+ f"Remember: The user asked for '{synthesis_objective}' - deliver exactly that."
  )

  try:
  final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
  if debug:
- print(f"🔧 DEBUG: Final synthesis generated: {len(final_answer):,} characters")
+ print(f"🔧 DEBUG: Final synthesis: {len(final_answer):,} characters")
  if streaming_callback:
- streaming_callback("Final synthesis complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
+ streaming_callback("Final synthesis complete", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
  return final_answer.strip()

  except Exception as e:
- error_msg = f"The final synthesis step failed: {str(e)}. Returning the organized scratchpad content as a fallback."
- if debug: print(f"🔧 DEBUG: ❌ {error_msg}")
+ error_msg = f"Synthesis failed: {str(e)}. Returning scratchpad."
+ if debug:
+ print(f"🔧 DEBUG: ❌ {error_msg}")

- # Fallback to returning the organized scratchpad
  organized_scratchpad = (
  f"# Analysis Summary\n\n"
- f"*Note: The final synthesis process encountered an error. The raw, organized analysis sections are provided below.*\n\n"
- f"## Collected Sections\n\n"
- f"{combined_scratchpad}"
+ f"*Note: Final synthesis failed. Raw analysis sections below.*\n\n"
+ f"## Collected Sections\n\n{combined_scratchpad}"
  )
  return organized_scratchpad
+


  def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
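The synthesis step above now classifies the user's objective with a simple keyword heuristic so the final prompt can remind the model what kind of deliverable is expected. A standalone sketch of that heuristic; the keywords are copied from the hunk, while the function form and return strings are illustrative rather than library API.

```python
def task_type_hint(contextual_prompt: str) -> str:
    """Return a reminder of the expected deliverable type, or '' if nothing matches."""
    lower_prompt = contextual_prompt.lower()
    if any(w in lower_prompt for w in ['extract', 'list', 'identify', 'find']):
        return "EXTRACTION/IDENTIFICATION: provide a structured list, not a narrative summary."
    if any(w in lower_prompt for w in ['analyze', 'evaluate', 'assess', 'examine']):
        return "ANALYSIS: provide insights and evaluations, not a description of content."
    if any(w in lower_prompt for w in ['compare', 'contrast', 'difference']):
        return "COMPARISON: highlight similarities and differences, not separate summaries."
    if any(w in lower_prompt for w in ['answer', 'question', 'explain why', 'how does']):
        return "QUESTION-ANSWERING: give a direct answer, not a general overview."
    return ""

# task_type_hint("Extract every deadline mentioned in the contract")
# -> "EXTRACTION/IDENTIFICATION: provide a structured list, not a narrative summary."
```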