lollms-client 1.6.6__py3-none-any.whl → 1.6.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_core.py +385 -233
- lollms_client/lollms_discussion.py +15 -2
- lollms_client/lollms_stt_binding.py +56 -2
- lollms_client/stt_bindings/whisper/__init__.py +3 -2
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +6 -6
- lollms_client/tti_bindings/diffusers/server/main.py +67 -92
- lollms_client/tts_bindings/xtts/__init__.py +10 -11
- lollms_client/tts_bindings/xtts/server/main.py +1 -1
- {lollms_client-1.6.6.dist-info → lollms_client-1.6.10.dist-info}/METADATA +1 -1
- {lollms_client-1.6.6.dist-info → lollms_client-1.6.10.dist-info}/RECORD +16 -16
- {lollms_client-1.6.6.dist-info → lollms_client-1.6.10.dist-info}/WHEEL +0 -0
- {lollms_client-1.6.6.dist-info → lollms_client-1.6.10.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.6.6.dist-info → lollms_client-1.6.10.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -91,21 +91,6 @@ class LollmsClient():
             stt_binding_config (Optional[Dict]): Additional config for the STT binding.
             ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
             ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
-            service_key (Optional[str]): Shared authentication key or client_id.
-            verify_ssl_certificate (bool): Whether to verify SSL certificates.
-            ctx_size (Optional[int]): Default context size for LLM.
-            n_predict (Optional[int]): Default max tokens for LLM.
-            stream (bool): Default streaming mode for LLM.
-            temperature (float): Default temperature for LLM.
-            top_k (int): Default top_k for LLM.
-            top_p (float): Default top_p for LLM.
-            repeat_penalty (float): Default repeat penalty for LLM.
-            repeat_last_n (int): Default repeat last n for LLM.
-            seed (Optional[int]): Default seed for LLM.
-            n_threads (int): Default threads for LLM.
-            streaming_callback (Optional[Callable]): Default streaming callback for LLM.
-            user_name (str): Default user name for prompts.
-            ai_name (str): Default AI name for prompts.

         Raises:
             ValueError: If the primary LLM binding cannot be created.
@@ -160,93 +145,119 @@ class LollmsClient():
             except Exception as e:
                 trace_exception(e)
                 ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
+                self.tts = None

         if tti_binding_name:
+            try:
+                if tti_binding_config:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name,
+                        **{
+                            k: v
+                            for k, v in (tti_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name
+                    )
+                if self.tti is None:
+                    ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTI binding: {str(e)}")
+                self.tti = None
+
         if stt_binding_name:
+            try:
+                if stt_binding_config:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                        **{
+                            k: v
+                            for k, v in (stt_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )

+                else:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                    )
+                if self.stt is None:
+                    ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating STT binding: {str(e)}")
+                self.stt = None
+
         if ttv_binding_name:
+            try:
+                if ttv_binding_config:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name,
+                        **{
+                            k: v
+                            for k, v in ttv_binding_config.items()
+                            if k != "binding_name"
+                        }
+                    )

+                else:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name
+                    )
+                if self.ttv is None:
+                    ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTV binding: {str(e)}")
+                self.ttv = None

         if ttm_binding_name:
+            try:
+                if ttm_binding_config:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name,
+                        **{
+                            k: v
+                            for k, v in (ttm_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name
+                    )
+                if self.ttm is None:
+                    ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTM binding: {str(e)}")
+                self.ttm = None

         if mcp_binding_name:
+            try:
+                if mcp_binding_config:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        binding_name=mcp_binding_name,
+                        **{
+                            k: v
+                            for k, v in (mcp_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        mcp_binding_name
+                    )
+                if self.mcp is None:
+                    ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating MCP binding: {str(e)}")
+                self.mcp = None
         # --- Store Default Generation Parameters ---

         # --- Prompt Formatting Attributes ---
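Each modality binding is now wrapped in its own try/except, and any stray "binding_name" key inside the per-modality config dict is filtered out before being forwarded to the binding manager. A minimal sketch of how a client with an extra TTI binding might be constructed under that behavior; the llm_binding_name argument and the config keys are assumptions for illustration, since only the tti_binding_name/tti_binding_config parameters appear in this hunk:

    from lollms_client import LollmsClient

    client = LollmsClient(
        llm_binding_name="ollama",                 # assumed primary LLM binding name
        tti_binding_name="diffusers",              # TTI binding shipped in this package
        tti_binding_config={
            "binding_name": "diffusers",           # silently dropped by the new filtering
            "model_name": "some-diffusers-model",  # hypothetical, binding-specific key
        },
    )
    # In 1.6.10 a failed binding creation logs a warning and leaves client.tti as None
    # instead of raising, so callers should check client.tti before using it.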
@@ -1465,7 +1476,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             return "🎨 Creating an image based on your request"

         # Handle RAG (data store) tools by their pattern
-        elif "
+        elif "rag::" in tool_name:
             # Extract the friendly name of the data source
             source_name = tool_name.split("::")[-1].replace("_", " ").title()
             return f"🔍 Searching {source_name} for relevant information"
@@ -1516,7 +1527,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             final_answer_temperature=0.7
         if rag_top_k is None:
             rag_top_k=5
-
+
+        tools_infos = []
         def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
             if not streaming_callback: return None
             is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
@@ -1543,38 +1555,44 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             mcp_tools = self.mcp.discover_tools(force_refresh=True)
             if isinstance(use_mcps, list):
                 filtered_tools = [t for t in mcp_tools if t["name"] in use_mcps]
+                tools_infos+=[f" 🛠️{f['name']}" for f in filtered_tools]
                 all_discovered_tools.extend(filtered_tools)
                 log_event(f" ✅ Loaded {len(filtered_tools)} specific MCP tools: {', '.join(use_mcps)}", MSG_TYPE.MSG_TYPE_INFO)
             elif use_mcps is True:
+                tools_infos+=[f" 🛠️{f['name']}" for f in mcp_tools]
                 all_discovered_tools.extend(mcp_tools)
                 log_event(f" ✅ Loaded {len(mcp_tools)} MCP tools", MSG_TYPE.MSG_TYPE_INFO)

         if use_data_store:
             log_event(f" 📚 Setting up {len(use_data_store)} knowledge bases...", MSG_TYPE.MSG_TYPE_INFO)
             for name, info in use_data_store.items():
-
+                ASCIIColors.info(f"use_data_store item:\n{name}\n{info}")
+                tool_name, description, call_fn = f"rag::{name}", f"Queries the '{name}' knowledge base.", None
                 if callable(info): call_fn = info
                 elif isinstance(info, dict):
                     if "callable" in info and callable(info["callable"]): call_fn = info["callable"]
-                    description = info.get("description", description)
+                    description = info.get("description", "This is a datastore with the following description: \n" + description)
                 if call_fn:
                     visible_tools.append({"name": tool_name, "description": description, "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
                     rag_registry[tool_name] = call_fn
                     rag_tool_specs[tool_name] = {"default_top_k": rag_top_k, "default_min_sim": rag_min_similarity_percent}
-
-
+                    tools_infos.append(f" 📖 {name}")
         visible_tools.extend(all_discovered_tools)
         built_in_tools = [
             {"name": "local_tools::final_answer", "description": "Provide the final answer directly to the user.", "input_schema": {}},
             {"name": "local_tools::request_clarification", "description": "Ask the user for more specific information when the request is ambiguous.", "input_schema": {"type": "object", "properties": {"question": {"type": "string"}}, "required": ["question"]}},
             {"name": "local_tools::revise_plan", "description": "Update the execution plan based on new discoveries or changing requirements.", "input_schema": {"type": "object", "properties": {"reason": {"type": "string"}, "new_plan": {"type": "array"}}, "required": ["reason", "new_plan"]}}
         ]
+        tools_infos+=[f" 🔨 final_answer"," 🔨 request_clarification"," 🔨 revise_plan"]
+
+
         if getattr(self, "tti", None):
             built_in_tools.append({"name": "local_tools::generate_image", "description": "Generate an image from a text description.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}})

         all_visible_tools = visible_tools + built_in_tools
         tool_summary = "\n".join([f"- **{t['name']}**: {t['description']}" for t in all_visible_tools[:20]])

+        log_event("\n".join(tools_infos), MSG_TYPE.MSG_TYPE_INFO)
         log_event(f"✅ Ready with {len(all_visible_tools)} total capabilities", MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id, meta={"tool_count": len(all_visible_tools), "mcp_tools": len(all_discovered_tools), "rag_tools": len(rag_registry)})

         # Enhanced triage with better prompting
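The RAG registration loop above accepts, per data store, either a bare callable or a dict carrying "callable" and "description" keys, and now records each registered store in tools_infos. A minimal sketch of a use_data_store mapping; the retrieval function, its signature, and its return shape are assumptions for illustration, since this hunk only shows that the callable is stored and later invoked for a query:

    def search_project_docs(query: str):
        # Stand-in retriever: a real implementation would query a vector index.
        return [{"text": f"chunk matching {query!r}", "similarity": 0.92}]

    use_data_store = {
        "project_docs": {
            "callable": search_project_docs,
            "description": "Indexed project documentation.",
        },
        # A bare callable is also accepted; the default description is used in that case.
        "meeting_notes": search_project_docs,
    }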
@@ -1594,7 +1612,7 @@ AVAILABLE CAPABILITIES:
 Based on the request complexity and available tools, choose the optimal strategy:

 1. **DIRECT_ANSWER**: For simple greetings, basic questions, or requests that don't require any tools
-   - Use when: The request can be fully answered with your existing knowledge
+   - Use when: The request can be fully answered with your existing knowledge with confidence, and no tool seems to add any significant value to the answer
    - Example: "Hello", "What is Python?", "Explain quantum physics"

 2. **REQUEST_CLARIFICATION**: When the request is too vague or ambiguous
@@ -1612,16 +1630,14 @@ Based on the request complexity and available tools, choose the optimal strategy
 Provide your analysis in JSON format:
 {{"thought": "Detailed reasoning about the request complexity and requirements", "strategy": "ONE_OF_THE_FOUR_OPTIONS", "confidence": 0.8, "text_output": "Direct answer or clarification question if applicable", "required_tool_name": "specific tool name if SINGLE_TOOL strategy", "estimated_steps": 3}}"""

-        log_prompt("Triage Prompt", triage_prompt)
-
         triage_schema = {
             "thought": "string", "strategy": "string", "confidence": "number",
             "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
         }
-        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, **llm_generation_kwargs)
+        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, system_prompt=system_prompt, **llm_generation_kwargs)
         strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"

-        log_event(f"Strategy analysis complete", MSG_TYPE.MSG_TYPE_INFO, meta={
+        log_event(f"Strategy analysis complete.\n**confidence**: {strategy_data.get('confidence', 0.5)}\n**reasoning**: {strategy_data.get('thought', 'None')}", MSG_TYPE.MSG_TYPE_INFO, meta={
             "strategy": strategy,
             "confidence": strategy_data.get("confidence", 0.5),
             "estimated_steps": strategy_data.get("estimated_steps", 1),
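The triage call now also forwards system_prompt into generate_structured_content. A minimal standalone sketch of that call style, reusing the client from the earlier sketch; the schema keys are illustrative and the assumption, implied by the surrounding code, is that the method returns a dict parsed from the model's JSON (or None on failure):

    triage_schema = {"thought": "string", "strategy": "string", "confidence": "number"}
    strategy_data = client.generate_structured_content(
        prompt="Classify this request: 'What is Python?'",
        schema=triage_schema,
        temperature=0.1,
        system_prompt="You are a triage assistant.",  # newly forwarded in 1.6.10
    )
    strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"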
@@ -1760,7 +1776,7 @@ RESPONSE:"""
         }
         if tool_name in descriptions:
             return descriptions[tool_name]
-        if "
+        if "rag::" in tool_name:
             return f"🔍 Searching {tool_name.split('::')[-1]} knowledge base"
         if requires_code:
             return "💻 Processing code"
@@ -1829,7 +1845,7 @@ RESPONSE:"""

         # Enhanced planning phase
         planning_step_id = log_event_fn("📋 Creating adaptive execution plan...", MSG_TYPE.MSG_TYPE_STEP_START)
-        execution_plan = planner.decompose_task(original_user_prompt, context or "")
+        execution_plan = planner.decompose_task(original_user_prompt, context or "", "\n".join([f"{tool['name']}:{tool['description']}" for tool in all_visible_tools]))
         current_plan_version = 1

         log_event_fn(f"Initial plan created with {len(execution_plan.tasks)} tasks", MSG_TYPE.MSG_TYPE_INFO, meta={
@@ -4307,28 +4323,31 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         contextual_prompt: Optional[str] = None,
         system_prompt: str | None = None,
         context_fill_percentage: float = 0.75,
-        overlap_tokens: int = 150,
+        overlap_tokens: int = 150,
         expected_generation_tokens: int = 1500,
+        max_scratchpad_tokens: int = 4000,
+        scratchpad_compression_threshold: int = 3000,
         streaming_callback: Optional[Callable] = None,
         return_scratchpad_only: bool = False,
         debug: bool = True,
+        ctx_size=None,
         **kwargs
     ) -> str:
         """
-        Processes long text
-
+        Processes long text with FIXED chunk sizing and managed scratchpad growth.
+        Now uses dynamic token calculation based on actual model tokenizer.
         """

         if debug:
             print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")

         # Validate context fill percentage
-        if not (0.1 <= context_fill_percentage <= 0
-            raise ValueError(f"context_fill_percentage must be between 0.1 and 0
+        if not (0.1 <= context_fill_percentage <= 1.0):
+            raise ValueError(f"context_fill_percentage must be between 0.1 and 1.0, got {context_fill_percentage}")

         # Get context size
         try:
-            context_size = self.llm.get_context_size() or 8192
+            context_size = ctx_size or self.llm.default_ctx_size or self.llm.get_context_size() or 8192
         except:
             context_size = 8192
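The signature above gains max_scratchpad_tokens, scratchpad_compression_threshold, and a ctx_size override. A hedged sketch of a call using the new knobs; the method name long_context_processing is an assumption (the name is not visible in this hunk), and lc / very_long_text are placeholders:

    result = lc.long_context_processing(        # method name assumed, not shown in this hunk
        text_to_process=very_long_text,
        contextual_prompt="Extract every API change mentioned in the text.",
        context_fill_percentage=0.75,
        expected_generation_tokens=1500,
        max_scratchpad_tokens=4000,             # new in 1.6.10
        scratchpad_compression_threshold=3000,  # new in 1.6.10
        ctx_size=8192,                          # new override; otherwise the binding's context size is used
        debug=True,
    )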
@@ -4339,65 +4358,121 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         if not text_to_process:
             return ""

-        # Use
+        # Use word-based split for token estimation
         tokens = text_to_process.split()
         if debug:
             print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")

-        #
+        # ========================================
+        # ENHANCED: Dynamically calculate token sizes using actual tokenizer
+        # ========================================
+
+        # Create template system prompt to measure its token size
+        template_system_prompt = (
+            f"You are a component in a multi-step text processing pipeline analyzing step 1 of 100.\n\n"
+            f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+            f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+            f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+            f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
+        )
+        base_system_tokens = len(self.tokenize(template_system_prompt))
+
+        # Create MINIMAL template user prompt (structure only, without content placeholders)
+        summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+
+        # Measure only the structural overhead (headers, formatting, instructions)
+        template_structure = (
+            f"--- Global Objective ---\n{summarization_objective}\n\n"
+            f"--- Progress ---\nStep 100/100 | 10 sections completed, 4000 tokens\n\n"  # Worst-case progress text
+            f"--- Existing Scratchpad (for context) ---\n"
+            f"--- New Text Chunk ---\n"
+            f"--- Instructions ---\n"
+            f"Extract NEW key information from this chunk that aligns with the objective. "
+            f"Be concise. Avoid repeating scratchpad content."
+        )
+        user_template_overhead = len(self.tokenize(template_structure))

+        if debug:
+            print(f"🔧 DEBUG: Computed system prompt tokens: {base_system_tokens}")
+            print(f"🔧 DEBUG: Computed user template overhead: {user_template_overhead}")
+            print(f"🔧 DEBUG: (Note: Scratchpad and chunk content allocated separately)")

+        # Reserve space for maximum expected scratchpad size
+        reserved_scratchpad_tokens = max_scratchpad_tokens

+        total_budget = int(context_size * context_fill_percentage)
+        # Only count overhead, not the actual chunk/scratchpad content (that's reserved separately)
+        used_tokens = base_system_tokens + user_template_overhead + reserved_scratchpad_tokens + expected_generation_tokens

+        # FIXED chunk size - never changes during processing
+        FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))
+
         if debug:
-            print(f"🔧 DEBUG:
+            print(f"\n🔧 DEBUG: Token budget breakdown:")
+            print(f"   - Context size: {context_size} tokens")
+            print(f"   - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)")
+            print(f"   - Total budget: {total_budget} tokens")
+            print(f"   - System prompt: {base_system_tokens} tokens")
+            print(f"   - User template overhead: {user_template_overhead} tokens")
+            print(f"   - Reserved scratchpad: {reserved_scratchpad_tokens} tokens")
+            print(f"   - Expected generation: {expected_generation_tokens} tokens")
+            print(f"   - Total overhead: {used_tokens} tokens")
+            print(f"   - Remaining for chunks: {total_budget - used_tokens} tokens")
+            print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens")
+
+            # Safety check
+            if FIXED_CHUNK_SIZE == 1024:
+                print(f"⚠️ WARNING: Chunk size is at minimum (1024)!")
+                print(f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available")
+                print(f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens")

         if streaming_callback:
             streaming_callback(
+                "\n".join([
+                    f"\n🔧 DEBUG: Token budget breakdown:",
+                    f"   - Context size: {context_size} tokens",
+                    f"   - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)",
+                    f"   - Total budget: {total_budget} tokens",
+                    f"   - System prompt: {base_system_tokens} tokens",
+                    f"   - User template overhead: {user_template_overhead} tokens",
+                    f"   - Reserved scratchpad: {reserved_scratchpad_tokens} tokens",
+                    f"   - Expected generation: {expected_generation_tokens} tokens",
+                    f"   - Total overhead: {used_tokens} tokens",
+                    f"   - Remaining for chunks: {total_budget - used_tokens} tokens",
+                    f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens"
+                    ]
+                ),
+                MSG_TYPE.MSG_TYPE_STEP
+            )
+            if FIXED_CHUNK_SIZE == 1024:
+                streaming_callback(
+                    "\n".join([
+                        f"⚠️ WARNING: Chunk size is at minimum (1024)!",
+                        f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available",
+                        f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens"
+                        ]
+                    ),
+                    MSG_TYPE.MSG_TYPE_STEP
+                )
+            streaming_callback(
+                f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
                 MSG_TYPE.MSG_TYPE_STEP,
-                {"
+                {"fixed_chunk_size": FIXED_CHUNK_SIZE, "total_budget": total_budget}
             )

         # Single pass for short content
-        if len(tokens) <=
+        if len(tokens) <= FIXED_CHUNK_SIZE:
             if debug:
-                print("🔧 DEBUG: Content
+                print("🔧 DEBUG: Content fits in single pass")

             if streaming_callback:
                 streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})

-            # Generic single-pass system prompt
             system_prompt = (
                 "You are an expert AI assistant for text analysis and summarization. "
                 "Your task is to carefully analyze the provided text and generate a comprehensive, "
-                "accurate, and well-structured response that directly addresses the user's objective.
-                "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
+                "accurate, and well-structured response that directly addresses the user's objective."
             )

             prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
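To make the budget arithmetic above concrete, a short worked example with illustrative numbers (not taken from the source):

    # context_size = 8192, context_fill_percentage = 0.75
    total_budget = int(8192 * 0.75)                   # 6144
    # assume the tokenized templates measure base_system_tokens = 80 and user_template_overhead = 60
    used_tokens = 80 + 60 + 4000 + 1500               # + reserved scratchpad + expected generation = 5640
    FIXED_CHUNK_SIZE = max(1024, int(6144 - 5640))    # max(1024, 504) = 1024
    # The result hits the 1024 floor, so the "chunk size is at minimum" warning above would fire;
    # raising ctx_size or lowering max_scratchpad_tokens/expected_generation_tokens widens the chunks.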
@@ -4413,120 +4488,173 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                 print(f"🔧 DEBUG: Single-pass processing failed: {e}")
                 return f"Error in single-pass processing: {e}"

-        #
+        # ========================================
+        # FIXED: Multi-chunk processing with static sizing
+        # ========================================
         if debug:
-            print("🔧 DEBUG: Using multi-chunk processing
+            print("🔧 DEBUG: Using multi-chunk processing with FIXED chunk size")

         chunk_summaries = []
         current_position = 0
         step_number = 1
+
+        # Pre-calculate total steps (won't change since chunk size is fixed)
+        total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens))  # Ceiling division
+
+        if debug:
+            print(f"🔧 DEBUG: Total estimated steps: {total_steps}")
+
+        # ========================================
+        # NEW: Scratchpad compression helper with dynamic token counting
+        # ========================================
+        def compress_scratchpad(scratchpad_sections: list) -> list:
+            """Compress scratchpad when it gets too large"""
+            if len(scratchpad_sections) <= 2:
+                return scratchpad_sections
+
+            combined = "\n\n---\n\n".join(scratchpad_sections)
+            # ENHANCED: Use actual tokenizer to count
+            current_size = len(self.tokenize(combined))
+
+            if current_size <= scratchpad_compression_threshold:
+                return scratchpad_sections
+
+            if debug:
+                print(f"🔧 DEBUG: Compressing scratchpad from {current_size} tokens")
+
+            compression_prompt = (
+                f"Consolidate the following analysis sections into a more concise summary. "
+                f"Retain all key facts, data points, and conclusions, but eliminate redundancy:\n\n"
+                f"{combined}"
+            )
+
+            try:
+                compressed = self.remove_thinking_blocks(
+                    self.llm.generate_text(
+                        compression_prompt,
+                        system_prompt="You are a text consolidation expert. Create concise summaries that preserve all important information.",
+                        **kwargs
+                    )
+                )
+
+                if debug:
+                    # ENHANCED: Use actual tokenizer
+                    compressed_size = len(self.tokenize(compressed))
+                    print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
+
+                return [compressed]
+            except Exception as e:
+                if debug:
+                    print(f"🔧 DEBUG: Compression failed: {e}, keeping last 3 sections")
+                # Fallback: keep only recent sections
+                return scratchpad_sections[-3:]

+        # Main processing loop with FIXED chunk size
         while current_position < len(tokens):
-            #
-            current_budget = calculate_token_budgets(current_scratchpad, step_number)
-            adaptive_chunk_size = max(500, current_budget["chunk_budget"])
-
-            # Extract the next chunk of text
-            chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
+            # Extract chunk using FIXED size
+            chunk_end = min(current_position + FIXED_CHUNK_SIZE, len(tokens))
             chunk_tokens = tokens[current_position:chunk_end]
             chunk_text = " ".join(chunk_tokens)

             if debug:
-                print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
+                print(f"\n🔧 DEBUG Step {step_number}/{total_steps}: Processing chunk from {current_position} to {chunk_end} "
+                      f"({len(chunk_tokens)} tokens)")

-            # Progress calculation
-            estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
-            total_estimated_steps = step_number + estimated_remaining_steps -1
-            progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
+            # Progress calculation (based on fixed steps)
+            progress = (step_number / total_steps) * 90

             if streaming_callback:
                 streaming_callback(
-                    f"Processing chunk {step_number}/{
-                    f"Budget: {adaptive_chunk_size:,} tokens",
+                    f"Processing chunk {step_number}/{total_steps} - Fixed size: {FIXED_CHUNK_SIZE:,} tokens",
                     MSG_TYPE.MSG_TYPE_STEP_START,
-                    {"step": step_number, "progress": progress}
+                    {"step": step_number, "total_steps": total_steps, "progress": progress}
                 )
+            # ENHANCED: Check and compress scratchpad with actual token counting
+            current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+            scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
+            if scratchpad_size > scratchpad_compression_threshold:
+                if debug:
+                    print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
+                chunk_summaries = compress_scratchpad(chunk_summaries)
+                current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+                scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
             try:
-                # Generic, state-aware system prompt
                 system_prompt = (
-                    f"You are a component in a multi-step text processing pipeline
-                    f"**
-                    f"
-                    f"
-                    f"
-                    f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
+                    f"You are a component in a multi-step text processing pipeline analyzing step {step_number} of {total_steps}.\n\n"
+                    f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+                    f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+                    f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+                    f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
                 )

-                scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
+                summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+                scratchpad_status = "First chunk analysis" if not chunk_summaries else f"{len(chunk_summaries)} sections completed, {scratchpad_size} tokens"

                 user_prompt = (
                     f"--- Global Objective ---\n{summarization_objective}\n\n"
-                    f"---
-                    f"
-                    f"---
-                    f"---
-                    f"
-                    f"
-                    f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
-                    f"If no new relevant information is found, state that clearly."
+                    f"--- Progress ---\nStep {step_number}/{total_steps} | {scratchpad_status}\n\n"
+                    f"--- Existing Scratchpad (for context) ---\n{current_scratchpad}\n\n"
+                    f"--- New Text Chunk ---\n{chunk_text}\n\n"
+                    f"--- Instructions ---\n"
+                    f"Extract NEW key information from this chunk that aligns with the objective. "
+                    f"Be concise. Avoid repeating scratchpad content."
                 )

+                # ENHANCED: Compute actual prompt size
+                actual_prompt_tokens = len(self.tokenize(user_prompt))
+                actual_system_tokens = len(self.tokenize(system_prompt))
+
                 if debug:
-                    print(f"🔧 DEBUG:
+                    print(f"🔧 DEBUG: Actual prompt tokens: {actual_prompt_tokens}")
+                    print(f"🔧 DEBUG: Actual system tokens: {actual_system_tokens}")
+                    print(f"🔧 DEBUG: Total input tokens: {actual_prompt_tokens + actual_system_tokens}")
+                    print(f"🔧 DEBUG: Scratchpad: {scratchpad_size} tokens")

                 chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))

                 if debug:
-                    print(f"🔧 DEBUG: Received {len(chunk_summary)} char response
+                    print(f"🔧 DEBUG: Received {len(chunk_summary)} char response")

-                #
+                # Filter logic
                 filter_out = False
                 filter_reason = "content accepted"

-                # Check for explicit rejection signals
                 if (chunk_summary.strip().lower().startswith('[no new') or
                     chunk_summary.strip().lower().startswith('no new information')):
                     filter_out = True
                     filter_reason = "explicit rejection signal"
-                # Check for overly short or generic refusal responses
                 elif len(chunk_summary.strip()) < 25:
                     filter_out = True
-                    filter_reason = "response too short
-                    'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
+                    filter_reason = "response too short"
+                elif any(error in chunk_summary.lower()[:150] for error in [
+                    'error', 'failed', 'cannot provide', 'unable to analyze']):
                     filter_out = True
-                    filter_reason = "error
+                    filter_reason = "error response"

                 if not filter_out:
                     chunk_summaries.append(chunk_summary.strip())
                     content_added = True
                     if debug:
-                        print(f"🔧 DEBUG: ✅ Content added
+                        print(f"🔧 DEBUG: ✅ Content added (total sections: {len(chunk_summaries)})")
                 else:
                     content_added = False
                     if debug:
-                        print(f"🔧 DEBUG: ❌
+                        print(f"🔧 DEBUG: ❌ Filtered: {filter_reason}")

-                # Update progress via callback
                 if streaming_callback:
                     updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
                     streaming_callback(
                         updated_scratchpad,
                         MSG_TYPE.MSG_TYPE_SCRATCHPAD,
-                        {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added
+                        {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added}
                     )
-                    progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
                     streaming_callback(
                         f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
                         MSG_TYPE.MSG_TYPE_STEP_END,
-                        {"progress":
+                        {"progress": progress}
                     )

             except Exception as e:
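The total_steps expression above relies on the negative floor-division idiom for a ceiling divide. A quick self-contained check with illustrative numbers:

    import math

    tokens_count = 10_000
    FIXED_CHUNK_SIZE = 3_000
    overlap_tokens = 150

    total_steps = -(-tokens_count // (FIXED_CHUNK_SIZE - overlap_tokens))
    # Both expressions give 4 for these numbers: ceil(10000 / 2850)
    assert total_steps == math.ceil(tokens_count / (FIXED_CHUNK_SIZE - overlap_tokens))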
@@ -4536,82 +4664,106 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             self.trace_exception(e)
             if streaming_callback:
                 streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
-            chunk_summaries.append(f"[Error
+            chunk_summaries.append(f"[Error at step {step_number}: {str(e)[:150]}]")

-            # Move to
-            current_position += max(1,
+            # Move to next chunk with FIXED size
+            current_position += max(1, FIXED_CHUNK_SIZE - overlap_tokens)
             step_number += 1

-            # Safety break
+            # Safety break
             if step_number > 200:
-                if debug:
+                if debug:
+                    print(f"🔧 DEBUG: Safety break at step {step_number}")
+                chunk_summaries.append("[Processing halted: exceeded maximum steps]")
                 break

         if debug:
-            print(f"\n🔧 DEBUG:
+            print(f"\n🔧 DEBUG: Processing complete. Sections: {len(chunk_summaries)}")

-        # Return
+        # Return scratchpad only if requested
         if return_scratchpad_only:
             final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
             if streaming_callback:
-                streaming_callback("Returning scratchpad content
+                streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
             return final_scratchpad.strip()

-        # Final
+        # Final synthesis with STRONG objective reinforcement
         if streaming_callback:
-            streaming_callback("Synthesizing final
+            streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})

         if not chunk_summaries:
-            error_msg = "No content was successfully processed
+            error_msg = "No content was successfully processed."
             if debug:
                 print(f"🔧 DEBUG: ❌ {error_msg}")
             return error_msg

         combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
-        synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis
+        synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."

         if debug:
+            final_scratchpad_tokens = len(self.tokenize(combined_scratchpad))
+            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {final_scratchpad_tokens} tokens, {len(chunk_summaries)} sections")

-        #
+        # ENHANCED: Strong objective-focused synthesis
         synthesis_system_prompt = (
-            "You are
-            "Your
-            "
-            "
+            f"You are completing a multi-step text processing task. "
+            f"Your role is to take analysis sections and produce the FINAL OUTPUT that directly fulfills the user's original objective.\n\n"
+            f"**CRITICAL:** Your output must DIRECTLY ADDRESS the user's objective, NOT just summarize the sections. "
+            f"The sections are intermediate work - transform them into the final deliverable the user requested."
         )

+        # ENHANCED: Explicit task reinforcement with examples of what NOT to do
+        task_type_hint = ""
+        if contextual_prompt:
+            lower_prompt = contextual_prompt.lower()
+            if any(word in lower_prompt for word in ['extract', 'list', 'identify', 'find']):
+                task_type_hint = "\n**Task Type:** This is an EXTRACTION/IDENTIFICATION task. Provide a structured list or catalog of items found, NOT a narrative summary."
+            elif any(word in lower_prompt for word in ['analyze', 'evaluate', 'assess', 'examine']):
+                task_type_hint = "\n**Task Type:** This is an ANALYSIS task. Provide insights, patterns, and evaluations, NOT just a description of content."
+            elif any(word in lower_prompt for word in ['compare', 'contrast', 'difference']):
+                task_type_hint = "\n**Task Type:** This is a COMPARISON task. Highlight similarities and differences, NOT separate summaries."
+            elif any(word in lower_prompt for word in ['answer', 'question', 'explain why', 'how does']):
+                task_type_hint = "\n**Task Type:** This is a QUESTION-ANSWERING task. Provide a direct answer, NOT a general overview."

         synthesis_user_prompt = (
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
+            f"=== ORIGINAL USER OBJECTIVE (MOST IMPORTANT) ===\n{synthesis_objective}\n"
+            f"{task_type_hint}\n\n"
+            f"=== ANALYSIS SECTIONS (Raw Working Material) ===\n{combined_scratchpad}\n\n"
+            f"=== YOUR TASK ===\n"
+            f"Transform the analysis sections above into a final output that DIRECTLY FULFILLS the original objective.\n\n"
+            f"**DO:**\n"
+            f"- Focus exclusively on satisfying the user's original objective stated above\n"
+            f"- Organize information in whatever format best serves that objective\n"
+            f"- Remove redundancy and consolidate related points\n"
+            f"- Use markdown formatting for clarity\n\n"
+            f"**DO NOT:**\n"
+            f"- Provide a generic summary of the sections\n"
+            f"- Describe what the sections contain\n"
+            f"- Create an overview of the analysis process\n"
+            f"- Change the task into something different\n\n"
+            f"Remember: The user asked for '{synthesis_objective}' - deliver exactly that."
         )

         try:
             final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
             if debug:
-                print(f"🔧 DEBUG: Final synthesis
+                print(f"🔧 DEBUG: Final synthesis: {len(final_answer):,} characters")
             if streaming_callback:
-                streaming_callback("Final synthesis complete
+                streaming_callback("Final synthesis complete", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
             return final_answer.strip()

         except Exception as e:
-            error_msg = f"
-            if debug:
+            error_msg = f"Synthesis failed: {str(e)}. Returning scratchpad."
+            if debug:
+                print(f"🔧 DEBUG: ❌ {error_msg}")

-            # Fallback to returning the organized scratchpad
             organized_scratchpad = (
                 f"# Analysis Summary\n\n"
-                f"*Note:
-                f"## Collected Sections\n\n"
-                f"{combined_scratchpad}"
+                f"*Note: Final synthesis failed. Raw analysis sections below.*\n\n"
+                f"## Collected Sections\n\n{combined_scratchpad}"
             )
             return organized_scratchpad
+


     def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):