lollms-client 1.6.7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_agentic.py +4 -2
- lollms_client/lollms_core.py +263 -138
- lollms_client/lollms_discussion.py +15 -2
- lollms_client/lollms_stt_binding.py +59 -3
- lollms_client/lollms_tti_binding.py +3 -1
- lollms_client/lollms_ttm_binding.py +3 -1
- lollms_client/lollms_tts_binding.py +2 -2
- lollms_client/lollms_ttv_binding.py +3 -1
- lollms_client/stt_bindings/whisper/__init__.py +20 -12
- lollms_client/stt_bindings/whispercpp/__init__.py +7 -1
- lollms_client/tti_bindings/diffusers/__init__.py +9 -10
- lollms_client/tti_bindings/diffusers/server/main.py +10 -59
- lollms_client/tti_bindings/gemini/__init__.py +4 -1
- lollms_client/tti_bindings/leonardo_ai/__init__.py +5 -2
- lollms_client/tti_bindings/lollms/__init__.py +4 -1
- lollms_client/tti_bindings/novita_ai/__init__.py +4 -1
- lollms_client/tti_bindings/openai/__init__.py +10 -11
- lollms_client/tti_bindings/stability_ai/__init__.py +4 -2
- lollms_client/ttm_bindings/audiocraft/__init__.py +7 -12
- lollms_client/ttm_bindings/beatoven_ai/__init__.py +7 -3
- lollms_client/ttm_bindings/lollms/__init__.py +4 -17
- lollms_client/ttm_bindings/replicate/__init__.py +7 -4
- lollms_client/ttm_bindings/stability_ai/__init__.py +7 -4
- lollms_client/ttm_bindings/topmediai/__init__.py +6 -3
- lollms_client/tts_bindings/bark/__init__.py +7 -10
- lollms_client/tts_bindings/piper_tts/__init__.py +7 -10
- lollms_client/tts_bindings/xtts/__init__.py +8 -8
- {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/METADATA +1 -1
- {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/RECORD +33 -33
- {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/WHEEL +0 -0
- {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.6.7.dist-info → lollms_client-1.7.0.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
```diff
@@ -91,21 +91,6 @@ class LollmsClient():
             stt_binding_config (Optional[Dict]): Additional config for the STT binding.
             ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
             ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
-            service_key (Optional[str]): Shared authentication key or client_id.
-            verify_ssl_certificate (bool): Whether to verify SSL certificates.
-            ctx_size (Optional[int]): Default context size for LLM.
-            n_predict (Optional[int]): Default max tokens for LLM.
-            stream (bool): Default streaming mode for LLM.
-            temperature (float): Default temperature for LLM.
-            top_k (int): Default top_k for LLM.
-            top_p (float): Default top_p for LLM.
-            repeat_penalty (float): Default repeat penalty for LLM.
-            repeat_last_n (int): Default repeat last n for LLM.
-            seed (Optional[int]): Default seed for LLM.
-            n_threads (int): Default threads for LLM.
-            streaming_callback (Optional[Callable]): Default streaming callback for LLM.
-            user_name (str): Default user name for prompts.
-            ai_name (str): Default AI name for prompts.

         Raises:
             ValueError: If the primary LLM binding cannot be created.
```
```diff
@@ -160,93 +145,119 @@ class LollmsClient():
             except Exception as e:
                 trace_exception(e)
                 ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
+                self.tts = None

         if tti_binding_name:
-            (16 lines removed; original content not captured in this view)
+            try:
+                if tti_binding_config:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name,
+                        **{
+                            k: v
+                            for k, v in (tti_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.tti = self.tti_binding_manager.create_binding(
+                        binding_name=tti_binding_name
+                    )
+                if self.tti is None:
+                    ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTI binding: {str(e)}")
+                self.tti = None
+
         if stt_binding_name:
-            (9 lines removed; original content not captured in this view)
+            try:
+                if stt_binding_config:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                        **{
+                            k: v
+                            for k, v in (stt_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )

-            (6 lines removed; original content not captured in this view)
+                else:
+                    self.stt = self.stt_binding_manager.create_binding(
+                        binding_name=stt_binding_name,
+                    )
+                if self.stt is None:
+                    ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating STT binding: {str(e)}")
+                self.stt = None
+
         if ttv_binding_name:
-            (9 lines removed; original content not captured in this view)
+            try:
+                if ttv_binding_config:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name,
+                        **{
+                            k: v
+                            for k, v in ttv_binding_config.items()
+                            if k != "binding_name"
+                        }
+                    )

-            (6 lines removed; original content not captured in this view)
+                else:
+                    self.ttv = self.ttv_binding_manager.create_binding(
+                        binding_name=ttv_binding_name
+                    )
+                if self.ttv is None:
+                    ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTV binding: {str(e)}")
+                self.ttv = None

         if ttm_binding_name:
-            (15 lines removed; original content not captured in this view)
+            try:
+                if ttm_binding_config:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name,
+                        **{
+                            k: v
+                            for k, v in (ttm_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.ttm = self.ttm_binding_manager.create_binding(
+                        binding_name=ttm_binding_name
+                    )
+                if self.ttm is None:
+                    ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTM binding: {str(e)}")
+                self.ttm = None

         if mcp_binding_name:
-            (16 lines removed; original content not captured in this view)
+            try:
+                if mcp_binding_config:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        binding_name=mcp_binding_name,
+                        **{
+                            k: v
+                            for k, v in (mcp_binding_config or {}).items()
+                            if k != "binding_name"
+                        }
+                    )
+                else:
+                    self.mcp = self.mcp_binding_manager.create_binding(
+                        mcp_binding_name
+                    )
+                if self.mcp is None:
+                    ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating MCP binding: {str(e)}")
+                self.mcp = None
         # --- Store Default Generation Parameters ---

         # --- Prompt Formatting Attributes ---
```
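Each modality binding is now created inside its own try/except, and any stored binding config is forwarded as keyword arguments after dropping its `binding_name` key. A minimal sketch of that filtering idiom; the config keys below are illustrative only, not real binding options:

```python
# A stored binding config may still carry its "binding_name" entry, which is
# stripped before the remaining keys are forwarded to create_binding as kwargs.
tti_binding_config = {"binding_name": "diffusers", "model_name": "example-model", "device": "cuda"}

forwarded_kwargs = {
    k: v
    for k, v in (tti_binding_config or {}).items()
    if k != "binding_name"
}
print(forwarded_kwargs)  # {'model_name': 'example-model', 'device': 'cuda'}
```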
```diff
@@ -1465,7 +1476,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             return "🎨 Creating an image based on your request"

         # Handle RAG (data store) tools by their pattern
-        elif "
+        elif "rag::" in tool_name:
             # Extract the friendly name of the data source
             source_name = tool_name.split("::")[-1].replace("_", " ").title()
             return f"🔍 Searching {source_name} for relevant information"
```
```diff
@@ -1516,7 +1527,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             final_answer_temperature=0.7
         if rag_top_k is None:
             rag_top_k=5
-
+
+        tools_infos = []
         def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
             if not streaming_callback: return None
             is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
```
```diff
@@ -1543,38 +1555,44 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
             mcp_tools = self.mcp.discover_tools(force_refresh=True)
             if isinstance(use_mcps, list):
                 filtered_tools = [t for t in mcp_tools if t["name"] in use_mcps]
+                tools_infos+=[f" 🛠️{f['name']}" for f in filtered_tools]
                 all_discovered_tools.extend(filtered_tools)
                 log_event(f" ✅ Loaded {len(filtered_tools)} specific MCP tools: {', '.join(use_mcps)}", MSG_TYPE.MSG_TYPE_INFO)
             elif use_mcps is True:
+                tools_infos+=[f" 🛠️{f['name']}" for f in mcp_tools]
                 all_discovered_tools.extend(mcp_tools)
                 log_event(f" ✅ Loaded {len(mcp_tools)} MCP tools", MSG_TYPE.MSG_TYPE_INFO)

         if use_data_store:
             log_event(f" 📚 Setting up {len(use_data_store)} knowledge bases...", MSG_TYPE.MSG_TYPE_INFO)
             for name, info in use_data_store.items():
-
+                ASCIIColors.info(f"use_data_store item:\n{name}\n{info}")
+                tool_name, description, call_fn = f"rag::{name}", f"Queries the '{name}' knowledge base.", None
                 if callable(info): call_fn = info
                 elif isinstance(info, dict):
                     if "callable" in info and callable(info["callable"]): call_fn = info["callable"]
-                    description = info.get("description", description)
+                    description = info.get("description", "This is a datastore with the following description: \n" + description)
                 if call_fn:
                     visible_tools.append({"name": tool_name, "description": description, "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
                     rag_registry[tool_name] = call_fn
                     rag_tool_specs[tool_name] = {"default_top_k": rag_top_k, "default_min_sim": rag_min_similarity_percent}
-
-
+                    tools_infos.append(f" 📖 {name}")
         visible_tools.extend(all_discovered_tools)
         built_in_tools = [
             {"name": "local_tools::final_answer", "description": "Provide the final answer directly to the user.", "input_schema": {}},
             {"name": "local_tools::request_clarification", "description": "Ask the user for more specific information when the request is ambiguous.", "input_schema": {"type": "object", "properties": {"question": {"type": "string"}}, "required": ["question"]}},
             {"name": "local_tools::revise_plan", "description": "Update the execution plan based on new discoveries or changing requirements.", "input_schema": {"type": "object", "properties": {"reason": {"type": "string"}, "new_plan": {"type": "array"}}, "required": ["reason", "new_plan"]}}
         ]
+        tools_infos+=[f" 🔨 final_answer"," 🔨 request_clarification"," 🔨 revise_plan"]
+
+
         if getattr(self, "tti", None):
             built_in_tools.append({"name": "local_tools::generate_image", "description": "Generate an image from a text description.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}})

         all_visible_tools = visible_tools + built_in_tools
         tool_summary = "\n".join([f"- **{t['name']}**: {t['description']}" for t in all_visible_tools[:20]])

+        log_event("\n".join(tools_infos), MSG_TYPE.MSG_TYPE_INFO)
         log_event(f"✅ Ready with {len(all_visible_tools)} total capabilities", MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id, meta={"tool_count": len(all_visible_tools), "mcp_tools": len(all_discovered_tools), "rag_tools": len(rag_registry)})

         # Enhanced triage with better prompting
```
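As this hunk shows, each `use_data_store` entry may be either a bare callable or a dict carrying a `description` and a `callable`, and every entry is exposed to the agent as a `rag::<name>` tool that takes a `query` string. A hedged sketch of what such a mapping could look like; the retrieval function and its return shape are assumptions for illustration, not library API:

```python
# Hypothetical retrieval callable used to populate use_data_store.
def search_project_docs(query: str):
    return [{"text": f"stub passage matching: {query}", "similarity": 92.0}]

use_data_store = {
    # bare callable: exposed to the agent as the tool "rag::project_docs"
    "project_docs": search_project_docs,
    # dict form: explicit description plus the callable, exposed as "rag::changelog"
    "changelog": {
        "description": "Release notes and version history.",
        "callable": search_project_docs,
    },
}
```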
```diff
@@ -1594,7 +1612,7 @@ AVAILABLE CAPABILITIES:
 Based on the request complexity and available tools, choose the optimal strategy:

 1. **DIRECT_ANSWER**: For simple greetings, basic questions, or requests that don't require any tools
-   - Use when: The request can be fully answered with your existing knowledge
+   - Use when: The request can be fully answered with your existing knowledge with confidence, and no tool seems to add any significant value to the answer
    - Example: "Hello", "What is Python?", "Explain quantum physics"

 2. **REQUEST_CLARIFICATION**: When the request is too vague or ambiguous
```
```diff
@@ -1612,16 +1630,14 @@ Based on the request complexity and available tools, choose the optimal strategy
 Provide your analysis in JSON format:
 {{"thought": "Detailed reasoning about the request complexity and requirements", "strategy": "ONE_OF_THE_FOUR_OPTIONS", "confidence": 0.8, "text_output": "Direct answer or clarification question if applicable", "required_tool_name": "specific tool name if SINGLE_TOOL strategy", "estimated_steps": 3}}"""

-        log_prompt("Triage Prompt", triage_prompt)
-
         triage_schema = {
             "thought": "string", "strategy": "string", "confidence": "number",
             "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
         }
-        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, **llm_generation_kwargs)
+        strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, system_prompt=system_prompt, **llm_generation_kwargs)
         strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"

-        log_event(f"Strategy analysis complete", MSG_TYPE.MSG_TYPE_INFO, meta={
+        log_event(f"Strategy analysis complete.\n**confidence**: {strategy_data.get('confidence', 0.5)}\n**reasoning**: {strategy_data.get('thought', 'None')}", MSG_TYPE.MSG_TYPE_INFO, meta={
             "strategy": strategy,
             "confidence": strategy_data.get("confidence", 0.5),
             "estimated_steps": strategy_data.get("estimated_steps", 1),
```
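The triage call now threads the caller's `system_prompt` into `generate_structured_content`. A usage sketch mirroring the call above, assuming `lc` is an already constructed `LollmsClient`; the prompt and system prompt strings here are invented placeholders:

```python
triage_schema = {
    "thought": "string", "strategy": "string", "confidence": "number",
    "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
}

# 'lc' is assumed to exist; the schema matches the one used by the triage step.
strategy_data = lc.generate_structured_content(
    prompt="USER REQUEST: Summarize the last sprint and list open risks.",
    schema=triage_schema,
    temperature=0.1,
    system_prompt="You are a careful planning assistant.",
)
strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"
```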
```diff
@@ -1760,7 +1776,7 @@ RESPONSE:"""
         }
         if tool_name in descriptions:
             return descriptions[tool_name]
-        if "
+        if "rag::" in tool_name:
             return f"🔍 Searching {tool_name.split('::')[-1]} knowledge base"
         if requires_code:
             return "💻 Processing code"
```
```diff
@@ -1829,7 +1845,7 @@ RESPONSE:"""

         # Enhanced planning phase
         planning_step_id = log_event_fn("📋 Creating adaptive execution plan...", MSG_TYPE.MSG_TYPE_STEP_START)
-        execution_plan = planner.decompose_task(original_user_prompt, context or "")
+        execution_plan = planner.decompose_task(original_user_prompt, context or "", "\n".join([f"{tool['name']}:{tool['description']}" for tool in all_visible_tools]))
         current_plan_version = 1

         log_event_fn(f"Initial plan created with {len(execution_plan.tasks)} tasks", MSG_TYPE.MSG_TYPE_INFO, meta={
```
```diff
@@ -4309,27 +4325,29 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         context_fill_percentage: float = 0.75,
         overlap_tokens: int = 150,
         expected_generation_tokens: int = 1500,
-        max_scratchpad_tokens: int = 4000,
-        scratchpad_compression_threshold: int = 3000,
+        max_scratchpad_tokens: int = 4000,
+        scratchpad_compression_threshold: int = 3000,
         streaming_callback: Optional[Callable] = None,
         return_scratchpad_only: bool = False,
         debug: bool = True,
+        ctx_size=None,
         **kwargs
     ) -> str:
         """
         Processes long text with FIXED chunk sizing and managed scratchpad growth.
+        Now uses dynamic token calculation based on actual model tokenizer.
         """

         if debug:
             print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")

         # Validate context fill percentage
-        if not (0.1 <= context_fill_percentage <= 0
-            raise ValueError(f"context_fill_percentage must be between 0.1 and 0
+        if not (0.1 <= context_fill_percentage <= 1.0):
+            raise ValueError(f"context_fill_percentage must be between 0.1 and 1.0, got {context_fill_percentage}")

         # Get context size
         try:
-            context_size = self.llm.get_context_size() or 8192
+            context_size = ctx_size or self.llm.default_ctx_size or self.llm.get_context_size() or 8192
         except:
             context_size = 8192
```
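A small standalone sketch of the new context-size precedence introduced here: an explicit `ctx_size` argument wins, then the binding's `default_ctx_size`, then `get_context_size()`, then a hard 8192 fallback. The `Binding` class below is a stand-in, not the real LLM binding type:

```python
from typing import Optional

class Binding:
    # stand-in for the real binding object referenced as self.llm
    default_ctx_size: Optional[int] = None
    def get_context_size(self) -> Optional[int]:
        return 4096

def resolve_context_size(ctx_size: Optional[int], llm: Binding) -> int:
    try:
        return ctx_size or llm.default_ctx_size or llm.get_context_size() or 8192
    except Exception:
        return 8192

print(resolve_context_size(None, Binding()))   # 4096: falls through to get_context_size()
print(resolve_context_size(16384, Binding()))  # 16384: the explicit argument wins
```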
```diff
@@ -4346,26 +4364,97 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")

         # ========================================
-        #
+        # ENHANCED: Dynamically calculate token sizes using actual tokenizer
         # ========================================
-
-
-
+
+        # Create template system prompt to measure its token size
+        template_system_prompt = (
+            f"You are a component in a multi-step text processing pipeline analyzing step 1 of 100.\n\n"
+            f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+            f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+            f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+            f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
+        )
+        base_system_tokens = len(self.tokenize(template_system_prompt))
+
+        # Create MINIMAL template user prompt (structure only, without content placeholders)
+        summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+
+        # Measure only the structural overhead (headers, formatting, instructions)
+        template_structure = (
+            f"--- Global Objective ---\n{summarization_objective}\n\n"
+            f"--- Progress ---\nStep 100/100 | 10 sections completed, 4000 tokens\n\n" # Worst-case progress text
+            f"--- Existing Scratchpad (for context) ---\n"
+            f"--- New Text Chunk ---\n"
+            f"--- Instructions ---\n"
+            f"Extract NEW key information from this chunk that aligns with the objective. "
+            f"Be concise. Avoid repeating scratchpad content."
+        )
+        user_template_overhead = len(self.tokenize(template_structure))
+
+        if debug:
+            print(f"🔧 DEBUG: Computed system prompt tokens: {base_system_tokens}")
+            print(f"🔧 DEBUG: Computed user template overhead: {user_template_overhead}")
+            print(f"🔧 DEBUG: (Note: Scratchpad and chunk content allocated separately)")
+
         # Reserve space for maximum expected scratchpad size
         reserved_scratchpad_tokens = max_scratchpad_tokens
-
+
         total_budget = int(context_size * context_fill_percentage)
-
-
+        # Only count overhead, not the actual chunk/scratchpad content (that's reserved separately)
+        used_tokens = base_system_tokens + user_template_overhead + reserved_scratchpad_tokens + expected_generation_tokens
+
         # FIXED chunk size - never changes during processing
-        FIXED_CHUNK_SIZE = max(
+        FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))
+

         if debug:
-            print(f"🔧 DEBUG:
-            print(f"
-            print(f"
+            print(f"\n🔧 DEBUG: Token budget breakdown:")
+            print(f" - Context size: {context_size} tokens")
+            print(f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)")
+            print(f" - Total budget: {total_budget} tokens")
+            print(f" - System prompt: {base_system_tokens} tokens")
+            print(f" - User template overhead: {user_template_overhead} tokens")
+            print(f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens")
+            print(f" - Expected generation: {expected_generation_tokens} tokens")
+            print(f" - Total overhead: {used_tokens} tokens")
+            print(f" - Remaining for chunks: {total_budget - used_tokens} tokens")
+            print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens")
+
+        # Safety check
+        if FIXED_CHUNK_SIZE == 1024:
+            print(f"⚠️ WARNING: Chunk size is at minimum (1024)!")
+            print(f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available")
+            print(f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens")

         if streaming_callback:
+            streaming_callback(
+                "\n".join([
+                    f"\n🔧 DEBUG: Token budget breakdown:",
+                    f" - Context size: {context_size} tokens",
+                    f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)",
+                    f" - Total budget: {total_budget} tokens",
+                    f" - System prompt: {base_system_tokens} tokens",
+                    f" - User template overhead: {user_template_overhead} tokens",
+                    f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens",
+                    f" - Expected generation: {expected_generation_tokens} tokens",
+                    f" - Total overhead: {used_tokens} tokens",
+                    f" - Remaining for chunks: {total_budget - used_tokens} tokens",
+                    f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens"
+                    ]
+                ),
+                MSG_TYPE.MSG_TYPE_STEP
+            )
+            if FIXED_CHUNK_SIZE == 1024:
+                streaming_callback(
+                    "\n".join([
+                        f"⚠️ WARNING: Chunk size is at minimum (1024)!",
+                        f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available",
+                        f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens"
+                        ]
+                    ),
+                    MSG_TYPE.MSG_TYPE_STEP
+                )
             streaming_callback(
                 f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
                 MSG_TYPE.MSG_TYPE_STEP,
```
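A worked example of the fixed-chunk budget computed above, using the defaults from the signature. The two measured template sizes depend on the model tokenizer, so the values below are assumed placeholders, not numbers produced by the library:

```python
context_size = 8192                   # assumed model context window
context_fill_percentage = 0.75        # default from the signature
expected_generation_tokens = 1500     # default
reserved_scratchpad_tokens = 4000     # default max_scratchpad_tokens
base_system_tokens = 90               # assumed tokenizer measurement
user_template_overhead = 70           # assumed tokenizer measurement

total_budget = int(context_size * context_fill_percentage)             # 6144
used_tokens = (base_system_tokens + user_template_overhead
               + reserved_scratchpad_tokens + expected_generation_tokens)  # 5660
FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))           # max(1024, 484) -> 1024

# With an 8K context the chunk size bottoms out at the 1024 floor, which is exactly
# the case the new warning path reports; a 32K context under the same overhead
# leaves int(32768 * 0.75) - 5660 = 18916 tokens per chunk.
print(total_budget, used_tokens, FIXED_CHUNK_SIZE)
```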
```diff
@@ -4416,7 +4505,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             print(f"🔧 DEBUG: Total estimated steps: {total_steps}")

         # ========================================
-        # NEW: Scratchpad compression helper
+        # NEW: Scratchpad compression helper with dynamic token counting
         # ========================================
         def compress_scratchpad(scratchpad_sections: list) -> list:
             """Compress scratchpad when it gets too large"""
```
|
|
|
4424
4513
|
return scratchpad_sections
|
|
4425
4514
|
|
|
4426
4515
|
combined = "\n\n---\n\n".join(scratchpad_sections)
|
|
4427
|
-
|
|
4516
|
+
# ENHANCED: Use actual tokenizer to count
|
|
4517
|
+
current_size = len(self.tokenize(combined))
|
|
4428
4518
|
|
|
4429
4519
|
if current_size <= scratchpad_compression_threshold:
|
|
4430
4520
|
return scratchpad_sections
|
|
@@ -4448,7 +4538,8 @@ Provide the final aggregated answer in {output_format} format, directly addressi
|
|
|
4448
4538
|
)
|
|
4449
4539
|
|
|
4450
4540
|
if debug:
|
|
4451
|
-
|
|
4541
|
+
# ENHANCED: Use actual tokenizer
|
|
4542
|
+
compressed_size = len(self.tokenize(compressed))
|
|
4452
4543
|
print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
|
|
4453
4544
|
|
|
4454
4545
|
return [compressed]
|
|
@@ -4479,16 +4570,16 @@ Provide the final aggregated answer in {output_format} format, directly addressi
|
|
|
4479
4570
|
{"step": step_number, "total_steps": total_steps, "progress": progress}
|
|
4480
4571
|
)
|
|
4481
4572
|
|
|
4482
|
-
# Check and compress scratchpad
|
|
4573
|
+
# ENHANCED: Check and compress scratchpad with actual token counting
|
|
4483
4574
|
current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
|
|
4484
|
-
scratchpad_size = len(
|
|
4575
|
+
scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
|
|
4485
4576
|
|
|
4486
4577
|
if scratchpad_size > scratchpad_compression_threshold:
|
|
4487
4578
|
if debug:
|
|
4488
4579
|
print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
|
|
4489
4580
|
chunk_summaries = compress_scratchpad(chunk_summaries)
|
|
4490
4581
|
current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
|
|
4491
|
-
scratchpad_size = len(
|
|
4582
|
+
scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
|
|
4492
4583
|
|
|
4493
4584
|
try:
|
|
4494
4585
|
system_prompt = (
|
|
@@ -4512,8 +4603,15 @@ Provide the final aggregated answer in {output_format} format, directly addressi
|
|
|
4512
4603
|
f"Be concise. Avoid repeating scratchpad content."
|
|
4513
4604
|
)
|
|
4514
4605
|
|
|
4606
|
+
# ENHANCED: Compute actual prompt size
|
|
4607
|
+
actual_prompt_tokens = len(self.tokenize(user_prompt))
|
|
4608
|
+
actual_system_tokens = len(self.tokenize(system_prompt))
|
|
4609
|
+
|
|
4515
4610
|
if debug:
|
|
4516
|
-
print(f"🔧 DEBUG:
|
|
4611
|
+
print(f"🔧 DEBUG: Actual prompt tokens: {actual_prompt_tokens}")
|
|
4612
|
+
print(f"🔧 DEBUG: Actual system tokens: {actual_system_tokens}")
|
|
4613
|
+
print(f"🔧 DEBUG: Total input tokens: {actual_prompt_tokens + actual_system_tokens}")
|
|
4614
|
+
print(f"🔧 DEBUG: Scratchpad: {scratchpad_size} tokens")
|
|
4517
4615
|
|
|
4518
4616
|
chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))
|
|
4519
4617
|
|
|
```diff
@@ -4589,7 +4687,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                 streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
             return final_scratchpad.strip()

-        # Final synthesis
+        # Final synthesis with STRONG objective reinforcement
         if streaming_callback:
             streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})

```
```diff
@@ -4603,20 +4701,47 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."

         if debug:
-
+            final_scratchpad_tokens = len(self.tokenize(combined_scratchpad))
+            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {final_scratchpad_tokens} tokens, {len(chunk_summaries)} sections")

+        # ENHANCED: Strong objective-focused synthesis
         synthesis_system_prompt = (
-            "You are
-            "
-            "
+            f"You are completing a multi-step text processing task. "
+            f"Your role is to take analysis sections and produce the FINAL OUTPUT that directly fulfills the user's original objective.\n\n"
+            f"**CRITICAL:** Your output must DIRECTLY ADDRESS the user's objective, NOT just summarize the sections. "
+            f"The sections are intermediate work - transform them into the final deliverable the user requested."
         )

+        # ENHANCED: Explicit task reinforcement with examples of what NOT to do
+        task_type_hint = ""
+        if contextual_prompt:
+            lower_prompt = contextual_prompt.lower()
+            if any(word in lower_prompt for word in ['extract', 'list', 'identify', 'find']):
+                task_type_hint = "\n**Task Type:** This is an EXTRACTION/IDENTIFICATION task. Provide a structured list or catalog of items found, NOT a narrative summary."
+            elif any(word in lower_prompt for word in ['analyze', 'evaluate', 'assess', 'examine']):
+                task_type_hint = "\n**Task Type:** This is an ANALYSIS task. Provide insights, patterns, and evaluations, NOT just a description of content."
+            elif any(word in lower_prompt for word in ['compare', 'contrast', 'difference']):
+                task_type_hint = "\n**Task Type:** This is a COMPARISON task. Highlight similarities and differences, NOT separate summaries."
+            elif any(word in lower_prompt for word in ['answer', 'question', 'explain why', 'how does']):
+                task_type_hint = "\n**Task Type:** This is a QUESTION-ANSWERING task. Provide a direct answer, NOT a general overview."
+
         synthesis_user_prompt = (
-            f"
-            f"
-            f"
-            f"
-            f"
+            f"=== ORIGINAL USER OBJECTIVE (MOST IMPORTANT) ===\n{synthesis_objective}\n"
+            f"{task_type_hint}\n\n"
+            f"=== ANALYSIS SECTIONS (Raw Working Material) ===\n{combined_scratchpad}\n\n"
+            f"=== YOUR TASK ===\n"
+            f"Transform the analysis sections above into a final output that DIRECTLY FULFILLS the original objective.\n\n"
+            f"**DO:**\n"
+            f"- Focus exclusively on satisfying the user's original objective stated above\n"
+            f"- Organize information in whatever format best serves that objective\n"
+            f"- Remove redundancy and consolidate related points\n"
+            f"- Use markdown formatting for clarity\n\n"
+            f"**DO NOT:**\n"
+            f"- Provide a generic summary of the sections\n"
+            f"- Describe what the sections contain\n"
+            f"- Create an overview of the analysis process\n"
+            f"- Change the task into something different\n\n"
+            f"Remember: The user asked for '{synthesis_objective}' - deliver exactly that."
         )

         try:
```
lollms_client/lollms_discussion.py
CHANGED
```diff
@@ -1208,14 +1208,27 @@ class LollmsDiscussion:
         prompt_for_agent = self.export("markdown", branch_tip_id if branch_tip_id else self.active_branch_id)
         if debug:
             ASCIIColors.cyan("\n" + "="*50 + "\n--- DEBUG: AGENTIC TURN TRIGGERED ---\n" + f"--- PROMPT FOR AGENT (from discussion history) ---\n{prompt_for_agent}\n" + "="*50 + "\n")
-
+
+
+        # Combine system prompt and data zones
+        system_prompt_part = (self._system_prompt or "").strip()
+        data_zone_part = self.get_full_data_zone() # This now returns a clean, multi-part block or an empty string
+        full_system_prompt = ""
+
+        # Combine them intelligently
+        if system_prompt_part and data_zone_part:
+            full_system_prompt = f"{system_prompt_part}\n\n{data_zone_part}"
+        elif system_prompt_part:
+            full_system_prompt = system_prompt_part
+        else:
+            full_system_prompt = data_zone_part
         agent_result = self.lollmsClient.generate_with_mcp_rag(
             prompt=prompt_for_agent,
             use_mcps=effective_use_mcps,
             use_data_store=use_data_store,
             max_reasoning_steps=max_reasoning_steps,
             images=images,
-            system_prompt =
+            system_prompt = full_system_prompt,
             debug=debug,
             **kwargs
         )
```