lollms-client 0.20.7__py3-none-any.whl → 0.20.9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_core.py +760 -407
- lollms_client/lollms_discussion.py +251 -344
- lollms_client/lollms_types.py +3 -3
- lollms_client/lollms_utilities.py +97 -0
- lollms_client/mcp_bindings/remote_mcp/__init__.py +2 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.9.dist-info}/METADATA +1 -1
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.9.dist-info}/RECORD +11 -11
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.9.dist-info}/WHEEL +0 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.9.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.9.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -2,7 +2,7 @@
 import requests
 from ascii_colors import ASCIIColors, trace_exception
 from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
-from lollms_client.lollms_utilities import
+from lollms_client.lollms_utilities import robust_json_parser # Keep utilities needed by core
 from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
 # Import new Abstract Base Classes and Managers
 from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
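The only import change pulls in `robust_json_parser`, which the reworked agent loop below uses to recover structured decisions from imperfect LLM output; the helper itself lives in `lollms_client/lollms_utilities.py`, which grows by 97 lines in this release. As a rough, hedged illustration of what such a parser typically does (the packaged implementation may differ), a minimal sketch:

```python
# Hypothetical sketch only; the real lollms_client.lollms_utilities.robust_json_parser
# may behave differently.
import json
import re

def robust_json_parser_sketch(raw: str) -> dict:
    """Extract and parse the first JSON object found in an LLM response."""
    # Prefer the body of a fenced ```json block if the model produced one.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", raw, re.DOTALL)
    candidate = fenced.group(1) if fenced else raw
    # Keep only the outermost {...} span in case prose surrounds the JSON.
    start, end = candidate.find("{"), candidate.rfind("}")
    if start == -1 or end == -1:
        raise ValueError("No JSON object found in the response")
    return json.loads(candidate[start:end + 1])

print(robust_json_parser_sketch('Here you go: ```json\n{"action": "call_tool", "action_details": {"tool_name": "search"}}\n```'))
```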
@@ -597,336 +597,327 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
         response_full += response
         codes = self.extract_code_blocks(response, format=code_tag_format)
         return codes
+
+    def _synthesize_knowledge(
+            self,
+            previous_scratchpad: str,
+            tool_name: str,
+            tool_params: dict,
+            tool_result: dict
+    ) -> str:
+        """
+        A dedicated LLM call to interpret a tool's output and update the knowledge scratchpad.
+        """
+        synthesis_prompt = (
+            "You are a data analyst assistant. Your sole job is to interpret the output of a tool and integrate it into the existing research summary (knowledge scratchpad).\n\n"
+            "--- PREVIOUS KNOWLEDGE SCRATCHPAD ---\n"
+            f"{previous_scratchpad}\n\n"
+            "--- ACTION JUST TAKEN ---\n"
+            f"Tool Called: `{tool_name}`\n"
+            f"Parameters: {json.dumps(tool_params)}\n\n"
+            "--- RAW TOOL OUTPUT ---\n"
+            f"```json\n{json.dumps(tool_result, indent=2)}\n```\n\n"
+            "--- YOUR TASK ---\n"
+            "Read the 'RAW TOOL OUTPUT' and explain what it means in plain language. Then, integrate this new information with the 'PREVIOUS KNOWLEDGE SCRATCHPAD' to create a new, complete, and self-contained summary.\n"
+            "Your output should be ONLY the text of the new scratchpad, with no extra commentary or formatting.\n\n"
+            "--- NEW KNOWLEDGE SCRATCHPAD ---\n"
+        )
+        new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
+        return self.remove_thinking_blocks(new_scratchpad_text).strip()
+
+    def _build_final_decision_prompt(
+            self,
+            formatted_tools_list: str,
+            formatted_conversation_history: str,
+            current_plan: str,
+            knowledge_scratchpad: str,
+            agent_work_history_str: str,
+            ctx_size: Optional[int],
+    ) -> str:
+        """
+        Builds the decision prompt with explicit state-checking instructions to prevent loops.
+        """
+        final_agent_history = agent_work_history_str
+
+        if ctx_size:
+            get_token_count = len
+            static_parts_text = (
+                "You are a task-oriented AI assistant. Your goal is to execute a plan step-by-step without repeating work.\n\n"
+                "--- AVAILABLE TOOLS ---\n"
+                f"{formatted_tools_list}\n\n"
+                "--- CONVERSATION HISTORY ---\n"
+                f"{formatted_conversation_history}\n\n"
+                "--- CUMULATIVE KNOWLEDGE (What you know so far) ---\n"
+                f"{knowledge_scratchpad}\n\n"
+                "--- THE OVERALL PLAN ---\n"
+                f"{current_plan}\n\n"
+                "--- ACTIONS TAKEN THIS TURN ---\n"
+                "\n\n" # Empty history for size calculation
+                "--- YOUR TASK: STATE-DRIVEN EXECUTION ---\n"
+                "1. **Identify the next step:** Look at 'THE OVERALL PLAN' and identify the very next incomplete step.\n"
+                "2. **Check your knowledge:** Look at the 'CUMULATIVE KNOWLEDGE'. Have you already performed this step and recorded the result? For example, if the step is 'search for papers', check if the search results are already in the knowledge base.\n"
+                "3. **Decide your action:**\n"
+                "   - **If the step is NOT DONE:** Your action is `call_tool` to execute it.\n"
+                "   - **If the step IS ALREADY DONE:** Your job is to update the plan by removing the completed step. Then, re-evaluate from step 1 with the *new, shorter plan*.\n"
+                "   - **If ALL steps are done:** Your action is `final_answer`.\n"
+                "   - **If you are blocked:** Your action is `clarify`.\n\n"
+                "--- OUTPUT FORMAT ---\n"
+                "Respond with a single JSON object inside a ```json markdown tag.\n"
+                "```json\n{\n"
+                '  "thought": "My explicit reasoning. First, I will state the next step from the plan. Second, I will check the cumulative knowledge to see if this step is already complete. Third, I will state my conclusion and chosen action based on that comparison.",\n'
+                '  "updated_plan": "The new, remaining plan. It is CRITICAL that you remove any step that you have confirmed is complete in your thought process.",\n'
+                '  "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
+                '  "action_details": {\n'
+                '    "tool_name": "(Required if action is \'call_tool\') The tool for the CURRENT incomplete step.",\n'
+                '    "tool_params": {},\n'
+                '    "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
+                "  }\n}\n```"
+            )
+            fixed_parts_size = get_token_count(static_parts_text)
+            available_space_for_history = ctx_size - fixed_parts_size - 100
+            if get_token_count(agent_work_history_str) > available_space_for_history:
+                if available_space_for_history > 0:
+                    truncation_point = len(agent_work_history_str) - available_space_for_history
+                    final_agent_history = ("[...history truncated due to context size...]\n" + agent_work_history_str[truncation_point:])
+                    ASCIIColors.warning("Agent history was truncated to fit the context window.")
+                else:
+                    final_agent_history = "[...history truncated due to context size...]"
+
+        return (
+            "You are a task-oriented AI assistant. Your goal is to execute a plan step-by-step without repeating work.\n\n"
+            "--- AVAILABLE TOOLS ---\n"
+            f"{formatted_tools_list}\n\n"
+            "--- CONVERSATION HISTORY ---\n"
+            f"{formatted_conversation_history}\n\n"
+            "--- CUMULATIVE KNOWLEDGE (What you know so far) ---\n"
+            f"{knowledge_scratchpad}\n\n"
+            "--- THE OVERALL PLAN ---\n"
+            f"{current_plan}\n\n"
+            "--- ACTIONS TAKEN THIS TURN ---\n"
+            f"{final_agent_history}\n\n"
+            "--- YOUR TASK: STATE-DRIVEN EXECUTION ---\n"
+            "1. **Identify the next step:** Look at 'THE OVERALL PLAN' and identify the very next incomplete step.\n"
+            "2. **Check your knowledge:** Look at the 'CUMULATIVE KNOWLEDGE'. Have you already performed this step and recorded the result? For example, if the step is 'search for papers', check if the search results are already in the knowledge base.\n"
+            "3. **Decide your action:**\n"
+            "   - **If the step is NOT DONE:** Your action is `call_tool` to execute it.\n"
+            "   - **If the step IS ALREADY DONE:** Your job is to update the plan by removing the completed step. Then, re-evaluate from step 1 with the *new, shorter plan*.\n"
+            "   - **If ALL steps are done:** Your action is `final_answer`.\n"
+            "   - **If you are blocked:** Your action is `clarify`.\n\n"
+            "--- OUTPUT FORMAT ---\n"
+            "Respond with a single JSON object inside a ```json markdown tag.\n"
+            "```json\n"
+            "{\n"
+            '  "thought": "My explicit reasoning. First, I will state the next step from the plan. Second, I will check the cumulative knowledge to see if this step is already complete. Third, I will state my conclusion and chosen action based on that comparison.",\n'
+            '  "updated_plan": "The new, remaining plan. It is CRITICAL that you remove any step that you have confirmed is complete in your thought process.",\n'
+            '  "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
+            '  "action_details": {\n'
+            '    "tool_name": "(Required if action is \'call_tool\') The tool for the CURRENT incomplete step.",\n'
+            '    "tool_params": {},\n'
+            '    "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
+            "  }\n"
+            "}\n"
+            "```"
+        )
+

-    # --- Function Calling with MCP ---
     def generate_with_mcp(
         self,
         prompt: str,
-
+        system_prompt:str = None,
+        objective_extraction_system_prompt="Build a plan",
         images: Optional[List[str]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
         max_tool_calls: int = 5,
-        max_llm_iterations: int = 10,
-
-
-
-
+        max_llm_iterations: int = 10,
+        ctx_size: Optional[int] = None,
+        max_json_retries: int = 1,
+        tool_call_decision_temperature: float = 0.0,
+        final_answer_temperature: float = None,
+        streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
         **llm_generation_kwargs
     ) -> Dict[str, Any]:
-
-
-
-        Args:
-            prompt (str): The user's initial prompt.
-            discussion_history (Optional[List[Dict[str, str]]]): Previous turns of conversation.
-            images (Optional[List[str]]): Images provided with the current user prompt.
-            tools (Optional[List[Dict[str, Any]]]): A list of MCP tool definitions available for this call.
-                If None, tools will be discovered from the MCP binding.
-            max_tool_calls (int): Maximum number of distinct tool calls allowed in one interaction turn.
-            max_llm_iterations (int): Maximum number of times the LLM can decide to call a tool
-                before being forced to generate a final answer.
-            tool_call_decision_temperature (float): Temperature for LLM when deciding on tool calls.
-            final_answer_temperature (float): Temperature for LLM when generating the final answer.
-            streaming_callback (Optional[Callable]): Callback for streaming LLM responses (tool decisions/final answer).
-                Signature: (chunk_str, msg_type, metadata_dict, history_list_of_dicts_for_this_turn) -> bool
-            interactive_tool_execution (bool): If True, ask user for confirmation before executing each tool.
+        if not self.binding or not self.mcp:
+            return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}

-
-
-            - "final_answer" (str): The LLM's final textual answer.
-            - "tool_calls" (List[Dict]): A list of tools called, their params, and results.
-            - "error" (Optional[str]): Error message if something went wrong.
-        """
-        if not self.binding:
-            return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
-        if not self.mcp:
-            return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
+        turn_history: List[Dict[str, Any]] = []
+        conversation_context = prompt

-        turn_history: List[Dict[str, Any]] = [] # Tracks this specific turn's interactions (LLM thoughts, tool calls, tool results)
-
-        # 1. Discover tools if not provided
         if tools is None:
             try:
                 tools = self.mcp.discover_tools(force_refresh=True)
-                if not tools:
-                    ASCIIColors.warning("No MCP tools discovered by the binding.")
+                if not tools: ASCIIColors.warning("No MCP tools discovered.")
             except Exception as e_disc:
                 return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
-
-        if not tools: # If still no tools after discovery attempt
-            ASCIIColors.info("No tools available for function calling. Generating direct response.")
-            final_answer = self.remove_thinking_blocks(self.generate_text(
-                prompt=prompt,
-                system_prompt= (discussion_history[0]['content'] if discussion_history and discussion_history[0]['role'] == 'system' else "") + "\nYou are a helpful assistant.", # Basic system prompt
-                images=images,
-                stream=streaming_callback is not None, # stream if callback is provided
-                streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, None, turn_history) if streaming_callback else None, # Adapt callback
-                temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
-                **(llm_generation_kwargs or {})
-            ))
-            if isinstance(final_answer, dict) and "error" in final_answer: # Handle generation error
-                return {"final_answer": "", "tool_calls": [], "error": final_answer["error"]}
-            return {"final_answer": final_answer, "tool_calls": [], "error": None}

+        if not tools:
+            final_answer_text = self.generate_text(prompt=prompt, system_prompt=system_prompt, stream=streaming_callback is not None, streaming_callback=streaming_callback)
+            return {"final_answer": self.remove_thinking_blocks(final_answer_text), "tool_calls": [], "error": None}

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        knowledge_scratchpad = "No information gathered yet."
+        agent_work_history = []
+        formatted_tools_list = "\n".join([f"- Tool: {t.get('name')}\n Description: {t.get('description')}\n Schema: {json.dumps(t.get('input_schema'))}" for t in tools])
+
+        if streaming_callback:
+            streaming_callback("Building/Revising plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "plan_extraction"}, turn_history)
+
+        obj_prompt = (
+            "You are an Intelligent Workflow Planner. Your mission is to create the most efficient plan possible by analyzing the user's request within the context of the full conversation.\n\n"
+            "Your Guiding Principle: **Always choose the path of least resistance.**\n\n"
+            "**Your Logical Process:**\n"
+            "1. **Analyze the Entire Conversation:** Understand the user's ultimate goal based on all interaction so far.\n"
+            "2. **Check for a Single-Step Solution:** Scrutinize the available tools. Can a single tool call directly achieve the user's current goal? \n"
+            "3. **Formulate a Plan:** Based on your analysis, create a concise, numbered list of steps to achieve the goal. If the goal is simple, this may be only one step. If it is complex or multi-turn, it may be several steps.\n\n"
+            "**CRITICAL RULES:**\n"
+            "* **MANDATORY: NEVER add steps the user did not ask for.** Do not embellish or add 'nice-to-have' features.\n"
+            "* **Focus on the Goal:** Your plan should directly address the user's request as it stands now in the conversation.\n\n"
+            "---\n"
+            "**Available Tools:**\n"
+            f"{formatted_tools_list}\n\n"
+            "**Full Conversation History:**\n"
+            f'"{conversation_context}"'
+        )
+        initial_plan_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
+        current_plan = self.remove_thinking_blocks(initial_plan_gen).strip()

+        if streaming_callback:
+            streaming_callback(f"Current plan:\n{current_plan}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "plan_extraction"}, turn_history)
+        turn_history.append({"type": "initial_plan", "content": current_plan})
+
         tool_calls_made_this_turn = []
         llm_iterations = 0

         while llm_iterations < max_llm_iterations:
             llm_iterations += 1
+            if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)

-
-
-
-
-
-
-
-
-
-
-
-            # Add tool execution results from previous iterations in this turn to the history string
-            for tc_info in tool_calls_made_this_turn:
-                if tc_info.get("result"): # Only add if there's a result (successful or error)
-                    history_str += f"{self.ai_full_header}(Executed tool '{tc_info['name']}' with params {tc_info['params']}. Result: {json.dumps(tc_info['result'])})\n"
-
-
-            decision_prompt_template = f"""You are an AI assistant that can use tools to answer user requests.
-Available tools:
-{formatted_tools_list}
-
-Current conversation:
-{history_str}
-
-Based on the available tools and the current conversation, decide the next step.
-Respond with a JSON object containing ONE of the following structures:
-1. If you need to use a tool:
-   {{"action": "call_tool", "tool_name": "<name_of_tool_to_call>", "tool_params": {{<parameters_for_tool_as_json_object>}}}}
-2. If you can answer directly without using a tool OR if you have sufficient information from previous tool calls:
-   {{"action": "final_answer"}}
-3. If the user's request is unclear or you need more information before deciding:
-   {{"action": "clarify", "clarification_request": "<your_question_to_the_user>"}}
-""" # No {self.ai_full_header} here, generate_code will get raw JSON
-
-            if streaming_callback:
-                streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
-
-            # Use generate_code to get structured JSON output from LLM
-            # Note: generate_code itself uses generate_text. We are asking for JSON here.
-            raw_llm_decision_json = self.generate_text(
-                prompt=decision_prompt_template, # This is the full prompt for the LLM
-                n_predict=512, # Reasonable size for decision JSON
-                temperature=tool_call_decision_temperature,
-                images=images
-                # `images` are part of the history_str if relevant to the binding
-                # streaming_callback=None, # Decisions are usually not streamed chunk by chunk
+            formatted_agent_history = "No actions taken yet in this turn."
+            if agent_work_history:
+                history_parts = [ f"### Step {i+1}:\n**Thought:** {entry['thought']}\n**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n**Observation (Tool Output):**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```" for i, entry in enumerate(agent_work_history)]
+                formatted_agent_history = "\n\n".join(history_parts)
+
+            llm_decision = None
+            current_decision_prompt = self._build_final_decision_prompt(
+                formatted_tools_list=formatted_tools_list, formatted_conversation_history=conversation_context,
+                current_plan=current_plan, knowledge_scratchpad=knowledge_scratchpad,
+                agent_work_history_str=formatted_agent_history, ctx_size=ctx_size
             )
-            if streaming_callback:
-                streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
-

-
-
-                turn_history.append({"type": "error", "content": "LLM failed to provide a decision."})
-                return {"final_answer": "I'm sorry, I encountered an issue trying to process your request.", "tool_calls": tool_calls_made_this_turn, "error": "LLM decision JSON was empty."}
-
-            processed_raw_json = raw_llm_decision_json.strip() # Strip whitespace first
-            try:
-                llm_decision = json.loads(processed_raw_json)
-                turn_history.append({"type": "llm_decision", "content": llm_decision})
-            except json.JSONDecodeError:
-                ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}")
+            for i in range(max_json_retries + 1):
+                raw_llm_decision_json = self.generate_text(prompt=current_decision_prompt, n_predict=2048, temperature=tool_call_decision_temperature)
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    "
-                    "
-
-
-
-
-
+                    llm_decision = robust_json_parser(raw_llm_decision_json)
+                    if "action" not in llm_decision or "action_details" not in llm_decision or "updated_plan" not in llm_decision:
+                        raise KeyError("The JSON is missing required keys: 'action', 'action_details', or 'updated_plan'.")
+                    break
+                except (json.JSONDecodeError, AttributeError, KeyError) as e:
+                    error_message = f"JSON parsing failed (Attempt {i+1}/{max_json_retries+1}). Error: {e}"
+                    ASCIIColors.warning(error_message)
+                    if streaming_callback: streaming_callback(error_message, MSG_TYPE.MSG_TYPE_WARNING, None, turn_history)
+                    turn_history.append({"type": "error", "content": f"Invalid JSON response: {raw_llm_decision_json}"})
+                    if i >= max_json_retries:
+                        ASCIIColors.error("Max JSON retries reached. Aborting agent loop.")
+                        llm_decision = None
+                        break
+                    current_decision_prompt = (
+                        "You previously failed to generate a valid JSON object. Review the error and your last output, then try again, adhering strictly to the required schema.\n\n"
+                        "--- ERROR ---\n"
+                        f"{str(e)}\n\n"
+                        "--- YOUR PREVIOUS (INVALID) OUTPUT ---\n"
+                        f"{raw_llm_decision_json}\n\n"
+                        "--- REQUIRED SCHEMA REMINDER ---\n"
+                        "Your response MUST be a single JSON object inside a ```json markdown tag. It must contain 'action', 'action_details', and 'updated_plan' keys.\n\n"
+                        "Now, please re-generate the JSON response correctly."
+                    )
+            if not llm_decision: break
+
+            turn_history.append({"type": "llm_decision", "content": llm_decision})
+            current_plan = llm_decision.get("updated_plan", current_plan)
             action = llm_decision.get("action")
+            action_details = llm_decision.get("action_details", {})
+            if streaming_callback: streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)

             if action == "call_tool":
                 if len(tool_calls_made_this_turn) >= max_tool_calls:
-                    ASCIIColors.warning("
-
-
-
-                tool_name
-
-
-                if not tool_name:
-                    ASCIIColors.warning("LLM decided to call a tool but didn't specify tool_name.")
-                    current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
-                    break # Or ask LLM to try again without this faulty decision in history
-
-                tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
-                turn_history.append(tool_call_info)
-                if streaming_callback:
-                    streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
-                    streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
-
-                # Interactive execution if enabled
-                if interactive_tool_execution:
-                    try:
-                        user_confirmation = input(f"AI wants to execute tool '{tool_name}' with params {tool_params}. Allow? (yes/no/details): ").lower()
-                        if user_confirmation == "details":
-                            tool_def_for_details = next((t for t in tools if t.get("name") == tool_name), None)
-                            print(f"Tool details: {json.dumps(tool_def_for_details, indent=2)}")
-                            user_confirmation = input(f"Allow execution of '{tool_name}'? (yes/no): ").lower()
-
-                        if user_confirmation != "yes":
-                            ASCIIColors.info("Tool execution cancelled by user.")
-                            tool_result = {"error": "Tool execution cancelled by user."}
-                            # Add this info to conversation for LLM
-                            current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' execution was cancelled by the user. What should I do next?)"})
-                            tool_call_info["result"] = tool_result # Record cancellation
-                            tool_calls_made_this_turn.append(tool_call_info)
-                            continue # Back to LLM for next decision
-                    except Exception as e_input: # Catch issues with input() e.g. in non-interactive env
-                        ASCIIColors.warning(f"Error during interactive confirmation: {e_input}. Proceeding without confirmation.")
-
-
-                if streaming_callback:
-                    streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
+                    ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
+                    break
+                tool_name = action_details.get("tool_name")
+                tool_params = action_details.get("tool_params", {})
+                if not tool_name or not isinstance(tool_params, dict):
+                    ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
+                    break

+                if streaming_callback: streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
                 tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
-
-                tool_call_info["result"] = tool_result # Add result to this call's info
-                tool_calls_made_this_turn.append(tool_call_info) # Log the completed call
                 if streaming_callback:
-
+                    streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
+                    streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)

+                if streaming_callback: streaming_callback("Synthesizing new knowledge...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"synthesis_step_{llm_iterations}"}, turn_history)
+                new_scratchpad = self._synthesize_knowledge(previous_scratchpad=knowledge_scratchpad, tool_name=tool_name, tool_params=tool_params, tool_result=tool_result)
+                knowledge_scratchpad = new_scratchpad
                 if streaming_callback:
-                    streaming_callback(f"
-
-
-
-
-
-
+                    streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"synthesis_step_{llm_iterations}"}, turn_history)
+                    streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id": "scratchpad_update"}, turn_history)
+
+                work_entry = { "thought": llm_decision.get("thought", "N/A"), "tool_name": tool_name, "tool_params": tool_params, "tool_result": tool_result, "synthesized_knowledge": knowledge_scratchpad }
+                agent_work_history.append(work_entry)
+                tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
+
             elif action == "clarify":
-                clarification_request =
-
-
-                turn_history.append({"type":"clarification_request_sent", "content": clarification_request})
-                return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None}
-
+                clarification_request = action_details.get("clarification_request", "I need more information to proceed. Could you please clarify?")
+                return { "final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True }
+
             elif action == "final_answer":
                 ASCIIColors.info("LLM decided to formulate a final answer.")
-
-                break # Exit loop to generate final answer
+                break

             else:
-                ASCIIColors.warning(f"LLM returned unknown action: {action}")
-                current_conversation.append({"role":"assistant", "content":f"(Received an unexpected decision: {action}. I will try to answer directly.)"})
-                break # Exit loop
-
-            # Safety break if too many iterations without reaching final answer or max_tool_calls
-            if llm_iterations >= max_llm_iterations:
-                ASCIIColors.warning("Max LLM iterations reached. Forcing final answer.")
-                current_conversation.append({"role":"assistant", "content":"(Max iterations reached. I will now try to formulate an answer.)"})
+                ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
                 break
+
+        if streaming_callback:
+            streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+
+        if streaming_callback:
+            streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+        if streaming_callback:
+            streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)

-
-
-
-
-
-
-
-
-
-
-
-            if msg["role"] == "system":
-                final_system_prompt += msg["content"] + "\n"
-            else:
-                interim_history_for_final_answer.append(msg)
-
-        if not any(msg['role'] == 'user' for msg in interim_history_for_final_answer): # Ensure there's a user turn if only system + tool calls
-            interim_history_for_final_answer.append({'role':'user', 'content': prompt}) # Add original prompt if lost
-
-
-        # The generate_text method needs a single prompt and an optional system_prompt.
-        # We need to format the interim_history_for_final_answer into a single prompt string,
-        # or modify generate_text to accept a list of messages.
-        # For now, flatten to string:
-        current_prompt_for_final_answer = ""
-        for i, msg in enumerate(interim_history_for_final_answer):
-            role_prefix = self.user_custom_header(msg["role"]) if msg["role"]=="user" else self.ai_custom_header(msg["role"]) if msg["role"]=="assistant" else f"!@>{msg['role']}:"
-            current_prompt_for_final_answer += f"{role_prefix}{msg['content']}"
-            if i < len(interim_history_for_final_answer) -1 : # Add newline separator except for last
-                current_prompt_for_final_answer += "\n"
-        # Add AI header to prompt AI to speak
-        current_prompt_for_final_answer += f"\n{self.ai_full_header}"
-
-
-        final_answer_text = self.generate_text(
-            prompt=current_prompt_for_final_answer, # Pass the conversation history as the prompt
-            system_prompt=final_system_prompt.strip(),
-            images=images if not tool_calls_made_this_turn else None, # Only pass initial images if no tool calls happened (context might be lost)
-            stream=streaming_callback is not None,
-            streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, {"type":"final_answer_chunk"}, turn_history) if streaming_callback else None,
-            temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
-            **(llm_generation_kwargs or {})
+        final_answer_prompt = (
+            "You are an AI assistant tasked with providing a final, comprehensive answer to the user based on the research performed.\n\n"
+            "--- FULL CONVERSATION CONTEXT ---\n"
+            f"{conversation_context}\n\n"
+            "--- SUMMARY OF FINDINGS (Your Knowledge Scratchpad) ---\n"
+            f"{knowledge_scratchpad}\n\n"
+            "--- INSTRUCTIONS ---\n"
+            "- Synthesize a clear and complete answer for the user based ONLY on the information in the 'Summary of Findings'.\n"
+            "- Address the user directly and answer their latest query, considering the full conversation.\n"
+            "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
+            "- Format your response clearly using markdown where appropriate.\n"
         )
+        final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
+
+        if streaming_callback:
+            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)

-
-
-
-
-            turn_history.append({"type":"error", "content":f"LLM failed to generate final answer: {final_answer_text['error']}"})
-            return {"final_answer": "", "tool_calls": tool_calls_made_this_turn, "error": final_answer_text["error"]}
+        final_answer = self.remove_thinking_blocks(final_answer_text)
+        turn_history.append({"type":"final_answer_generated", "content": final_answer})
+
+        return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}

-        turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
-        return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}

     def generate_text_with_rag(
         self,
         prompt: str,
         rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
+        system_prompt: str = "",
+        objective_extraction_system_prompt="Extract objectives",
         rag_query_text: Optional[str] = None,
         rag_vectorizer_name: Optional[str] = None,
         rag_top_k: int = 5,
         rag_min_similarity_percent: float = 70.0,
-        max_rag_hops: int =
+        max_rag_hops: int = 3,
         images: Optional[List[str]] = None,
-        system_prompt: str = "",
         n_predict: Optional[int] = None,
         stream: Optional[bool] = None,
         temperature: Optional[float] = None,
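Seen as a whole, the rewritten generate_with_mcp replaces the old single-shot decision prompt with a plan/act/synthesize loop: it drafts a plan, repeatedly asks the model for a JSON decision (re-prompting up to max_json_retries times and parsing with robust_json_parser), executes the chosen MCP tool, folds the result into a knowledge scratchpad via _synthesize_knowledge, and finally writes the answer from that scratchpad. A hedged usage sketch against the new signature; the client instance `lc` and the callback body are illustrative, not part of this diff:

```python
# Assumes `lc` is an already-initialized LollmsClient with LLM and MCP bindings.
def on_event(chunk, msg_type, metadata=None, history=None) -> bool:
    print(chunk)  # stream chunks and step notifications as they arrive
    return True   # the callback signature in the diff expects a bool return

result = lc.generate_with_mcp(
    prompt="Find recent papers on state-space models and summarize the top three.",
    system_prompt="You are a careful research assistant.",
    max_tool_calls=5,
    max_llm_iterations=10,
    ctx_size=8192,        # allows the decision prompt to truncate the agent history
    max_json_retries=1,   # one corrective re-prompt if the decision JSON is invalid
    streaming_callback=on_event,
)
if result["error"]:
    print("Agent failed:", result["error"])
else:
    print(result["final_answer"])
    print("Tools used:", [call["name"] for call in result["tool_calls"]])
```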
@@ -943,13 +934,11 @@ Respond with a JSON object containing ONE of the following structures:
         **llm_generation_kwargs
     ) -> Dict[str, Any]:
         """
-        Enhanced RAG with
-        when context grows beyond ctx_size or self.default_ctx_size.
+        Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
         """
         if not self.binding:
             return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

-        # Determine effective context size limit
         effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)

         turn_rag_history_for_callback: List[Dict[str, Any]] = []
@@ -957,183 +946,251 @@ Respond with a JSON object containing ONE of the following structures:
|
|
|
957
946
|
all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
|
|
958
947
|
|
|
959
948
|
original_user_prompt = prompt
|
|
960
|
-
|
|
961
|
-
|
|
949
|
+
|
|
950
|
+
knowledge_scratchpad = "No information gathered yet."
|
|
951
|
+
current_objectives = ""
|
|
952
|
+
|
|
962
953
|
if extract_objectives:
|
|
963
954
|
if streaming_callback:
|
|
964
|
-
streaming_callback("Extracting
|
|
955
|
+
streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
|
|
956
|
+
|
|
965
957
|
obj_prompt = (
|
|
966
|
-
"You are an expert analyst. "
|
|
967
|
-
"
|
|
968
|
-
"Output a
|
|
958
|
+
"You are an expert analyst. Your task is to extract and structure the key research objectives from the user's request below. "
|
|
959
|
+
"These objectives will guide a research process. Frame them as questions or tasks. "
|
|
960
|
+
"Output a bulleted list of objectives only without a comment.\n\n"
|
|
969
961
|
f"User request:\n\"{original_user_prompt}\""
|
|
970
962
|
)
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
temperature=0.0,
|
|
975
|
-
n_predict=200,
|
|
976
|
-
stream=False
|
|
977
|
-
)
|
|
978
|
-
objectives_text = self.remove_thinking_blocks(obj_gen).strip()
|
|
963
|
+
initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
|
|
964
|
+
current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
|
|
965
|
+
|
|
979
966
|
if streaming_callback:
|
|
980
|
-
streaming_callback(f"Objectives
|
|
967
|
+
streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
|
|
968
|
+
else:
|
|
969
|
+
current_objectives = f"Answer the user's request: '{original_user_prompt}'"
|
|
981
970
|
|
|
982
|
-
|
|
983
|
-
|
|
971
|
+
if streaming_callback:
|
|
972
|
+
streaming_callback("Generating initial search query...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
|
|
973
|
+
if not rag_query_text:
|
|
974
|
+
initial_query_gen_prompt = f"""
|
|
975
|
+
You are a research assistant. Your task is to formulate the first search query for a vector database based on an initial user request and research objectives. The query should be concise and target the most crucial information needed to start.
|
|
984
976
|
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
|
|
991
|
-
txt_previous_queries = f"Previous queries:\n"+'\n'.join(previous_queries)+"\n\n" if len(previous_queries)>0 else ""
|
|
992
|
-
txt_informations = f"Information:\n"+'\n'.join([f"(from {chunk['document']}):{chunk['content']}" for _, chunk in all_unique_retrieved_chunks_map.items()]) if len(all_unique_retrieved_chunks_map)>0 else "This is the first request. No data received yet. Build a new query."
|
|
993
|
-
txt_sp = (
|
|
994
|
-
"Your objective is to analyze the provided chunks of information to determine "
|
|
995
|
-
"whether they are sufficient to reach the objective. If not, formulate a refined and focused query "
|
|
996
|
-
"that can retrieve more relevant information from a vector database. Ensure the query captures the semantic essence "
|
|
997
|
-
"of what is missing, is contextually independent, and is optimized for vector-based similarity search. "
|
|
998
|
-
"Do not repeat or rephrase earlier queries—always generate a new, meaningful atomic query targeting the current gap in knowledge."
|
|
999
|
-
)
|
|
977
|
+
--- User's Request ---
|
|
978
|
+
{original_user_prompt}
|
|
979
|
+
|
|
980
|
+
--- Initial Research Objectives ---
|
|
981
|
+
{current_objectives}
|
|
1000
982
|
|
|
1001
|
-
|
|
983
|
+
--- INSTRUCTIONS ---
|
|
984
|
+
Generate a single, effective search query.
|
|
985
|
+
|
|
986
|
+
--- OUTPUT FORMAT ---
|
|
987
|
+
Provide your response as a single JSON object with one key, "query".
|
|
1002
988
|
```json
|
|
1003
|
-
{
|
|
1004
|
-
"
|
|
1005
|
-
|
|
1006
|
-
It should capture the missing concept or insight in concise, context-rich language, avoiding reuse of earlier queries.
|
|
1007
|
-
}
|
|
989
|
+
{{
|
|
990
|
+
"query": "Your generated search query here."
|
|
991
|
+
}}
|
|
1008
992
|
```
|
|
1009
993
|
"""
|
|
1010
|
-
p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
|
|
1011
|
-
response = self.generate_code(p,system_prompt=txt_sp)
|
|
1012
994
|
try:
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
995
|
+
raw_initial_query_response = self.generate_code(initial_query_gen_prompt, system_prompt="You are a query generation expert.", temperature=0.0)
|
|
996
|
+
initial_plan = json.loads(raw_initial_query_response)
|
|
997
|
+
current_query_for_rag = initial_plan.get("query")
|
|
998
|
+
if not current_query_for_rag:
|
|
999
|
+
raise ValueError("LLM returned an empty initial query.")
|
|
1000
|
+
if streaming_callback:
|
|
1001
|
+
streaming_callback(f"Initial query generated:\n'{current_query_for_rag}'", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
|
|
1002
|
+
except Exception as e:
|
|
1003
|
+
trace_exception(e)
|
|
1004
|
+
current_query_for_rag = original_user_prompt
|
|
1005
|
+
if streaming_callback:
|
|
1006
|
+
streaming_callback(f"Failed to generate initial query, falling back to user prompt. Error: {e}", MSG_TYPE.MSG_TYPE_WARNING, {"id": "initial_query_failure"}, turn_rag_history_for_callback)
|
|
1007
|
+
else:
|
|
1008
|
+
current_query_for_rag=rag_query_text
|
|
1009
|
+
|
|
1010
|
+
previous_queries = []
|
|
1024
1011
|
|
|
1012
|
+
for hop_count in range(max_rag_hops):
|
|
1025
1013
|
if streaming_callback:
|
|
1026
|
-
streaming_callback(f"
|
|
1027
|
-
|
|
1028
|
-
|
|
1014
|
+
streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1015
|
+
|
|
1016
|
+
if streaming_callback:
|
|
1017
|
+
streaming_callback(f"Executing Query:\n{current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query_exec_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1018
|
+
|
|
1029
1019
|
try:
|
|
1030
|
-
|
|
1020
|
+
retrieved_chunks = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
|
|
1031
1021
|
except Exception as e:
|
|
1032
1022
|
trace_exception(e)
|
|
1033
1023
|
return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
|
|
1034
1024
|
|
|
1035
1025
|
hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
|
|
1036
1026
|
previous_queries.append(current_query_for_rag)
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1027
|
+
|
|
1028
|
+
newly_retrieved_text = ""
|
|
1029
|
+
new_chunks_count = 0
|
|
1030
|
+
if retrieved_chunks:
|
|
1031
|
+
for chunk in retrieved_chunks:
|
|
1032
|
+
doc = chunk.get("file_path", "Unknown")
|
|
1033
|
+
content = str(chunk.get("chunk_text", ""))
|
|
1034
|
+
sim = float(chunk.get("similarity_percent", 0.0))
|
|
1035
|
+
detail = {"document": doc, "similarity": sim, "content": content, "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
|
|
1036
|
+
hop_details["retrieved_chunks_details"].append(detail)
|
|
1037
|
+
|
|
1038
|
+
key = f"{doc}::{content[:100]}"
|
|
1039
|
+
if key not in all_unique_retrieved_chunks_map:
|
|
1040
|
+
all_unique_retrieved_chunks_map[key] = detail
|
|
1041
|
+
newly_retrieved_text += f"--- Document: {doc} (Similarity: {sim:.1f}%)\n{content}\n---\n"
|
|
1042
|
+
new_chunks_count += 1
|
|
1043
|
+
|
|
1044
|
+
hop_details["status"] = f"Completed, found {len(retrieved_chunks)} chunks ({new_chunks_count} new)."
|
|
1054
1045
|
rag_hops_details_list.append(hop_details)
|
|
1046
|
+
|
|
1055
1047
|
if streaming_callback:
|
|
1056
|
-
streaming_callback(f"
|
|
1048
|
+
streaming_callback(f"Retrieved {len(retrieved_chunks)} chunks ({new_chunks_count} new).", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retrieval_info_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1049
|
+
|
|
1050
|
+
if new_chunks_count == 0 and hop_count > 0:
|
|
1051
|
+
if streaming_callback:
|
|
1052
|
+
streaming_callback("No new unique information found, stopping RAG hops.", MSG_TYPE.MSG_TYPE_INFO, {"id": "rag_stop_no_new_info"}, turn_rag_history_for_callback)
|
|
1053
|
+
break
|
|
1057
1054
|
|
|
1058
1055
|
if streaming_callback:
|
|
1059
|
-
streaming_callback(
|
|
1056
|
+
streaming_callback("Analyzing findings and refining plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1057
|
+
|
|
1058
|
+
planning_system_prompt = (
|
|
1059
|
+
"You are a strategic research agent via multiple hops. Your task is to analyze new information, update your "
|
|
1060
|
+
"understanding, refine your research objectives, and decide on the next best action."
|
|
1061
|
+
)
|
|
1062
|
+
|
|
1063
|
+
planning_prompt = f"""
|
|
1064
|
+
--- Original User Request ---
|
|
1065
|
+
{original_user_prompt}
|
|
1066
|
+
|
|
1067
|
+
--- Objectives you have formulated ---
|
|
1068
|
+
{current_objectives}
|
|
1069
|
+
|
|
1070
|
+
--- Existing Knowledge Scratchpad (Summary of previous findings) ---
|
|
1071
|
+
{knowledge_scratchpad}
|
|
1072
|
+
|
|
1073
|
+
--- Newly Retrieved Documents for this Hop ---
|
|
1074
|
+
{newly_retrieved_text if newly_retrieved_text else "No new documents were found with the last query."}
|
|
1075
|
+
|
|
1076
|
+
--- Previous Queries (for reference, do not repeat) ---
|
|
1077
|
+
- {"- ".join(previous_queries)}
|
|
1060
1078
|
|
|
1079
|
+
--- INSTRUCTIONS ---
|
|
1080
|
+
1. **Analyze & Update Knowledge:** Read the 'Newly Retrieved Documents'. Summarize the most important new facts and insights into a few bullet points for the 'new_notes_for_scratchpad'.
|
|
1081
|
+
2. **Refine Objectives:** Review the 'Current Research Objectives'. Do the new documents answer any objectives? Do they reveal that some objectives need to be changed or made more specific? Rewrite the complete, updated list of objectives.
|
|
1082
|
+
3. **Decide & Plan Next Query:** Based on your updated objectives and knowledge, decide if you have enough information to form a final answer.
|
|
1083
|
+
- If YES, set `decision` to `false`.
|
|
1084
|
+
- If NO, set `decision` to `true` and formulate a new, focused `query` to address the most critical remaining gap in your knowledge. The query must be different from previous ones.
|
|
1061
1085
|
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1086
|
+
--- OUTPUT FORMAT ---
|
|
1087
|
+
Provide your response as a single JSON object inside a JSON markdown tag. Use this exact schema:
|
|
1088
|
+
```json
|
|
1089
|
+
{{
|
|
1090
|
+
"updated_objectives": "(string) A bulleted list of the new, refined objectives based on the latest information.",
|
|
1091
|
+
"new_notes_for_scratchpad": "(string) A concise summary in bullet points of key findings from the new documents.",
|
|
1092
|
+
"decision": "boolean (true if you need to query again, false if you are done).",
|
|
1093
|
+
"query": "(string, optional) The next query for the vector database if decision is true."
|
|
1094
|
+
}}
|
|
1095
|
+
```
|
|
1096
|
+
"""
|
|
1097
|
+
raw_planning_response = self.generate_code(planning_prompt, system_prompt=planning_system_prompt, temperature=0.0)
|
|
1098
|
+
|
|
1099
|
+
try:
|
|
1100
|
+
plan = robust_json_parser(raw_planning_response)
|
|
1101
|
+
|
|
1102
|
+
raw_notes = plan.get("new_notes_for_scratchpad")
|
|
1103
|
+
if isinstance(raw_notes, list):
|
|
1104
|
+
notes_from_hop = "\n".join(str(item) for item in raw_notes if item).strip()
|
|
1105
|
+
elif isinstance(raw_notes, str):
|
|
1106
|
+
notes_from_hop = raw_notes.strip()
|
|
1107
|
+
else:
|
|
1108
|
+
notes_from_hop = ""
|
|
1109
|
+
|
|
1110
|
+
if notes_from_hop:
|
|
1111
|
+
if knowledge_scratchpad == "No information gathered yet.":
|
|
1112
|
+
knowledge_scratchpad = f"Findings from Hop {hop_count + 1}:\n{notes_from_hop}"
|
|
1113
|
+
else:
|
|
1114
|
+
knowledge_scratchpad += f"\n\nFindings from Hop {hop_count + 1}:\n{notes_from_hop}"
|
|
1115
|
+
|
|
1116
|
+
raw_objectives = plan.get("updated_objectives")
|
|
1117
|
+
if isinstance(raw_objectives, list):
|
|
1118
|
+
current_objectives = "\n".join(str(item) for item in raw_objectives if item).strip()
|
|
1119
|
+
elif isinstance(raw_objectives, str) and raw_objectives.strip():
|
|
1120
|
+
current_objectives = raw_objectives.strip()
|
|
1121
|
+
|
|
1122
|
+
if streaming_callback:
|
|
1123
|
+
streaming_callback(f"Refined Objectives:\n{current_objectives}\n\nNew Learnings:\n{notes_from_hop}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"planning_output_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1124
|
+
|
|
1125
|
+
if not plan.get("decision", False):
|
|
1126
|
+
if streaming_callback:
|
|
1127
|
+
streaming_callback("LLM decided it has enough information.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1128
|
+
break
|
|
1129
|
+
else:
|
|
1130
|
+
next_query = plan.get("query")
|
|
1131
|
+
if not next_query:
|
|
1132
|
+
if streaming_callback:
|
|
1133
|
+
streaming_callback("LLM decided to continue but provided no query. Stopping.", MSG_TYPE.MSG_TYPE_WARNING, {"id": "rag_stop_no_query"}, turn_rag_history_for_callback)
|
|
1134
|
+
break
|
|
1135
|
+
current_query_for_rag = next_query
|
|
1136
|
+
|
|
1137
|
+
except Exception as ex:
|
|
1138
|
+
trace_exception(ex)
|
|
1139
|
+
if streaming_callback:
|
|
1140
|
+
streaming_callback(f"Error processing planning step: {ex}. Stopping RAG.", MSG_TYPE.MSG_TYPE_EXCEPTION, {"id": f"planning_error_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1141
|
+
break
|
|
1142
|
+
|
|
1143
|
+
if streaming_callback:
|
|
1144
|
+
streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1145
|
+
|
|
1146
|
+
sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(), key=lambda c: c["similarity"], reverse=True)
|
|
1065
1147
|
         context_lines = []
         total_chars = 0
         for c in sorted_chunks:
-            snippet = (
-
-                f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
-                f"{c['content']}\n---\n"
-            )
-            if total_chars + len(snippet) > max_rag_context_characters:
-                break
+            snippet = (f"Source: {c['document']} (Sim: {c['similarity']:.1f}%)\n{c['content']}\n---\n")
+            if total_chars + len(snippet) > max_rag_context_characters: break
             context_lines.append(snippet)
             total_chars += len(snippet)
-
         accumulated_context = "".join(context_lines)

-        # If context exceeds our effective limit, summarize it
         if self.count_tokens(accumulated_context) > effective_ctx_size:
-
-
-
-
-                "that preserves all key facts and sources needed to answer the user's request:\n\n"
-                f"{accumulated_context}"
-            )
-            summary = self.generate_text(
-                prompt=summary_prompt,
-                system_prompt="Intermediate summary",
-                temperature=0.0,
-                n_predict= n_predict or 512,
-                stream=False
-            )
-            accumulated_context = self.remove_thinking_blocks(summary).strip()
-            if streaming_callback:
-                streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
+            pass
+
+        if streaming_callback:
+            streaming_callback("Compiling final answer from all findings...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_generation"}, turn_rag_history_for_callback)

-
-
-        f"
+        final_prompt_parts = [
+            f"**User's Original Request:**\n{original_user_prompt}\n",
+            f"**Final Research Objectives:**\n{current_objectives}\n",
+            f"**Knowledge Scratchpad (Summary of Findings):**\n{knowledge_scratchpad}\n",
         ]
-        if objectives_text:
-            final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
         if accumulated_context:
-
-            "
-            f"{accumulated_context}\n
+            final_prompt_parts.append(
+                "**Supporting Raw Context from Retrieved Documents:**\n---\n"
+                f"{accumulated_context}\n---\n"
             )
         else:
-
-
-
-            "
+            final_prompt_parts.append("**Supporting Raw Context:**\n(No relevant documents were retrieved.)\n")
+
+        final_prompt_parts.append(
+            "**Final Instruction:**\nSynthesize a comprehensive answer to the user's original request. "
+            "Use the 'Knowledge Scratchpad' as your primary source of information and the 'Supporting Raw Context' for specific details and quotes. "
+            "Adhere strictly to the information provided. If the information is insufficient to fully answer, state what is missing based on your 'Final Research Objectives'."
         )
-
+        final_prompt_parts.append(self.ai_full_header)

         final_answer = self.generate_text(
-            prompt="\n".join(
-            images=images,
-
-
-            stream=stream,
-            temperature=temperature,
-            top_k=top_k,
-            top_p=top_p,
-            repeat_penalty=repeat_penalty,
-            repeat_last_n=repeat_last_n,
-            seed=seed,
-            n_threads=n_threads,
-            ctx_size=ctx_size,
+            prompt="\n".join(final_prompt_parts),
+            images=images, system_prompt=system_prompt, n_predict=n_predict, stream=stream,
+            temperature=temperature, top_k=top_k, top_p=top_p, repeat_penalty=repeat_penalty,
+            repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads, ctx_size=ctx_size,
             streaming_callback=streaming_callback if stream else None,
             **llm_generation_kwargs
         )
         answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer
+
+        if streaming_callback:
+            streaming_callback("Final answer generated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_generation"}, turn_rag_history_for_callback)

         return {
             "final_answer": answer_text,
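For orientation only, a minimal sketch (not part of this release) of the prompt that the rewritten block above assembles before it is handed to generate_text; the values are illustrative placeholders, and in the actual code the AI header is appended as the last part.

# Illustrative only: approximate shape of the joined prompt built above.
final_prompt_parts = [
    "**User's Original Request:**\nSummarize the deployment notes.\n",
    "**Final Research Objectives:**\n1. Find the deployment steps.\n",
    "**Knowledge Scratchpad (Summary of Findings):**\nThe notes describe a two-step deployment.\n",
    "**Supporting Raw Context from Retrieved Documents:**\n---\nSource: notes.md (Sim: 82.0%)\n...\n---\n",
    "**Final Instruction:**\nSynthesize a comprehensive answer to the user's original request. ...",
]
final_prompt = "\n".join(final_prompt_parts)  # passed as prompt= to self.generate_text(...)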
@@ -1141,8 +1198,304 @@ Respond with a JSON object containing ONE of the following structures:
             "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
             "error": None
         }
+
+    def generate_with_mcp_rag(
+        self,
+        prompt: str,
+        rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
+        system_prompt: str = None,
+        objective_extraction_system_prompt="Extract objectives",
+        images: Optional[List[str]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_tool_calls: int = 10,
+        max_llm_iterations: int = 15,
+        tool_call_decision_temperature: float = 0.0,
+        final_answer_temperature: float = None,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
+        build_plan: bool = True,
+        rag_vectorizer_name: Optional[str] = None,
+        rag_top_k: int = 5,
+        rag_min_similarity_percent: float = 70.0,
+        **llm_generation_kwargs
+    ) -> Dict[str, Any]:
+        """
+        Generates a response using a stateful agent that can choose between calling standard
+        MCP tools and querying a RAG database, all within a unified reasoning loop.
+        """
+        if not self.binding:
+            return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
+        if not self.mcp:
+            return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
+
+        # --- Initialize Agent State ---
+        turn_history: List[Dict[str, Any]] = []
+        original_user_prompt = prompt
+        knowledge_scratchpad = "No information gathered yet."
+        current_objectives = ""
+        agent_work_history = []
+        tool_calls_made_this_turn = []
+        llm_iterations = 0
+
+        # --- 1. Discover MCP Tools and Inject the RAG Tool ---
+        if tools is None:
+            try:
+                mcp_tools = self.mcp.discover_tools(force_refresh=True)
+                if not mcp_tools: ASCIIColors.warning("No MCP tools discovered.")
+            except Exception as e_disc:
+                return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
+        else:
+            mcp_tools = tools
+
+        # Define the RAG tool and add it to the list
+        rag_tool_definition = {
+            "name": "research::query_database",
+            "description": (
+                "Queries a vector database to find relevant text chunks based on a natural language query. "
+                "Use this to gather information, answer questions, or find context for a task before using other tools."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The natural language query to search for. Be specific to get the best results."
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+        available_tools = [rag_tool_definition] + mcp_tools
+
+        # --- 2. Optional Initial Objectives Extraction ---
+        formatted_tools_list = "\n".join([
+            f"- Full Tool Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
+            for t in available_tools
+        ])
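For reference, a sketch (reconstructed from the f-string above, so whitespace and wording of the rendering are approximate) of what the injected RAG tool contributes to formatted_tools_list, i.e. what the planner sees:

# Illustrative rendering of the injected RAG tool inside formatted_tools_list:
#   - Full Tool Name: research::query_database
#     Description: Queries a vector database to find relevant text chunks based on a natural language query. Use this to gather information, answer questions, or find context for a task before using other tools.
#     Input Schema: {"type": "object", "properties": {"query": {"type": "string", ...}}, "required": ["query"]}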
+        if build_plan:
+            if streaming_callback:
+                streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_history)
+
+            # The enhanced prompt is placed inside the original parenthesis format.
+            # The f-strings for tool lists and user prompts are preserved.
+
+            obj_prompt = (
+                "You are a hyper-efficient and logical project planner. Your sole purpose is to analyze the user's request and create a concise, numbered list of actionable steps to fulfill it.\n\n"
+                "Your plan must be the most direct and minimal path to the user's goal.\n\n"
+                "**Your Core Directives:**\n\n"
+                "1. **Analyze the Request:** Break down the user's prompt into the essential, core tasks required.\n"
+                "2. **Evaluate Tools with Extreme Scrutiny:** For each task, determine if a tool is **absolutely necessary**. Do not suggest a tool unless the task is impossible without it.\n"
+                "3. **Prioritize Simplicity:** If the request can be answered directly without any tools (e.g., it's a simple question or requires a creative response), your entire plan should be a single step: \"1. Formulate a direct answer to the user's request.\"\n\n"
+                "**CRITICAL RULES:**\n"
+                "* **DO NOT** add any steps, objectives, or tool uses that were not explicitly required by the user.\n"
+                "* **DO NOT** attempt to use a tool just because it is available. Most requests will not require any tools.\n"
+                "* **DO NOT** add \"nice-to-have\" or \"extra\" tasks. Stick strictly to the request.\n\n"
+                "Your final output must be a short, numbered list of steps. Do not call any tools in this planning phase.\n\n"
+                "---\n"
+                "**Available Tools:**\n"
+                f"{formatted_tools_list}\n\n"
+                "**User Request:**\n"
+                f'"{original_user_prompt}"'
+            )
+            initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
+            current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
+
+            if streaming_callback:
+                streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_history)
+        else:
+            current_objectives = f"Fulfill the user's request: '{original_user_prompt}'"
+
+        turn_history.append({"type": "initial_objectives", "content": current_objectives})
+


+        # --- 3. Main Agent Loop ---
+        while llm_iterations < max_llm_iterations:
+            llm_iterations += 1
+            if streaming_callback:
+                streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+
+            # Format agent history for the prompt
+            formatted_agent_history = "No actions taken yet."
+            if agent_work_history:
+                history_parts = []
+                for i, entry in enumerate(agent_work_history):
+                    history_parts.append(
+                        f"### Step {i+1}:\n"
+                        f"**Thought:** {entry['thought']}\n"
+                        f"**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n"
+                        f"**Observation:**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```"
+                    )
+                formatted_agent_history = "\n\n".join(history_parts)
+
+            # Construct the "Thinking & Planning" prompt
+            decision_prompt_template = f"""You are a strategic AI assistant. Your goal is to achieve a set of objectives by intelligently using research and system tools.
+
+--- AVAILABLE TOOLS ---
+{formatted_tools_list}
+
+--- CURRENT STATE ---
+Original User Request: {original_user_prompt}
+Current Research Objectives:
+{current_objectives}
+
+Knowledge Scratchpad (our current understanding):
+{knowledge_scratchpad}
+
+--- AGENT WORK HISTORY (previous steps in this turn) ---
+{formatted_agent_history}
+
+--- INSTRUCTIONS ---
+1. **Analyze:** Review the entire work history, objectives, and scratchpad.
+2. **Update State:** Based on the latest observations, update the scratchpad and refine the objectives. The scratchpad should be a comprehensive summary of ALL knowledge gathered.
+3. **Decide Next Action:** Choose ONE of the following: `call_tool`, `final_answer`, or `clarify`. Always prefer to gather information with `research::query_database` before attempting to use other tools if you lack context.
+
+--- OUTPUT FORMAT ---
+Respond with a single JSON object inside a JSON markdown tag. Use this exact schema:
+```json
+{{
+    "thought": "Your reasoning for the chosen action, analyzing how the work history informs your next step. Explain why you are choosing a specific tool (or to answer).",
+    "updated_scratchpad": "The new, complete, and comprehensive summary of all knowledge gathered. Integrate new findings with old ones. if no new knowledge is gathered, this should be an empty string.",
+    "updated_objectives": "The full, potentially revised, list of objectives. If no change, repeat the current list.",
+    "action": "The chosen action: 'call_tool', 'final_answer', or 'clarify'.",
+    "tool_name": "(string, if action is 'call_tool') The full 'alias::tool_name' of the tool to use.",
+    "tool_params": {{"query": "...", "param2": "..."}},
+    "clarification_request": "(string, if action is 'clarify') Your question to the user."
+}}
+```
+"""
+            raw_llm_decision_json = self.generate_text(
+                prompt=decision_prompt_template, n_predict=2048, temperature=tool_call_decision_temperature
+            )
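As a concrete illustration of the contract above (not output from the package), a planner reply that robust_json_parser would need to extract from the raw model text might carry the following content, shown here as a Python dict with the schema's keys:

# Illustrative only: a decision the parser below should accept.
example_decision = {
    "thought": "I lack context about the user's topic, so I query the knowledge base before using any system tool.",
    "updated_scratchpad": "",
    "updated_objectives": "1. Gather background from the database. 2. Answer the user's request.",
    "action": "call_tool",
    "tool_name": "research::query_database",
    "tool_params": {"query": "background on the user's topic"},
    "clarification_request": ""
}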
+
+            # --- 4. Parse LLM's plan and update state ---
+            try:
+                llm_decision = robust_json_parser(raw_llm_decision_json)
+                turn_history.append({"type": "llm_plan", "content": llm_decision})
+
+                current_objectives = llm_decision.get("updated_objectives", current_objectives)
+                new_scratchpad = llm_decision.get("updated_scratchpad")
+
+                if new_scratchpad and new_scratchpad != knowledge_scratchpad:
+                    knowledge_scratchpad = new_scratchpad
+                    if streaming_callback:
+                        streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP, {"id": "scratchpad_update"}, turn_history)
+                        streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id":"scratch_pad_update"}, turn_history)
+
+            except (json.JSONDecodeError, AttributeError, KeyError) as e:
+                ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}. Error: {e}")
+                turn_history.append({"type": "error", "content": f"Failed to parse LLM plan: {raw_llm_decision_json}"})
+                break
+
+            if streaming_callback:
+                streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
+
+            # --- 5. Execute the chosen action ---
+            action = llm_decision.get("action")
+            tool_result = None
+
+            if action == "call_tool":
+                if len(tool_calls_made_this_turn) >= max_tool_calls:
+                    ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
+                    break
+
+                tool_name = llm_decision.get("tool_name")
+                tool_params = llm_decision.get("tool_params", {})
+
+                if not tool_name or not isinstance(tool_params, dict):
+                    ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
+                    break
+
+                if streaming_callback:
+                    streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
+
+                try:
+                    # ** DYNAMIC TOOL/RAG DISPATCH **
+                    if tool_name == "research::query_database":
+                        query = tool_params.get("query")
+                        if not query:
+                            tool_result = {"error": "RAG tool called without a 'query' parameter."}
+                        else:
+                            retrieved_chunks = rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
+                            if not retrieved_chunks:
+                                tool_result = {"summary": "No relevant documents found for the query.", "chunks": []}
+                            else:
+                                tool_result = {
+                                    "summary": f"Found {len(retrieved_chunks)} relevant document chunks.",
+                                    "chunks": retrieved_chunks
+                                }
+                    else:
+                        # Standard MCP tool execution
+                        tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
+
+                except Exception as e_exec:
+                    trace_exception(e_exec)
+                    tool_result = {"error": f"An exception occurred while executing tool '{tool_name}': {e_exec}"}
+
+                # Record the work cycle in the agent's history
+                work_entry = {
+                    "thought": llm_decision.get("thought", "N/A"),
+                    "tool_name": tool_name,
+                    "tool_params": tool_params,
+                    "tool_result": tool_result
+                }
+                agent_work_history.append(work_entry)
+                tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
+
+                if streaming_callback:
+                    streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
+                    streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
+
+            elif action == "clarify":
+                clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
+                return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True}
+
+            elif action == "final_answer":
+                ASCIIColors.info("LLM decided to formulate a final answer.")
+                break
+
+            else:
+                ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
+                break
+            if streaming_callback:
+                streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+
+        if streaming_callback:
+            streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+        # --- 6. Generate Final Answer ---
+        if streaming_callback:
+            streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
+
+        final_answer_prompt = f"""You are an AI assistant providing a final, comprehensive answer based on research and tool use.
+
+--- CONTEXT ---
+Original User Request: "{original_user_prompt}"
+
+--- SUMMARY OF FINDINGS (Knowledge Scratchpad) ---
+{knowledge_scratchpad}
+
+--- INSTRUCTIONS ---
+- Synthesize a clear, complete answer for the user based ONLY on the information in the 'Summary of Findings'.
+- Address the user directly and answer their original request.
+- Do not make up information. If the findings are insufficient, state what you found and what remains unanswered.
+"""
+        final_answer_text = self.generate_text(
+            prompt=final_answer_prompt,
+            system_prompt=system_prompt,
+            images=images,
+            stream=streaming_callback is not None,
+            streaming_callback=streaming_callback,
+            temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
+            **(llm_generation_kwargs or {})
+        )
+
+        if streaming_callback:
+            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
+
+        final_answer = self.remove_thinking_blocks(final_answer_text)
+        turn_history.append({"type":"final_answer_generated", "content": final_answer})
+
+        return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
+
     def generate_code(
         self,
         prompt,