lollms-client 0.20.7__py3-none-any.whl → 0.20.8__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_core.py +679 -416
- lollms_client/lollms_discussion.py +251 -344
- lollms_client/lollms_types.py +3 -3
- lollms_client/lollms_utilities.py +97 -0
- lollms_client/mcp_bindings/remote_mcp/__init__.py +2 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.8.dist-info}/METADATA +1 -1
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.8.dist-info}/RECORD +11 -11
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.8.dist-info}/WHEEL +0 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.8.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.20.7.dist-info → lollms_client-0.20.8.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -2,7 +2,7 @@
 import requests
 from ascii_colors import ASCIIColors, trace_exception
 from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
-from lollms_client.lollms_utilities import
+from lollms_client.lollms_utilities import robust_json_parser # Keep utilities needed by core
 from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
 # Import new Abstract Base Classes and Managers
 from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
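The only change in this hunk is the import: the core module now pulls in `robust_json_parser` from `lollms_client.lollms_utilities`, which the agent loop further down uses to parse the model's JSON decisions instead of a bare `json.loads`. A minimal sketch of that call pattern, with an illustrative payload that is not taken from the package:

```python
from lollms_client.lollms_utilities import robust_json_parser

# Models often wrap their JSON in a ```json fence or add stray prose around it;
# robust_json_parser is expected to recover the dict anyway (assumption based on
# how lollms_core.py calls it in the loop below).
raw_llm_decision_json = '```json\n{"action": "final_answer", "action_details": {}}\n```'

try:
    llm_decision = robust_json_parser(raw_llm_decision_json)  # returns a dict
except Exception:
    llm_decision = None  # lollms_core retries the generation up to max_json_retries times
```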
@@ -597,336 +597,237 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
         response_full += response
         codes = self.extract_code_blocks(response, format=code_tag_format)
         return codes
+    def _build_final_decision_prompt(
+        self,
+        formatted_tools_list: str,
+        formatted_conversation_history: str,
+        current_plan: str,
+        knowledge_scratchpad: str,
+        agent_work_history_str: str,
+        ctx_size: Optional[int],
+    ) -> str:
+        # This helper function for building the main decision prompt remains the same.
+        # It is already robust and follows all formatting constraints.
+        final_agent_history = agent_work_history_str
+        if ctx_size:
+            get_token_count = len
+            # This is a simplified representation of the static prompt for size calculation
+            static_parts_text = "You are a task-oriented AI assistant..."
+            fixed_parts_size = get_token_count(static_parts_text)
+            available_space_for_history = ctx_size - fixed_parts_size - 100
+            if get_token_count(agent_work_history_str) > available_space_for_history:
+                if available_space_for_history > 0:
+                    truncation_point = len(agent_work_history_str) - available_space_for_history
+                    final_agent_history = ("[...history truncated due to context size...]\n" + agent_work_history_str[truncation_point:])
+                    ASCIIColors.warning("Agent history was truncated to fit the context window.")
+                else:
+                    final_agent_history = "[...history truncated due to context size...]"
+        return (
+            "You are a task-oriented AI assistant. Your goal is to follow a plan to fulfill a user's request, using tools and asking for clarification when needed.\n\n"
+            "--- AVAILABLE TOOLS ---\n"
+            f"{formatted_tools_list}\n\n"
+            "--- CONVERSATION HISTORY ---\n"
+            f"{formatted_conversation_history}\n\n"
+            "--- CURRENT PLAN & KNOWLEDGE ---\n"
+            f"Current Plan:\n{current_plan}\n\n"
+            f"Knowledge Scratchpad (summary of all findings so far):\n{knowledge_scratchpad}\n\n"
+            "--- YOUR WORK SO FAR (in this turn) ---\n"
+            f"{final_agent_history}\n\n"
+            "--- YOUR TASK ---\n"
+            "1. **Analyze the Full Context:** Review the entire conversation, your plan, the scratchpad, and your work history.\n"
+            "2. **Update Your State:** Based on the latest tool observation, update the scratchpad to synthesize ALL knowledge gathered. Update the plan by marking completed steps or refining next steps.\n"
+            " - `call_tool`: If the next step in the plan requires a tool.\n"
+            " - `clarify`: If you are blocked, the user's request is ambiguous, or you need more information to proceed. Ask a specific, targeted question.\n"
+            " - `final_answer`: If all steps in the plan are complete and you have enough information to answer the user's request.\n\n"
+            "--- OUTPUT FORMAT ---\n"
+            "Respond with a single JSON object inside a ```json markdown tag.\n"
+            "```json\n"
+            "{\n"
+            ' "thought": "Your reasoning. Analyze the latest observation and decide what to do next based on the plan and history. State which step of the plan you are working on.",\n'
+            ' "updated_scratchpad": "The new, complete summary of all knowledge gathered so far. Integrate the latest findings.",\n'
+            ' "updated_plan": "The new, remaining plan. Remove steps that are now complete. Refine next steps if needed.",\n'
+            ' "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
+            ' "action_details": {\n'
+            ' "tool_name": "(Required if action is \'call_tool\') The full \'alias::tool_name\' of the tool to use.",\n'
+            ' "tool_params": {},\n'
+            ' "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
+            " }\n"
+            "}\n"
+            "```"
+        )
+
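For reference, a decision object that satisfies the schema demanded by `_build_final_decision_prompt` would look roughly like the following once parsed into Python; the concrete values are invented for illustration:

```python
# Hypothetical parsed decision, matching the keys the agent loop requires
# ("action" and "action_details" are mandatory, the rest feed the next iteration).
llm_decision = {
    "thought": "Step 1 of the plan needs the weather tool before I can answer.",
    "updated_scratchpad": "User wants tomorrow's weather for Paris. Nothing retrieved yet.",
    "updated_plan": "1. Call the weather tool for Paris.\n2. Summarize the result for the user.",
    "action": "call_tool",
    "action_details": {
        "tool_name": "weather::get_forecast",   # invented alias::tool_name
        "tool_params": {"city": "Paris"},
        "clarification_request": ""
    },
}
```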
 
-    # --- Function Calling with MCP ---
     def generate_with_mcp(
         self,
         prompt: str,
-
+        system_prompt:str = None,
+        objective_extraction_system_prompt="Build a plan",
         images: Optional[List[str]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
         max_tool_calls: int = 5,
-        max_llm_iterations: int = 10,
-
-
-
-
+        max_llm_iterations: int = 10,
+        ctx_size: Optional[int] = None,
+        max_json_retries: int = 1,
+        tool_call_decision_temperature: float = 0.0,
+        final_answer_temperature: float = None,
+        streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
+        # The `extract_plan` parameter has been removed.
         **llm_generation_kwargs
     ) -> Dict[str, Any]:
-
-
+        if not self.binding or not self.mcp:
+            return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}
 
-
-
-
-            images (Optional[List[str]]): Images provided with the current user prompt.
-            tools (Optional[List[Dict[str, Any]]]): A list of MCP tool definitions available for this call.
-                If None, tools will be discovered from the MCP binding.
-            max_tool_calls (int): Maximum number of distinct tool calls allowed in one interaction turn.
-            max_llm_iterations (int): Maximum number of times the LLM can decide to call a tool
-                before being forced to generate a final answer.
-            tool_call_decision_temperature (float): Temperature for LLM when deciding on tool calls.
-            final_answer_temperature (float): Temperature for LLM when generating the final answer.
-            streaming_callback (Optional[Callable]): Callback for streaming LLM responses (tool decisions/final answer).
-                Signature: (chunk_str, msg_type, metadata_dict, history_list_of_dicts_for_this_turn) -> bool
-            interactive_tool_execution (bool): If True, ask user for confirmation before executing each tool.
+        turn_history: List[Dict[str, Any]] = []
+        # Renamed for clarity: `prompt` is the full conversation context.
+        conversation_context = prompt
 
-        Returns:
-            Dict[str, Any]: A dictionary containing:
-                - "final_answer" (str): The LLM's final textual answer.
-                - "tool_calls" (List[Dict]): A list of tools called, their params, and results.
-                - "error" (Optional[str]): Error message if something went wrong.
-        """
-        if not self.binding:
-            return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
-        if not self.mcp:
-            return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
-
-        turn_history: List[Dict[str, Any]] = [] # Tracks this specific turn's interactions (LLM thoughts, tool calls, tool results)
-
-        # 1. Discover tools if not provided
         if tools is None:
             try:
                 tools = self.mcp.discover_tools(force_refresh=True)
-                if not tools:
-                    ASCIIColors.warning("No MCP tools discovered by the binding.")
+                if not tools: ASCIIColors.warning("No MCP tools discovered.")
             except Exception as e_disc:
                 return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
-
-        if not tools: # If still no tools after discovery attempt
-            ASCIIColors.info("No tools available for function calling. Generating direct response.")
-            final_answer = self.remove_thinking_blocks(self.generate_text(
-                prompt=prompt,
-                system_prompt= (discussion_history[0]['content'] if discussion_history and discussion_history[0]['role'] == 'system' else "") + "\nYou are a helpful assistant.", # Basic system prompt
-                images=images,
-                stream=streaming_callback is not None, # stream if callback is provided
-                streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, None, turn_history) if streaming_callback else None, # Adapt callback
-                temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
-                **(llm_generation_kwargs or {})
-            ))
-            if isinstance(final_answer, dict) and "error" in final_answer: # Handle generation error
-                return {"final_answer": "", "tool_calls": [], "error": final_answer["error"]}
-            return {"final_answer": final_answer, "tool_calls": [], "error": None}
 
+        if not tools:
+            final_answer_text = self.generate_text(prompt=prompt, system_prompt=system_prompt, stream=streaming_callback is not None, streaming_callback=streaming_callback)
+            return {"final_answer": self.remove_thinking_blocks(final_answer_text), "tool_calls": [], "error": None}
 
-
-
-
-            ])
-
-
-
-
-
-
-
-
-
-
+        # --- Agent State Initialization ---
+        knowledge_scratchpad = "No information gathered yet."
+        agent_work_history = []
+        formatted_tools_list = "\n".join([f"- Tool: {t.get('name')}\n Description: {t.get('description')}\n Schema: {json.dumps(t.get('input_schema'))}" for t in tools])
+
+        # --- Unconditional Plan Generation ---
+        # This step now runs at the beginning of every call.
+        if streaming_callback:
+            streaming_callback("Building/Revising plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "plan_extraction"}, turn_history)
+
+        obj_prompt = (
+            "You are an Intelligent Workflow Planner. Your mission is to create the most efficient plan possible by analyzing the user's request within the context of the full conversation.\n\n"
+            "Your Guiding Principle: **Always choose the path of least resistance.**\n\n"
+            "**Your Logical Process:**\n"
+            "1. **Analyze the Entire Conversation:** Understand the user's ultimate goal based on all interaction so far.\n"
+            "2. **Check for a Single-Step Solution:** Scrutinize the available tools. Can a single tool call directly achieve the user's current goal? \n"
+            "3. **Formulate a Plan:** Based on your analysis, create a concise, numbered list of steps to achieve the goal. If the goal is simple, this may be only one step. If it is complex or multi-turn, it may be several steps.\n\n"
+            "**CRITICAL RULES:**\n"
+            "* **MANDATORY: NEVER add steps the user did not ask for.** Do not embellish or add 'nice-to-have' features.\n"
+            "* **Focus on the Goal:** Your plan should directly address the user's request as it stands now in the conversation.\n\n"
+            "---\n"
+            "**Available Tools:**\n"
+            f"{formatted_tools_list}\n\n"
+            "**Full Conversation History:**\n"
+            f'"{conversation_context}"'
+        )
+        initial_plan_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
+        current_plan = self.remove_thinking_blocks(initial_plan_gen).strip()
 
+        if streaming_callback:
+            streaming_callback(f"Current plan:\n{current_plan}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "plan_extraction"}, turn_history)
 
+        turn_history.append({"type": "initial_plan", "content": current_plan})
+
+        # --- Main Agent Loop ---
730
|
tool_calls_made_this_turn = []
|
|
689
731
|
llm_iterations = 0
|
|
690
732
|
|
|
691
733
|
while llm_iterations < max_llm_iterations:
|
|
692
734
|
llm_iterations += 1
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
# Add tool execution results from previous iterations in this turn to the history string
|
|
706
|
-
for tc_info in tool_calls_made_this_turn:
|
|
707
|
-
if tc_info.get("result"): # Only add if there's a result (successful or error)
|
|
708
|
-
history_str += f"{self.ai_full_header}(Executed tool '{tc_info['name']}' with params {tc_info['params']}. Result: {json.dumps(tc_info['result'])})\n"
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
decision_prompt_template = f"""You are an AI assistant that can use tools to answer user requests.
|
|
712
|
-
Available tools:
|
|
713
|
-
{formatted_tools_list}
|
|
714
|
-
|
|
715
|
-
Current conversation:
|
|
716
|
-
{history_str}
|
|
717
|
-
|
|
718
|
-
Based on the available tools and the current conversation, decide the next step.
|
|
719
|
-
Respond with a JSON object containing ONE of the following structures:
|
|
720
|
-
1. If you need to use a tool:
|
|
721
|
-
{{"action": "call_tool", "tool_name": "<name_of_tool_to_call>", "tool_params": {{<parameters_for_tool_as_json_object>}}}}
|
|
722
|
-
2. If you can answer directly without using a tool OR if you have sufficient information from previous tool calls:
|
|
723
|
-
{{"action": "final_answer"}}
|
|
724
|
-
3. If the user's request is unclear or you need more information before deciding:
|
|
725
|
-
{{"action": "clarify", "clarification_request": "<your_question_to_the_user>"}}
|
|
726
|
-
""" # No {self.ai_full_header} here, generate_code will get raw JSON
|
|
727
|
-
|
|
728
|
-
if streaming_callback:
|
|
729
|
-
streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
|
|
730
|
-
|
|
731
|
-
# Use generate_code to get structured JSON output from LLM
|
|
732
|
-
# Note: generate_code itself uses generate_text. We are asking for JSON here.
|
|
733
|
-
raw_llm_decision_json = self.generate_text(
|
|
734
|
-
prompt=decision_prompt_template, # This is the full prompt for the LLM
|
|
735
|
-
n_predict=512, # Reasonable size for decision JSON
|
|
736
|
-
temperature=tool_call_decision_temperature,
|
|
737
|
-
images=images
|
|
738
|
-
# `images` are part of the history_str if relevant to the binding
|
|
739
|
-
# streaming_callback=None, # Decisions are usually not streamed chunk by chunk
|
|
735
|
+
# ... The self-correction and action execution loop remains the same ...
|
|
736
|
+
if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
|
|
737
|
+
formatted_agent_history = "No actions taken yet in this turn."
|
|
738
|
+
if agent_work_history:
|
|
739
|
+
history_parts = [ f"### Step {i+1}:\n**Thought:** {entry['thought']}\n**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n**Observation (Tool Output):**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```" for i, entry in enumerate(agent_work_history)]
|
|
740
|
+
formatted_agent_history = "\n\n".join(history_parts)
|
|
741
|
+
|
|
742
|
+
llm_decision = None
|
|
743
|
+
current_decision_prompt = self._build_final_decision_prompt(
|
|
744
|
+
formatted_tools_list=formatted_tools_list, formatted_conversation_history=conversation_context,
|
|
745
|
+
current_plan=current_plan, knowledge_scratchpad=knowledge_scratchpad,
|
|
746
|
+
agent_work_history_str=formatted_agent_history, ctx_size=ctx_size
|
|
740
747
|
)
|
|
741
|
-
if streaming_callback:
|
|
742
|
-
streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
|
|
743
748
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
ASCIIColors.error("LLM failed to provide a decision JSON.")
|
|
747
|
-
turn_history.append({"type": "error", "content": "LLM failed to provide a decision."})
|
|
748
|
-
return {"final_answer": "I'm sorry, I encountered an issue trying to process your request.", "tool_calls": tool_calls_made_this_turn, "error": "LLM decision JSON was empty."}
|
|
749
|
-
|
|
750
|
-
processed_raw_json = raw_llm_decision_json.strip() # Strip whitespace first
|
|
751
|
-
try:
|
|
752
|
-
llm_decision = json.loads(processed_raw_json)
|
|
753
|
-
turn_history.append({"type": "llm_decision", "content": llm_decision})
|
|
754
|
-
except json.JSONDecodeError:
|
|
755
|
-
ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}")
|
|
+            for i in range(max_json_retries + 1):
+                raw_llm_decision_json = self.generate_text(prompt=current_decision_prompt, n_predict=2048, temperature=tool_call_decision_temperature)
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            if llm_decision is None: # If parsing failed and couldn't recover
-                return {"final_answer": "I'm sorry, I had trouble understanding the next step due to a formatting issue.", "tool_calls": tool_calls_made_this_turn, "error": "Invalid JSON from LLM for decision."}
-
+                    llm_decision = robust_json_parser(raw_llm_decision_json)
+                    if "action" not in llm_decision or "action_details" not in llm_decision:
+                        raise KeyError("The JSON is missing required keys: 'action' and/or 'action_details'.")
+                    break
+                except (json.JSONDecodeError, AttributeError, KeyError) as e:
+                    error_message = f"JSON parsing failed (Attempt {i+1}/{max_json_retries+1}). Error: {e}"
+                    ASCIIColors.warning(error_message)
+                    if streaming_callback: streaming_callback(error_message, MSG_TYPE.MSG_TYPE_WARNING, None, turn_history)
+                    turn_history.append({"type": "error", "content": f"Invalid JSON response: {raw_llm_decision_json}"})
+                    if i >= max_json_retries:
+                        ASCIIColors.error("Max JSON retries reached. Aborting agent loop.")
+                        llm_decision = None
+                        break
+                    current_decision_prompt = ( "You previously failed..." ) # Self-correction prompt
+            if not llm_decision: break
+
+            turn_history.append({"type": "llm_decision", "content": llm_decision})
+            current_plan = llm_decision.get("updated_plan", current_plan)
+            knowledge_scratchpad = llm_decision.get("updated_scratchpad", knowledge_scratchpad)
             action = llm_decision.get("action")
+            action_details = llm_decision.get("action_details", {})
+            if streaming_callback: streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
 
775
|
if action == "call_tool":
|
|
783
|
-
if len(tool_calls_made_this_turn) >= max_tool_calls:
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
tool_name = llm_decision.get("tool_name")
|
|
789
|
-
tool_params = llm_decision.get("tool_params", {})
|
|
790
|
-
|
|
791
|
-
if not tool_name:
|
|
792
|
-
ASCIIColors.warning("LLM decided to call a tool but didn't specify tool_name.")
|
|
793
|
-
current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
|
|
794
|
-
break # Or ask LLM to try again without this faulty decision in history
|
|
795
|
-
|
|
796
|
-
tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
|
|
797
|
-
turn_history.append(tool_call_info)
|
|
798
|
-
if streaming_callback:
|
|
799
|
-
streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
|
|
800
|
-
streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
|
|
801
|
-
|
|
802
|
-
# Interactive execution if enabled
|
|
803
|
-
if interactive_tool_execution:
|
|
804
|
-
try:
|
|
805
|
-
user_confirmation = input(f"AI wants to execute tool '{tool_name}' with params {tool_params}. Allow? (yes/no/details): ").lower()
|
|
806
|
-
if user_confirmation == "details":
|
|
807
|
-
tool_def_for_details = next((t for t in tools if t.get("name") == tool_name), None)
|
|
808
|
-
print(f"Tool details: {json.dumps(tool_def_for_details, indent=2)}")
|
|
809
|
-
user_confirmation = input(f"Allow execution of '{tool_name}'? (yes/no): ").lower()
|
|
810
|
-
|
|
811
|
-
if user_confirmation != "yes":
|
|
812
|
-
ASCIIColors.info("Tool execution cancelled by user.")
|
|
813
|
-
tool_result = {"error": "Tool execution cancelled by user."}
|
|
814
|
-
# Add this info to conversation for LLM
|
|
815
|
-
current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' execution was cancelled by the user. What should I do next?)"})
|
|
816
|
-
tool_call_info["result"] = tool_result # Record cancellation
|
|
817
|
-
tool_calls_made_this_turn.append(tool_call_info)
|
|
818
|
-
continue # Back to LLM for next decision
|
|
819
|
-
except Exception as e_input: # Catch issues with input() e.g. in non-interactive env
|
|
820
|
-
ASCIIColors.warning(f"Error during interactive confirmation: {e_input}. Proceeding without confirmation.")
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
if streaming_callback:
|
|
824
|
-
streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
|
|
825
|
-
|
|
776
|
+
if len(tool_calls_made_this_turn) >= max_tool_calls: break
|
|
777
|
+
tool_name = action_details.get("tool_name")
|
|
778
|
+
tool_params = action_details.get("tool_params", {})
|
|
779
|
+
if not tool_name or not isinstance(tool_params, dict): break
|
|
780
|
+
if streaming_callback: streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
|
|
826
781
|
tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
tool_calls_made_this_turn.append(
|
|
782
|
+
work_entry = { "thought": llm_decision.get("thought", "N/A"), "tool_name": tool_name, "tool_params": tool_params, "tool_result": tool_result }
|
|
783
|
+
agent_work_history.append(work_entry)
|
|
784
|
+
tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
|
|
830
785
|
if streaming_callback:
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
if streaming_callback:
|
|
834
|
-
streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
|
|
835
|
-
|
|
836
|
-
# Add tool execution result to conversation for the LLM
|
|
837
|
-
# The format of this message can influence how the LLM uses the tool output.
|
|
838
|
-
# current_conversation.append({"role": "tool_result", "tool_name": tool_name, "content": json.dumps(tool_result)}) # More structured
|
|
839
|
-
current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' executed. Result: {json.dumps(tool_result)})"})
|
|
840
|
-
|
|
841
|
-
|
|
786
|
+
streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
|
|
787
|
+
streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
|
|
842
788
|
elif action == "clarify":
|
|
843
|
-
clarification_request =
|
|
844
|
-
|
|
845
|
-
streaming_callback(clarification_request, MSG_TYPE.MSG_TYPE_FULL, {"type": "clarification_request"}, turn_history)
|
|
846
|
-
turn_history.append({"type":"clarification_request_sent", "content": clarification_request})
|
|
847
|
-
return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None}
|
|
848
|
-
|
|
789
|
+
clarification_request = action_details.get("clarification_request", "I need more information.")
|
|
790
|
+
return { "final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True }
|
|
849
791
|
elif action == "final_answer":
|
|
850
792
|
ASCIIColors.info("LLM decided to formulate a final answer.")
|
|
851
|
-
current_conversation.append({"role":"assistant", "content":"(I will now formulate the final answer based on the information gathered.)"}) # Inform LLM's "thought process"
|
|
852
|
-
break # Exit loop to generate final answer
|
|
853
|
-
|
|
854
|
-
else:
|
|
855
|
-
ASCIIColors.warning(f"LLM returned unknown action: {action}")
|
|
856
|
-
current_conversation.append({"role":"assistant", "content":f"(Received an unexpected decision: {action}. I will try to answer directly.)"})
|
|
857
|
-
break # Exit loop
|
|
858
|
-
|
|
859
|
-
# Safety break if too many iterations without reaching final answer or max_tool_calls
|
|
860
|
-
if llm_iterations >= max_llm_iterations:
|
|
861
|
-
ASCIIColors.warning("Max LLM iterations reached. Forcing final answer.")
|
|
862
|
-
current_conversation.append({"role":"assistant", "content":"(Max iterations reached. I will now try to formulate an answer.)"})
|
|
863
793
|
break
|
|
864
|
-
|
|
865
|
-
# 3. Generate final answer if LLM decided to, or if loop broke
|
|
866
|
-
if streaming_callback:
|
|
867
|
-
streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_history)
|
|
868
|
-
|
|
869
|
-
# Construct the final prompt string for generate_text from current_conversation
|
|
870
|
-
final_prompt_str = ""
|
|
871
|
-
final_system_prompt = ""
|
|
872
|
-
|
|
873
|
-
# Consolidate system messages if any
|
|
874
|
-
interim_history_for_final_answer = []
|
|
875
|
-
for msg in current_conversation:
|
|
876
|
-
if msg["role"] == "system":
|
|
877
|
-
final_system_prompt += msg["content"] + "\n"
|
|
878
794
|
else:
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
final_answer_text = self.generate_text(
|
|
900
|
-
prompt=current_prompt_for_final_answer, # Pass the conversation history as the prompt
|
|
901
|
-
system_prompt=final_system_prompt.strip(),
|
|
902
|
-
images=images if not tool_calls_made_this_turn else None, # Only pass initial images if no tool calls happened (context might be lost)
|
|
903
|
-
stream=streaming_callback is not None,
|
|
904
|
-
streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, {"type":"final_answer_chunk"}, turn_history) if streaming_callback else None,
|
|
905
|
-
temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
|
|
906
|
-
**(llm_generation_kwargs or {})
|
|
795
|
+
ASCIIColors.warning(f"LLM returned unknown action: '{action}'. Forcing final answer.")
|
|
796
|
+
break
|
|
797
|
+
if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
|
|
+
+        # --- Final Answer Synthesis ---
+        # This part remains the same.
+        if streaming_callback: streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
+        final_answer_prompt = (
+            "You are an AI assistant tasked with providing a final, comprehensive answer to the user based on the research performed.\n\n"
+            "--- FULL CONVERSATION CONTEXT ---\n"
+            f"{conversation_context}\n\n"
+            "--- SUMMARY OF FINDINGS (Your Knowledge Scratchpad) ---\n"
+            f"{knowledge_scratchpad}\n\n"
+            "--- INSTRUCTIONS ---\n"
+            "- Synthesize a clear and complete answer for the user based ONLY on the information in the 'Summary of Findings'.\n"
+            "- Address the user directly and answer their latest query, considering the full conversation.\n"
+            "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
+            "- Format your response clearly using markdown where appropriate.\n"
         )
-
-        if streaming_callback:
-
-
-
-            turn_history.append({"type":"error", "content":f"LLM failed to generate final answer: {final_answer_text['error']}"})
-            return {"final_answer": "", "tool_calls": tool_calls_made_this_turn, "error": final_answer_text["error"]}
-
-        turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
-        return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}
-
+        final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
+        if streaming_callback: streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
+        final_answer = self.remove_thinking_blocks(final_answer_text)
+        turn_history.append({"type":"final_answer_generated", "content": final_answer})
+        return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
819
|
def generate_text_with_rag(
|
|
920
820
|
self,
|
|
921
821
|
prompt: str,
|
|
922
822
|
rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
|
|
823
|
+
system_prompt: str = "",
|
|
824
|
+
objective_extraction_system_prompt="Extract objectives",
|
|
923
825
|
rag_query_text: Optional[str] = None,
|
|
924
826
|
rag_vectorizer_name: Optional[str] = None,
|
|
925
827
|
rag_top_k: int = 5,
|
|
926
828
|
rag_min_similarity_percent: float = 70.0,
|
|
927
|
-
max_rag_hops: int =
|
|
829
|
+
max_rag_hops: int = 3,
|
|
928
830
|
images: Optional[List[str]] = None,
|
|
929
|
-
system_prompt: str = "",
|
|
930
831
|
n_predict: Optional[int] = None,
|
|
931
832
|
stream: Optional[bool] = None,
|
|
932
833
|
temperature: Optional[float] = None,
|
|
@@ -943,13 +844,11 @@ Respond with a JSON object containing ONE of the following structures:
|
|
|
943
844
|
**llm_generation_kwargs
|
|
944
845
|
) -> Dict[str, Any]:
|
|
945
846
|
"""
|
|
946
|
-
Enhanced RAG with
|
|
947
|
-
when context grows beyond ctx_size or self.default_ctx_size.
|
|
847
|
+
Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
|
|
948
848
|
"""
|
|
949
849
|
if not self.binding:
|
|
950
850
|
return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
|
|
951
851
|
|
|
952
|
-
# Determine effective context size limit
|
|
953
852
|
effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
|
|
954
853
|
|
|
955
854
|
turn_rag_history_for_callback: List[Dict[str, Any]] = []
|
|
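The `rag_query_function` callable keeps the same contract as before: it receives the query text, an optional vectorizer name, `rag_top_k`, and `rag_min_similarity_percent`, and must return a list of chunk dictionaries. The keys this method reads later are `file_path`, `chunk_text`, and `similarity_percent`, so a stand-in implementation could look like this (the backing store here is a plain list, purely for illustration):

```python
from typing import Any, Dict, List, Optional

def my_rag_query(query: str,
                 vectorizer_name: Optional[str],
                 top_k: int,
                 min_similarity_percent: float) -> List[Dict[str, Any]]:
    # Replace this with a real vector-database lookup; only the returned
    # keys matter to generate_text_with_rag.
    return [
        {
            "file_path": "docs/guide.md",          # shown as the chunk's source
            "chunk_text": "Example passage ...",   # injected into the context
            "similarity_percent": 82.5,            # used for sorting and filtering
        }
    ][:top_k]
```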
@@ -957,183 +856,251 @@ Respond with a JSON object containing ONE of the following structures:
         all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
 
         original_user_prompt = prompt
-
-
+
+        knowledge_scratchpad = "No information gathered yet."
+        current_objectives = ""
+
         if extract_objectives:
             if streaming_callback:
-                streaming_callback("Extracting
+                streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
+
             obj_prompt = (
-                "You are an expert analyst. "
-                "
-                "Output a
+                "You are an expert analyst. Your task is to extract and structure the key research objectives from the user's request below. "
+                "These objectives will guide a research process. Frame them as questions or tasks. "
+                "Output a bulleted list of objectives only without a comment.\n\n"
                 f"User request:\n\"{original_user_prompt}\""
             )
-
-
-
-                temperature=0.0,
-                n_predict=200,
-                stream=False
-            )
-            objectives_text = self.remove_thinking_blocks(obj_gen).strip()
+            initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
+            current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
+
             if streaming_callback:
-                streaming_callback(f"Objectives
+                streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
+        else:
+            current_objectives = f"Answer the user's request: '{original_user_prompt}'"
 
982
|
-
|
|
983
|
-
|
|
881
|
+
if streaming_callback:
|
|
882
|
+
streaming_callback("Generating initial search query...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
|
|
883
|
+
if not rag_query_text:
|
|
884
|
+
initial_query_gen_prompt = f"""
|
|
885
|
+
You are a research assistant. Your task is to formulate the first search query for a vector database based on an initial user request and research objectives. The query should be concise and target the most crucial information needed to start.
|
|
984
886
|
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
txt_sp = (
|
|
994
|
-
"Your objective is to analyze the provided chunks of information to determine "
|
|
995
|
-
"whether they are sufficient to reach the objective. If not, formulate a refined and focused query "
|
|
996
|
-
"that can retrieve more relevant information from a vector database. Ensure the query captures the semantic essence "
|
|
997
|
-
"of what is missing, is contextually independent, and is optimized for vector-based similarity search. "
|
|
998
|
-
"Do not repeat or rephrase earlier queries—always generate a new, meaningful atomic query targeting the current gap in knowledge."
|
|
999
|
-
)
|
|
887
|
+
--- User's Request ---
|
|
888
|
+
{original_user_prompt}
|
|
889
|
+
|
|
890
|
+
--- Initial Research Objectives ---
|
|
891
|
+
{current_objectives}
|
|
892
|
+
|
|
893
|
+
--- INSTRUCTIONS ---
|
|
894
|
+
Generate a single, effective search query.
|
|
1000
895
|
|
|
1001
|
-
|
|
896
|
+
--- OUTPUT FORMAT ---
|
|
897
|
+
Provide your response as a single JSON object with one key, "query".
|
|
1002
898
|
```json
|
|
1003
|
-
{
|
|
1004
|
-
"
|
|
1005
|
-
|
|
1006
|
-
It should capture the missing concept or insight in concise, context-rich language, avoiding reuse of earlier queries.
|
|
1007
|
-
}
|
|
899
|
+
{{
|
|
900
|
+
"query": "Your generated search query here."
|
|
901
|
+
}}
|
|
1008
902
|
```
|
|
1009
903
|
"""
|
|
1010
|
-
p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
|
|
1011
|
-
response = self.generate_code(p,system_prompt=txt_sp)
|
|
1012
904
|
try:
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
905
|
+
raw_initial_query_response = self.generate_code(initial_query_gen_prompt, system_prompt="You are a query generation expert.", temperature=0.0)
|
|
906
|
+
initial_plan = json.loads(raw_initial_query_response)
|
|
907
|
+
current_query_for_rag = initial_plan.get("query")
|
|
908
|
+
if not current_query_for_rag:
|
|
909
|
+
raise ValueError("LLM returned an empty initial query.")
|
|
910
|
+
if streaming_callback:
|
|
911
|
+
streaming_callback(f"Initial query generated:\n'{current_query_for_rag}'", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
|
|
912
|
+
except Exception as e:
|
|
913
|
+
trace_exception(e)
|
|
914
|
+
current_query_for_rag = original_user_prompt
|
|
915
|
+
if streaming_callback:
|
|
916
|
+
streaming_callback(f"Failed to generate initial query, falling back to user prompt. Error: {e}", MSG_TYPE.MSG_TYPE_WARNING, {"id": "initial_query_failure"}, turn_rag_history_for_callback)
|
|
917
|
+
else:
|
|
918
|
+
current_query_for_rag=rag_query_text
|
|
919
|
+
|
|
920
|
+
previous_queries = []
|
|
1024
921
|
|
|
922
|
+
for hop_count in range(max_rag_hops):
|
|
1025
923
|
if streaming_callback:
|
|
1026
|
-
streaming_callback(f"
|
|
1027
|
-
|
|
1028
|
-
|
|
924
|
+
streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
925
|
+
|
|
926
|
+
if streaming_callback:
|
|
927
|
+
streaming_callback(f"Executing Query:\n{current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query_exec_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
928
|
+
|
|
1029
929
|
try:
|
|
1030
|
-
|
|
930
|
+
retrieved_chunks = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
|
|
1031
931
|
except Exception as e:
|
|
1032
932
|
trace_exception(e)
|
|
1033
933
|
return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
|
|
1034
934
|
|
|
1035
935
|
hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
|
|
1036
936
|
previous_queries.append(current_query_for_rag)
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
937
|
+
|
|
938
|
+
newly_retrieved_text = ""
|
|
939
|
+
new_chunks_count = 0
|
|
940
|
+
if retrieved_chunks:
|
|
941
|
+
for chunk in retrieved_chunks:
|
|
942
|
+
doc = chunk.get("file_path", "Unknown")
|
|
943
|
+
content = str(chunk.get("chunk_text", ""))
|
|
944
|
+
sim = float(chunk.get("similarity_percent", 0.0))
|
|
945
|
+
detail = {"document": doc, "similarity": sim, "content": content, "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
|
|
946
|
+
hop_details["retrieved_chunks_details"].append(detail)
|
|
947
|
+
|
|
948
|
+
key = f"{doc}::{content[:100]}"
|
|
949
|
+
if key not in all_unique_retrieved_chunks_map:
|
|
950
|
+
all_unique_retrieved_chunks_map[key] = detail
|
|
951
|
+
newly_retrieved_text += f"--- Document: {doc} (Similarity: {sim:.1f}%)\n{content}\n---\n"
|
|
952
|
+
new_chunks_count += 1
|
|
953
|
+
|
|
954
|
+
hop_details["status"] = f"Completed, found {len(retrieved_chunks)} chunks ({new_chunks_count} new)."
|
|
1054
955
|
rag_hops_details_list.append(hop_details)
|
|
956
|
+
|
|
1055
957
|
if streaming_callback:
|
|
1056
|
-
streaming_callback(f"
|
|
958
|
+
streaming_callback(f"Retrieved {len(retrieved_chunks)} chunks ({new_chunks_count} new).", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retrieval_info_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
959
|
+
|
|
960
|
+
if new_chunks_count == 0 and hop_count > 0:
|
|
961
|
+
if streaming_callback:
|
|
962
|
+
streaming_callback("No new unique information found, stopping RAG hops.", MSG_TYPE.MSG_TYPE_INFO, {"id": "rag_stop_no_new_info"}, turn_rag_history_for_callback)
|
|
963
|
+
break
|
|
1057
964
|
|
|
1058
965
|
if streaming_callback:
|
|
1059
|
-
streaming_callback(
|
|
966
|
+
streaming_callback("Analyzing findings and refining plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
967
|
+
|
|
968
|
+
planning_system_prompt = (
|
|
969
|
+
"You are a strategic research agent via multiple hops. Your task is to analyze new information, update your "
|
|
970
|
+
"understanding, refine your research objectives, and decide on the next best action."
|
|
971
|
+
)
|
|
972
|
+
|
|
973
|
+
planning_prompt = f"""
|
|
974
|
+
--- Original User Request ---
|
|
975
|
+
{original_user_prompt}
|
|
976
|
+
|
|
977
|
+
--- Objectives you have formulated ---
|
|
978
|
+
{current_objectives}
|
|
979
|
+
|
|
980
|
+
--- Existing Knowledge Scratchpad (Summary of previous findings) ---
|
|
981
|
+
{knowledge_scratchpad}
|
|
982
|
+
|
|
983
|
+
--- Newly Retrieved Documents for this Hop ---
|
|
984
|
+
{newly_retrieved_text if newly_retrieved_text else "No new documents were found with the last query."}
|
|
985
|
+
|
|
986
|
+
--- Previous Queries (for reference, do not repeat) ---
|
|
987
|
+
- {"- ".join(previous_queries)}
|
|
988
|
+
|
|
989
|
+
--- INSTRUCTIONS ---
|
|
990
|
+
1. **Analyze & Update Knowledge:** Read the 'Newly Retrieved Documents'. Summarize the most important new facts and insights into a few bullet points for the 'new_notes_for_scratchpad'.
|
|
991
|
+
2. **Refine Objectives:** Review the 'Current Research Objectives'. Do the new documents answer any objectives? Do they reveal that some objectives need to be changed or made more specific? Rewrite the complete, updated list of objectives.
|
|
992
|
+
3. **Decide & Plan Next Query:** Based on your updated objectives and knowledge, decide if you have enough information to form a final answer.
|
|
993
|
+
- If YES, set `decision` to `false`.
|
|
994
|
+
- If NO, set `decision` to `true` and formulate a new, focused `query` to address the most critical remaining gap in your knowledge. The query must be different from previous ones.
|
|
995
|
+
|
|
996
|
+
--- OUTPUT FORMAT ---
|
|
997
|
+
Provide your response as a single JSON object inside a JSON markdown tag. Use this exact schema:
|
|
998
|
+
```json
|
|
999
|
+
{{
|
|
1000
|
+
"updated_objectives": "(string) A bulleted list of the new, refined objectives based on the latest information.",
|
|
1001
|
+
"new_notes_for_scratchpad": "(string) A concise summary in bullet points of key findings from the new documents.",
|
|
1002
|
+
"decision": "boolean (true if you need to query again, false if you are done).",
|
|
1003
|
+
"query": "(string, optional) The next query for the vector database if decision is true."
|
|
1004
|
+
}}
|
|
1005
|
+
```
|
|
1006
|
+
"""
|
|
1007
|
+
raw_planning_response = self.generate_code(planning_prompt, system_prompt=planning_system_prompt, temperature=0.0)
|
|
1008
|
+
|
|
1009
|
+
try:
|
|
1010
|
+
plan = robust_json_parser(raw_planning_response)
|
|
1011
|
+
|
|
1012
|
+
raw_notes = plan.get("new_notes_for_scratchpad")
|
|
1013
|
+
if isinstance(raw_notes, list):
|
|
1014
|
+
notes_from_hop = "\n".join(str(item) for item in raw_notes if item).strip()
|
|
1015
|
+
elif isinstance(raw_notes, str):
|
|
1016
|
+
notes_from_hop = raw_notes.strip()
|
|
1017
|
+
else:
|
|
1018
|
+
notes_from_hop = ""
|
|
1019
|
+
|
|
1020
|
+
if notes_from_hop:
|
|
1021
|
+
if knowledge_scratchpad == "No information gathered yet.":
|
|
1022
|
+
knowledge_scratchpad = f"Findings from Hop {hop_count + 1}:\n{notes_from_hop}"
|
|
1023
|
+
else:
|
|
1024
|
+
knowledge_scratchpad += f"\n\nFindings from Hop {hop_count + 1}:\n{notes_from_hop}"
|
|
1025
|
+
|
|
1026
|
+
raw_objectives = plan.get("updated_objectives")
|
|
1027
|
+
if isinstance(raw_objectives, list):
|
|
1028
|
+
current_objectives = "\n".join(str(item) for item in raw_objectives if item).strip()
|
|
1029
|
+
elif isinstance(raw_objectives, str) and raw_objectives.strip():
|
|
1030
|
+
current_objectives = raw_objectives.strip()
|
|
1031
|
+
|
|
1032
|
+
if streaming_callback:
|
|
1033
|
+
streaming_callback(f"Refined Objectives:\n{current_objectives}\n\nNew Learnings:\n{notes_from_hop}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"planning_output_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1060
1034
|
|
|
1035
|
+
if not plan.get("decision", False):
|
|
1036
|
+
if streaming_callback:
|
|
1037
|
+
streaming_callback("LLM decided it has enough information.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1038
|
+
break
|
|
1039
|
+
else:
|
|
1040
|
+
next_query = plan.get("query")
|
|
1041
|
+
if not next_query:
|
|
1042
|
+
if streaming_callback:
|
|
1043
|
+
streaming_callback("LLM decided to continue but provided no query. Stopping.", MSG_TYPE.MSG_TYPE_WARNING, {"id": "rag_stop_no_query"}, turn_rag_history_for_callback)
|
|
1044
|
+
break
|
|
1045
|
+
current_query_for_rag = next_query
|
|
1046
|
+
|
|
1047
|
+
except Exception as ex:
|
|
1048
|
+
trace_exception(ex)
|
|
1049
|
+
if streaming_callback:
|
|
1050
|
+
streaming_callback(f"Error processing planning step: {ex}. Stopping RAG.", MSG_TYPE.MSG_TYPE_EXCEPTION, {"id": f"planning_error_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1051
|
+
break
|
|
1061
1052
|
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1053
|
+
if streaming_callback:
|
|
1054
|
+
streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
|
|
1055
|
+
|
|
1056
|
+
sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(), key=lambda c: c["similarity"], reverse=True)
|
|
1065
1057
|
context_lines = []
|
|
1066
1058
|
total_chars = 0
|
|
1067
1059
|
for c in sorted_chunks:
|
|
1068
|
-
snippet = (
|
|
1069
|
-
|
|
1070
|
-
f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
|
|
1071
|
-
f"{c['content']}\n---\n"
|
|
1072
|
-
)
|
|
1073
|
-
if total_chars + len(snippet) > max_rag_context_characters:
|
|
1074
|
-
break
|
|
1060
|
+
snippet = (f"Source: {c['document']} (Sim: {c['similarity']:.1f}%)\n{c['content']}\n---\n")
|
|
1061
|
+
if total_chars + len(snippet) > max_rag_context_characters: break
|
|
1075
1062
|
context_lines.append(snippet)
|
|
1076
1063
|
total_chars += len(snippet)
|
|
1077
|
-
|
|
1078
1064
|
accumulated_context = "".join(context_lines)
|
|
1079
1065
|
|
|
1080
|
-
# If context exceeds our effective limit, summarize it
|
|
1081
1066
|
if self.count_tokens(accumulated_context) > effective_ctx_size:
|
|
1082
|
-
|
|
1083
|
-
streaming_callback("Context too large, performing intermediate summary...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
|
|
1084
|
-
summary_prompt = (
|
|
1085
|
-
"Summarize the following gathered context into a concise form "
|
|
1086
|
-
"that preserves all key facts and sources needed to answer the user's request:\n\n"
|
|
1087
|
-
f"{accumulated_context}"
|
|
1088
|
-
)
|
|
1089
|
-
summary = self.generate_text(
|
|
1090
|
-
prompt=summary_prompt,
|
|
1091
|
-
system_prompt="Intermediate summary",
|
|
1092
|
-
temperature=0.0,
|
|
1093
|
-
n_predict= n_predict or 512,
|
|
1094
|
-
stream=False
|
|
1095
|
-
)
|
|
1096
|
-
accumulated_context = self.remove_thinking_blocks(summary).strip()
|
|
1097
|
-
if streaming_callback:
|
|
1098
|
-
streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
|
|
1067
|
+
pass
|
|
1099
1068
|
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1069
|
+
if streaming_callback:
|
|
1070
|
+
streaming_callback("Compiling final answer from all findings...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_generation"}, turn_rag_history_for_callback)
|
|
1071
|
+
|
|
1072
|
+
final_prompt_parts = [
|
|
1073
|
+
f"**User's Original Request:**\n{original_user_prompt}\n",
|
|
1074
|
+
f"**Final Research Objectives:**\n{current_objectives}\n",
|
|
1075
|
+
f"**Knowledge Scratchpad (Summary of Findings):**\n{knowledge_scratchpad}\n",
|
|
1103
1076
|
]
|
|
1104
|
-
if objectives_text:
|
|
1105
|
-
final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
|
|
1106
1077
|
if accumulated_context:
|
|
1107
|
-
|
|
1108
|
-
"
|
|
1109
|
-
f"{accumulated_context}\n
|
|
1078
|
+
final_prompt_parts.append(
|
|
1079
|
+
"**Supporting Raw Context from Retrieved Documents:**\n---\n"
|
|
1080
|
+
f"{accumulated_context}\n---\n"
|
|
1110
1081
|
)
|
|
1111
1082
|
else:
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
"
|
|
1083
|
+
final_prompt_parts.append("**Supporting Raw Context:**\n(No relevant documents were retrieved.)\n")
|
|
1084
|
+
|
|
1085
|
+
final_prompt_parts.append(
|
|
1086
|
+
"**Final Instruction:**\nSynthesize a comprehensive answer to the user's original request. "
|
|
1087
|
+
"Use the 'Knowledge Scratchpad' as your primary source of information and the 'Supporting Raw Context' for specific details and quotes. "
|
|
1088
|
+
"Adhere strictly to the information provided. If the information is insufficient to fully answer, state what is missing based on your 'Final Research Objectives'."
|
|
1116
1089
|
)
|
|
1117
|
-
|
|
1090
|
+
final_prompt_parts.append(self.ai_full_header)
|
|
1118
1091
|
|
|
1119
1092
|
final_answer = self.generate_text(
|
|
1120
|
-
prompt="\n".join(
|
|
1121
|
-
images=images,
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
stream=stream,
|
|
1125
|
-
temperature=temperature,
|
|
1126
|
-
top_k=top_k,
|
|
1127
|
-
top_p=top_p,
|
|
1128
|
-
repeat_penalty=repeat_penalty,
|
|
1129
|
-
repeat_last_n=repeat_last_n,
|
|
1130
|
-
seed=seed,
|
|
1131
|
-
n_threads=n_threads,
|
|
1132
|
-
ctx_size=ctx_size,
|
|
1093
|
+
prompt="\n".join(final_prompt_parts),
|
|
1094
|
+
images=images, system_prompt=system_prompt, n_predict=n_predict, stream=stream,
|
|
1095
|
+
temperature=temperature, top_k=top_k, top_p=top_p, repeat_penalty=repeat_penalty,
|
|
1096
|
+
repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads, ctx_size=ctx_size,
|
|
1133
1097
|
streaming_callback=streaming_callback if stream else None,
|
|
1134
1098
|
**llm_generation_kwargs
|
|
1135
1099
|
)
|
|
1136
1100
|
answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer
|
|
1101
|
+
|
|
1102
|
+
if streaming_callback:
|
|
1103
|
+
streaming_callback("Final answer generated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_generation"}, turn_rag_history_for_callback)
|
|
1137
1104
|
|
|
1138
1105
|
return {
|
|
1139
1106
|
"final_answer": answer_text,
|
|
@@ -1141,7 +1108,303 @@ Respond with a JSON object containing ONE of the following structures:
|
|
|
1141
1108
|
"all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
|
|
1142
1109
|
"error": None
|
|
1143
1110
|
}
|
|
1111
|
+
|
|
1112
|
+
def generate_with_mcp_rag(
|
|
1113
|
+
self,
|
|
1114
|
+
prompt: str,
|
|
1115
|
+
rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
|
|
1116
|
+
system_prompt: str = None,
|
|
1117
|
+
objective_extraction_system_prompt="Extract objectives",
|
|
1118
|
+
images: Optional[List[str]] = None,
|
|
1119
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
1120
|
+
max_tool_calls: int = 10,
|
|
1121
|
+
max_llm_iterations: int = 15,
|
|
1122
|
+
tool_call_decision_temperature: float = 0.0,
|
|
1123
|
+
final_answer_temperature: float = None,
|
|
1124
|
+
streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
|
|
1125
|
+
build_plan: bool = True,
|
|
1126
|
+
rag_vectorizer_name: Optional[str] = None,
|
|
1127
|
+
rag_top_k: int = 5,
|
|
1128
|
+
rag_min_similarity_percent: float = 70.0,
|
|
1129
|
+
**llm_generation_kwargs
|
|
1130
|
+
) -> Dict[str, Any]:
|
|
1131
|
+
"""
|
|
1132
|
+
Generates a response using a stateful agent that can choose between calling standard
|
|
1133
|
+
MCP tools and querying a RAG database, all within a unified reasoning loop.
|
|
1134
|
+
"""
|
|
1135
|
+
if not self.binding:
|
|
1136
|
+
return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
|
|
1137
|
+
if not self.mcp:
|
|
1138
|
+
return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
|
|
1139
|
+
|
|
1140
|
+
# --- Initialize Agent State ---
|
|
1141
|
+
turn_history: List[Dict[str, Any]] = []
|
|
1142
|
+
original_user_prompt = prompt
|
|
1143
|
+
knowledge_scratchpad = "No information gathered yet."
|
|
1144
|
+
current_objectives = ""
|
|
1145
|
+
agent_work_history = []
|
|
1146
|
+
tool_calls_made_this_turn = []
|
|
1147
|
+
llm_iterations = 0
|
|
1148
|
+
|
|
1149
|
+
# --- 1. Discover MCP Tools and Inject the RAG Tool ---
|
|
1150
|
+
if tools is None:
|
|
1151
|
+
try:
|
|
1152
|
+
mcp_tools = self.mcp.discover_tools(force_refresh=True)
|
|
1153
|
+
if not mcp_tools: ASCIIColors.warning("No MCP tools discovered.")
|
|
1154
|
+
except Exception as e_disc:
|
|
1155
|
+
return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
|
|
1156
|
+
else:
|
|
1157
|
+
mcp_tools = tools
|
|
1158
|
+
|
|
1159
|
+
# Define the RAG tool and add it to the list
|
|
1160
|
+
rag_tool_definition = {
|
|
1161
|
+
"name": "research::query_database",
|
|
1162
|
+
"description": (
|
|
1163
|
+
"Queries a vector database to find relevant text chunks based on a natural language query. "
|
|
1164
|
+
"Use this to gather information, answer questions, or find context for a task before using other tools."
|
|
1165
|
+
),
|
|
1166
|
+
"input_schema": {
|
|
1167
|
+
"type": "object",
|
|
1168
|
+
"properties": {
|
|
1169
|
+
"query": {
|
|
1170
|
+
"type": "string",
|
|
1171
|
+
"description": "The natural language query to search for. Be specific to get the best results."
|
|
1172
|
+
}
|
|
1173
|
+
},
|
|
1174
|
+
"required": ["query"]
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
available_tools = [rag_tool_definition] + mcp_tools
|
|
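With the `research::query_database` tool injected, a RAG lookup is just another tool call from the agent's point of view, and the combined method is invoked with the same retrieval callable as `generate_text_with_rag`. A rough usage sketch, reusing the illustrative `lc`, `my_rag_query`, and `on_event` objects defined earlier in this document:

```python
# Hedged sketch: only the parameter names come from the diff above.
result = lc.generate_with_mcp_rag(
    prompt="What changed in the last release, and update the changelog file?",
    rag_query_function=my_rag_query,   # same contract as in generate_text_with_rag
    build_plan=True,                   # runs the planning prompt before the agent loop
    rag_top_k=5,
    rag_min_similarity_percent=70.0,
    streaming_callback=on_event,
)
print(result["final_answer"])
```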
1178
|
+
|
|
1179
|
+
# --- 2. Optional Initial Objectives Extraction ---
|
|
1180
|
+
formatted_tools_list = "\n".join([
|
|
1181
|
+
f"- Full Tool Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
|
|
1182
|
+
for t in available_tools
|
|
1183
|
+
])
|
|
1184
|
+
if build_plan:
|
|
1185
|
+
if streaming_callback:
|
|
1186
|
+
streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_history)
|
|
1187
|
+
|
|
1188
|
+
# The enhanced prompt is placed inside the original parenthesis format.
|
|
1189
|
+
# The f-strings for tool lists and user prompts are preserved.
|
|
1190
|
+
|
|
1191
|
+
obj_prompt = (
|
|
1192
|
+
"You are a hyper-efficient and logical project planner. Your sole purpose is to analyze the user's request and create a concise, numbered list of actionable steps to fulfill it.\n\n"
|
|
1193
|
+
"Your plan must be the most direct and minimal path to the user's goal.\n\n"
|
|
1194
|
+
"**Your Core Directives:**\n\n"
|
|
1195
|
+
"1. **Analyze the Request:** Break down the user's prompt into the essential, core tasks required.\n"
|
|
1196
|
+
"2. **Evaluate Tools with Extreme Scrutiny:** For each task, determine if a tool is **absolutely necessary**. Do not suggest a tool unless the task is impossible without it.\n"
|
|
1197
|
+
"3. **Prioritize Simplicity:** If the request can be answered directly without any tools (e.g., it's a simple question or requires a creative response), your entire plan should be a single step: \"1. Formulate a direct answer to the user's request.\"\n\n"
|
|
1198
|
+
"**CRITICAL RULES:**\n"
|
|
1199
|
+
"* **DO NOT** add any steps, objectives, or tool uses that were not explicitly required by the user.\n"
|
|
1200
|
+
"* **DO NOT** attempt to use a tool just because it is available. Most requests will not require any tools.\n"
|
|
1201
|
+
"* **DO NOT** add \"nice-to-have\" or \"extra\" tasks. Stick strictly to the request.\n\n"
|
|
1202
|
+
"Your final output must be a short, numbered list of steps. Do not call any tools in this planning phase.\n\n"
|
|
1203
|
+
"---\n"
|
|
1204
|
+
"**Available Tools:**\n"
|
|
1205
|
+
f"{formatted_tools_list}\n\n"
|
|
1206
|
+
"**User Request:**\n"
|
|
1207
|
+
f'"{original_user_prompt}"'
|
|
1208
|
+
)
|
|
1209
|
+
initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
|
|
1210
|
+
current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
|
|
1211
|
+
|
|
1212
|
+
if streaming_callback:
|
|
1213
|
+
streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_history)
|
|
1214
|
+
else:
|
|
1215
|
+
current_objectives = f"Fulfill the user's request: '{original_user_prompt}'"
|
|
1144
1216
|
|
|
1217
|
+
turn_history.append({"type": "initial_objectives", "content": current_objectives})
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
|
|
1221
|
+
# --- 3. Main Agent Loop ---
|
|
1222
|
+
while llm_iterations < max_llm_iterations:
|
|
1223
|
+
llm_iterations += 1
|
|
1224
|
+
if streaming_callback:
|
|
1225
|
+
streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
|
|
1226
|
+
|
|
1227
|
+
# Format agent history for the prompt
|
|
1228
|
+
formatted_agent_history = "No actions taken yet."
|
|
1229
|
+
if agent_work_history:
|
|
1230
|
+
history_parts = []
|
|
1231
|
+
for i, entry in enumerate(agent_work_history):
|
|
1232
|
+
history_parts.append(
|
|
1233
|
+
f"### Step {i+1}:\n"
|
|
1234
|
+
f"**Thought:** {entry['thought']}\n"
|
|
1235
|
+
f"**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n"
|
|
1236
|
+
f"**Observation:**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```"
|
|
1237
|
+
)
|
|
1238
|
+
formatted_agent_history = "\n\n".join(history_parts)
|
|
1239
|
+
|
|
1240
|
+
# Construct the "Thinking & Planning" prompt
|
|
1241
|
+
decision_prompt_template = f"""You are a strategic AI assistant. Your goal is to achieve a set of objectives by intelligently using research and system tools.
|
|
1242
|
+
|
|
1243
|
+
--- AVAILABLE TOOLS ---
|
|
1244
|
+
{formatted_tools_list}
|
|
1245
|
+
|
|
1246
|
+
--- CURRENT STATE ---
|
|
1247
|
+
Original User Request: {original_user_prompt}
|
|
1248
|
+
Current Research Objectives:
|
|
1249
|
+
{current_objectives}
|
|
1250
|
+
|
|
1251
|
+
Knowledge Scratchpad (our current understanding):
|
|
1252
|
+
{knowledge_scratchpad}
|
|
1253
|
+
|
|
1254
|
+
--- AGENT WORK HISTORY (previous steps in this turn) ---
|
|
1255
|
+
{formatted_agent_history}
|
|
1256
|
+
|
|
1257
|
+
--- INSTRUCTIONS ---
|
|
1258
|
+
1. **Analyze:** Review the entire work history, objectives, and scratchpad.
|
|
1259
|
+
2. **Update State:** Based on the latest observations, update the scratchpad and refine the objectives. The scratchpad should be a comprehensive summary of ALL knowledge gathered.
|
|
1260
|
+
3. **Decide Next Action:** Choose ONE of the following: `call_tool`, `final_answer`, or `clarify`. Always prefer to gather information with `research::query_database` before attempting to use other tools if you lack context.
|
|
1261
|
+
|
|
1262
|
+
--- OUTPUT FORMAT ---
|
|
1263
|
+
Respond with a single JSON object inside a JSON markdown tag. Use this exact schema:
|
|
1264
|
+
```json
|
|
1265
|
+
{{
|
|
1266
|
+
"thought": "Your reasoning for the chosen action, analyzing how the work history informs your next step. Explain why you are choosing a specific tool (or to answer).",
|
|
1267
|
+
"updated_scratchpad": "The new, complete, and comprehensive summary of all knowledge gathered. Integrate new findings with old ones. if no new knowledge is gathered, this should be an empty string.",
|
|
1268
|
+
"updated_objectives": "The full, potentially revised, list of objectives. If no change, repeat the current list.",
|
|
1269
|
+
"action": "The chosen action: 'call_tool', 'final_answer', or 'clarify'.",
|
|
1270
|
+
"tool_name": "(string, if action is 'call_tool') The full 'alias::tool_name' of the tool to use.",
|
|
1271
|
+
"tool_params": {{"query": "...", "param2": "..."}},
|
|
1272
|
+
"clarification_request": "(string, if action is 'clarify') Your question to the user."
|
|
1273
|
+
}}
|
|
1274
|
+
```
|
|
1275
|
+
"""
|
|
1276
|
+
raw_llm_decision_json = self.generate_text(
|
|
1277
|
+
prompt=decision_prompt_template, n_predict=2048, temperature=tool_call_decision_temperature
|
|
1278
|
+
)
|
|
1279
|
+
|
|
1280
|
+
# --- 4. Parse LLM's plan and update state ---
|
|
1281
|
+
try:
|
|
1282
|
+
llm_decision = robust_json_parser(raw_llm_decision_json)
|
|
1283
|
+
turn_history.append({"type": "llm_plan", "content": llm_decision})
|
|
1284
|
+
|
|
1285
|
+
current_objectives = llm_decision.get("updated_objectives", current_objectives)
|
|
1286
|
+
new_scratchpad = llm_decision.get("updated_scratchpad")
|
|
1287
|
+
|
|
1288
|
+
if new_scratchpad and new_scratchpad != knowledge_scratchpad:
|
|
1289
|
+
knowledge_scratchpad = new_scratchpad
|
|
1290
|
+
if streaming_callback:
|
|
1291
|
+
streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP, {"id": "scratchpad_update"}, turn_history)
|
|
1292
|
+
streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id":"scratch_pad_update"}, turn_history)
|
|
1293
|
+
|
|
1294
|
+
except (json.JSONDecodeError, AttributeError, KeyError) as e:
|
|
1295
|
+
ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}. Error: {e}")
|
|
1296
|
+
turn_history.append({"type": "error", "content": f"Failed to parse LLM plan: {raw_llm_decision_json}"})
|
|
1297
|
+
break
|
|
1298
|
+
|
|
1299
|
+
if streaming_callback:
|
|
1300
|
+
streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
|
|
1301
|
+
|
|
1302
|
+
# --- 5. Execute the chosen action ---
|
|
1303
|
+
action = llm_decision.get("action")
|
|
1304
|
+
tool_result = None
|
|
1305
|
+
|
|
1306
|
+
if action == "call_tool":
|
|
1307
|
+
if len(tool_calls_made_this_turn) >= max_tool_calls:
|
|
1308
|
+
ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
|
|
1309
|
+
break
|
|
1310
|
+
|
|
1311
|
+
tool_name = llm_decision.get("tool_name")
|
|
1312
|
+
tool_params = llm_decision.get("tool_params", {})
|
|
1313
|
+
|
|
1314
|
+
if not tool_name or not isinstance(tool_params, dict):
|
|
1315
|
+
ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
|
|
1316
|
+
break
|
|
1317
|
+
|
|
1318
|
+
if streaming_callback:
|
|
1319
|
+
streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
|
|
1320
|
+
|
|
1321
|
+
try:
|
|
1322
|
+
# ** DYNAMIC TOOL/RAG DISPATCH **
|
|
1323
|
+
if tool_name == "research::query_database":
|
|
1324
|
+
query = tool_params.get("query")
|
|
1325
|
+
if not query:
|
|
1326
|
+
tool_result = {"error": "RAG tool called without a 'query' parameter."}
|
|
1327
|
+
else:
|
|
1328
|
+
retrieved_chunks = rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
|
|
1329
|
+
if not retrieved_chunks:
|
|
1330
|
+
tool_result = {"summary": "No relevant documents found for the query.", "chunks": []}
|
|
1331
|
+
else:
|
|
1332
|
+
tool_result = {
|
|
1333
|
+
"summary": f"Found {len(retrieved_chunks)} relevant document chunks.",
|
|
1334
|
+
"chunks": retrieved_chunks
|
|
1335
|
+
}
|
|
1336
|
+
else:
|
|
1337
|
+
# Standard MCP tool execution
|
|
1338
|
+
tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
|
|
1339
|
+
|
|
1340
|
+
except Exception as e_exec:
|
|
1341
|
+
trace_exception(e_exec)
|
|
1342
|
+
tool_result = {"error": f"An exception occurred while executing tool '{tool_name}': {e_exec}"}
|
|
1343
|
+
|
|
1344
|
+
# Record the work cycle in the agent's history
|
|
1345
|
+
work_entry = {
|
|
1346
|
+
"thought": llm_decision.get("thought", "N/A"),
|
|
1347
|
+
"tool_name": tool_name,
|
|
1348
|
+
"tool_params": tool_params,
|
|
1349
|
+
"tool_result": tool_result
|
|
1350
|
+
}
|
|
1351
|
+
agent_work_history.append(work_entry)
|
|
1352
|
+
tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
|
|
1353
|
+
|
|
1354
|
+
if streaming_callback:
|
|
1355
|
+
streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
|
|
1356
|
+
streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
|
|
1357
|
+
|
|
1358
|
+
elif action == "clarify":
|
|
1359
|
+
clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
|
|
1360
|
+
return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True}
|
|
1361
|
+
|
|
1362
|
+
elif action == "final_answer":
|
|
1363
|
+
ASCIIColors.info("LLM decided to formulate a final answer.")
|
|
1364
|
+
break
|
|
1365
|
+
|
|
1366
|
+
else:
|
|
1367
|
+
ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
|
|
1368
|
+
break
|
|
1369
|
+
if streaming_callback:
|
|
1370
|
+
streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
|
|
1371
|
+
|
|
1372
|
+
if streaming_callback:
|
|
1373
|
+
streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
|
|
1374
|
+
# --- 6. Generate Final Answer ---
|
|
1375
|
+
if streaming_callback:
|
|
1376
|
+
streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
|
|
1377
|
+
|
|
1378
|
+
final_answer_prompt = f"""You are an AI assistant providing a final, comprehensive answer based on research and tool use.
|
|
1379
|
+
|
|
1380
|
+
--- CONTEXT ---
|
|
1381
|
+
Original User Request: "{original_user_prompt}"
|
|
1382
|
+
|
|
1383
|
+
--- SUMMARY OF FINDINGS (Knowledge Scratchpad) ---
|
|
1384
|
+
{knowledge_scratchpad}
|
|
1385
|
+
|
|
1386
|
+
--- INSTRUCTIONS ---
|
|
1387
|
+
- Synthesize a clear, complete answer for the user based ONLY on the information in the 'Summary of Findings'.
|
|
1388
|
+
- Address the user directly and answer their original request.
|
|
1389
|
+
- Do not make up information. If the findings are insufficient, state what you found and what remains unanswered.
|
|
1390
|
+
"""
|
|
1391
|
+
final_answer_text = self.generate_text(
|
|
1392
|
+
prompt=final_answer_prompt,
|
|
1393
|
+
system_prompt=system_prompt,
|
|
1394
|
+
images=images,
|
|
1395
|
+
stream=streaming_callback is not None,
|
|
1396
|
+
streaming_callback=streaming_callback,
|
|
1397
|
+
temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
|
|
1398
|
+
**(llm_generation_kwargs or {})
|
|
1399
|
+
)
|
|
1400
|
+
|
|
1401
|
+
if streaming_callback:
|
|
1402
|
+
streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
|
|
1403
|
+
|
|
1404
|
+
final_answer = self.remove_thinking_blocks(final_answer_text)
|
|
1405
|
+
turn_history.append({"type":"final_answer_generated", "content": final_answer})
|
|
1406
|
+
|
|
1407
|
+
return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
 
     def generate_code(
         self,