lollms-client 0.20.7__py3-none-any.whl → 0.20.9__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -2,7 +2,7 @@
2
2
  import requests
3
3
  from ascii_colors import ASCIIColors, trace_exception
4
4
  from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
5
- from lollms_client.lollms_utilities import encode_image # Keep utilities needed by core
5
+ from lollms_client.lollms_utilities import robust_json_parser # Keep utilities needed by core
6
6
  from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
7
7
  # Import new Abstract Base Classes and Managers
8
8
  from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
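
The only change in this first hunk swaps the `encode_image` import for `robust_json_parser`, which the reworked agent loops further down rely on to recover structured decisions from model output. The packaged implementation is not shown in this diff; a minimal illustrative stand-in with roughly the behaviour the call sites expect (tolerating a ```json fence and surrounding chatter, raising `json.JSONDecodeError` when nothing parses) might look like this:

```python
import json
import re

def robust_json_parser_sketch(raw: str) -> dict:
    """Illustrative stand-in only (NOT the packaged lollms_utilities implementation):
    strip an optional ```json fence and decode the first JSON object in the reply."""
    text = raw.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()
    start = text.find("{")
    if start == -1:
        raise json.JSONDecodeError("no JSON object found", text, 0)
    obj, _ = json.JSONDecoder().raw_decode(text[start:])
    return obj

print(robust_json_parser_sketch('Sure!\n```json\n{"action": "final_answer"}\n```'))
# {'action': 'final_answer'}
```
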
@@ -597,336 +597,327 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
597
597
  response_full += response
598
598
  codes = self.extract_code_blocks(response, format=code_tag_format)
599
599
  return codes
600
+
601
+ def _synthesize_knowledge(
602
+ self,
603
+ previous_scratchpad: str,
604
+ tool_name: str,
605
+ tool_params: dict,
606
+ tool_result: dict
607
+ ) -> str:
608
+ """
609
+ A dedicated LLM call to interpret a tool's output and update the knowledge scratchpad.
610
+ """
611
+ synthesis_prompt = (
612
+ "You are a data analyst assistant. Your sole job is to interpret the output of a tool and integrate it into the existing research summary (knowledge scratchpad).\n\n"
613
+ "--- PREVIOUS KNOWLEDGE SCRATCHPAD ---\n"
614
+ f"{previous_scratchpad}\n\n"
615
+ "--- ACTION JUST TAKEN ---\n"
616
+ f"Tool Called: `{tool_name}`\n"
617
+ f"Parameters: {json.dumps(tool_params)}\n\n"
618
+ "--- RAW TOOL OUTPUT ---\n"
619
+ f"```json\n{json.dumps(tool_result, indent=2)}\n```\n\n"
620
+ "--- YOUR TASK ---\n"
621
+ "Read the 'RAW TOOL OUTPUT' and explain what it means in plain language. Then, integrate this new information with the 'PREVIOUS KNOWLEDGE SCRATCHPAD' to create a new, complete, and self-contained summary.\n"
622
+ "Your output should be ONLY the text of the new scratchpad, with no extra commentary or formatting.\n\n"
623
+ "--- NEW KNOWLEDGE SCRATCHPAD ---\n"
624
+ )
625
+ new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
626
+ return self.remove_thinking_blocks(new_scratchpad_text).strip()
627
+
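
`_synthesize_knowledge` keeps the agent's running summary current by funnelling every raw tool result through one extra LLM call. A self-contained sketch of the same idea, with the LLM reduced to a stub callable so the data flow is visible (the names here are illustrative, not part of the package API):

```python
import json
from typing import Callable

def synthesize_knowledge(llm: Callable[[str], str], previous_scratchpad: str,
                         tool_name: str, tool_params: dict, tool_result: dict) -> str:
    """Fold one raw tool observation into the running scratchpad via a single LLM call."""
    prompt = (
        "You are a data analyst assistant. Integrate the tool output below into the "
        "existing research summary and return only the new scratchpad text.\n\n"
        f"--- PREVIOUS KNOWLEDGE SCRATCHPAD ---\n{previous_scratchpad}\n\n"
        f"--- ACTION JUST TAKEN ---\nTool: `{tool_name}`\nParams: {json.dumps(tool_params)}\n\n"
        f"--- RAW TOOL OUTPUT ---\n{json.dumps(tool_result, indent=2)}\n\n"
        "--- NEW KNOWLEDGE SCRATCHPAD ---\n"
    )
    return llm(prompt).strip()

# Stub LLM so the example runs without a model:
stub = lambda p: "The arXiv search returned 2 relevant papers on agentic RAG."
print(synthesize_knowledge(stub, "No information gathered yet.",
                           "search_papers", {"query": "agentic RAG"}, {"hits": 2}))
```
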
628
+ def _build_final_decision_prompt(
629
+ self,
630
+ formatted_tools_list: str,
631
+ formatted_conversation_history: str,
632
+ current_plan: str,
633
+ knowledge_scratchpad: str,
634
+ agent_work_history_str: str,
635
+ ctx_size: Optional[int],
636
+ ) -> str:
637
+ """
638
+ Builds the decision prompt with explicit state-checking instructions to prevent loops.
639
+ """
640
+ final_agent_history = agent_work_history_str
641
+
642
+ if ctx_size:
643
+ get_token_count = len
644
+ static_parts_text = (
645
+ "You are a task-oriented AI assistant. Your goal is to execute a plan step-by-step without repeating work.\n\n"
646
+ "--- AVAILABLE TOOLS ---\n"
647
+ f"{formatted_tools_list}\n\n"
648
+ "--- CONVERSATION HISTORY ---\n"
649
+ f"{formatted_conversation_history}\n\n"
650
+ "--- CUMULATIVE KNOWLEDGE (What you know so far) ---\n"
651
+ f"{knowledge_scratchpad}\n\n"
652
+ "--- THE OVERALL PLAN ---\n"
653
+ f"{current_plan}\n\n"
654
+ "--- ACTIONS TAKEN THIS TURN ---\n"
655
+ "\n\n" # Empty history for size calculation
656
+ "--- YOUR TASK: STATE-DRIVEN EXECUTION ---\n"
657
+ "1. **Identify the next step:** Look at 'THE OVERALL PLAN' and identify the very next incomplete step.\n"
658
+ "2. **Check your knowledge:** Look at the 'CUMULATIVE KNOWLEDGE'. Have you already performed this step and recorded the result? For example, if the step is 'search for papers', check if the search results are already in the knowledge base.\n"
659
+ "3. **Decide your action:**\n"
660
+ " - **If the step is NOT DONE:** Your action is `call_tool` to execute it.\n"
661
+ " - **If the step IS ALREADY DONE:** Your job is to update the plan by removing the completed step. Then, re-evaluate from step 1 with the *new, shorter plan*.\n"
662
+ " - **If ALL steps are done:** Your action is `final_answer`.\n"
663
+ " - **If you are blocked:** Your action is `clarify`.\n\n"
664
+ "--- OUTPUT FORMAT ---\n"
665
+ "Respond with a single JSON object inside a ```json markdown tag.\n"
666
+ "```json\n{\n"
667
+ ' "thought": "My explicit reasoning. First, I will state the next step from the plan. Second, I will check the cumulative knowledge to see if this step is already complete. Third, I will state my conclusion and chosen action based on that comparison.",\n'
668
+ ' "updated_plan": "The new, remaining plan. It is CRITICAL that you remove any step that you have confirmed is complete in your thought process.",\n'
669
+ ' "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
670
+ ' "action_details": {\n'
671
+ ' "tool_name": "(Required if action is \'call_tool\') The tool for the CURRENT incomplete step.",\n'
672
+ ' "tool_params": {},\n'
673
+ ' "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
674
+ " }\n}\n```"
675
+ )
676
+ fixed_parts_size = get_token_count(static_parts_text)
677
+ available_space_for_history = ctx_size - fixed_parts_size - 100
678
+ if get_token_count(agent_work_history_str) > available_space_for_history:
679
+ if available_space_for_history > 0:
680
+ truncation_point = len(agent_work_history_str) - available_space_for_history
681
+ final_agent_history = ("[...history truncated due to context size...]\n" + agent_work_history_str[truncation_point:])
682
+ ASCIIColors.warning("Agent history was truncated to fit the context window.")
683
+ else:
684
+ final_agent_history = "[...history truncated due to context size...]"
685
+
686
+ return (
687
+ "You are a task-oriented AI assistant. Your goal is to execute a plan step-by-step without repeating work.\n\n"
688
+ "--- AVAILABLE TOOLS ---\n"
689
+ f"{formatted_tools_list}\n\n"
690
+ "--- CONVERSATION HISTORY ---\n"
691
+ f"{formatted_conversation_history}\n\n"
692
+ "--- CUMULATIVE KNOWLEDGE (What you know so far) ---\n"
693
+ f"{knowledge_scratchpad}\n\n"
694
+ "--- THE OVERALL PLAN ---\n"
695
+ f"{current_plan}\n\n"
696
+ "--- ACTIONS TAKEN THIS TURN ---\n"
697
+ f"{final_agent_history}\n\n"
698
+ "--- YOUR TASK: STATE-DRIVEN EXECUTION ---\n"
699
+ "1. **Identify the next step:** Look at 'THE OVERALL PLAN' and identify the very next incomplete step.\n"
700
+ "2. **Check your knowledge:** Look at the 'CUMULATIVE KNOWLEDGE'. Have you already performed this step and recorded the result? For example, if the step is 'search for papers', check if the search results are already in the knowledge base.\n"
701
+ "3. **Decide your action:**\n"
702
+ " - **If the step is NOT DONE:** Your action is `call_tool` to execute it.\n"
703
+ " - **If the step IS ALREADY DONE:** Your job is to update the plan by removing the completed step. Then, re-evaluate from step 1 with the *new, shorter plan*.\n"
704
+ " - **If ALL steps are done:** Your action is `final_answer`.\n"
705
+ " - **If you are blocked:** Your action is `clarify`.\n\n"
706
+ "--- OUTPUT FORMAT ---\n"
707
+ "Respond with a single JSON object inside a ```json markdown tag.\n"
708
+ "```json\n"
709
+ "{\n"
710
+ ' "thought": "My explicit reasoning. First, I will state the next step from the plan. Second, I will check the cumulative knowledge to see if this step is already complete. Third, I will state my conclusion and chosen action based on that comparison.",\n'
711
+ ' "updated_plan": "The new, remaining plan. It is CRITICAL that you remove any step that you have confirmed is complete in your thought process.",\n'
712
+ ' "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
713
+ ' "action_details": {\n'
714
+ ' "tool_name": "(Required if action is \'call_tool\') The tool for the CURRENT incomplete step.",\n'
715
+ ' "tool_params": {},\n'
716
+ ' "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
717
+ " }\n"
718
+ "}\n"
719
+ "```"
720
+ )
721
+
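
Note that the budget check above approximates token counting with plain `len()` (characters, not tokens) and keeps the tail of the agent history rather than the head, so the most recent actions survive truncation. The same arithmetic, isolated into a small helper for clarity (hypothetical names, identical logic):

```python
def fit_history_to_context(history: str, fixed_parts: str, ctx_size: int, margin: int = 100) -> str:
    """Keep only the tail of the agent history so fixed prompt + history fits in ctx_size.
    Mirrors the approximation above: len() counts characters, not real tokens."""
    available = ctx_size - len(fixed_parts) - margin
    if len(history) <= available:
        return history
    if available <= 0:
        return "[...history truncated due to context size...]"
    return "[...history truncated due to context size...]\n" + history[len(history) - available:]

# With a small budget only the most recent part of a long history survives:
print(fit_history_to_context("step " * 60, "SYSTEM AND PLAN TEXT", ctx_size=200, margin=20))
```
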
600
722
 
601
- # --- Function Calling with MCP ---
602
723
  def generate_with_mcp(
603
724
  self,
604
725
  prompt: str,
605
- discussion_history: Optional[List[Dict[str, str]]] = None, # e.g. [{"role":"user", "content":"..."}, {"role":"assistant", "content":"..."}]
726
+ system_prompt:str = None,
727
+ objective_extraction_system_prompt="Build a plan",
606
728
  images: Optional[List[str]] = None,
607
- tools: Optional[List[Dict[str, Any]]] = None, # List of MCP tool definitions
729
+ tools: Optional[List[Dict[str, Any]]] = None,
608
730
  max_tool_calls: int = 5,
609
- max_llm_iterations: int = 10, # Safety break for LLM deciding to call tools repeatedly
610
- tool_call_decision_temperature: float = 0.1, # Lower temp for more deterministic decision making
611
- final_answer_temperature: float = None, # Use instance default if None
612
- streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
613
- interactive_tool_execution: bool = False, # If true, prompts user before executing a tool
731
+ max_llm_iterations: int = 10,
732
+ ctx_size: Optional[int] = None,
733
+ max_json_retries: int = 1,
734
+ tool_call_decision_temperature: float = 0.0,
735
+ final_answer_temperature: float = None,
736
+ streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
614
737
  **llm_generation_kwargs
615
738
  ) -> Dict[str, Any]:
616
- """
617
- Generates a response that may involve calling one or more tools via MCP.
618
-
619
- Args:
620
- prompt (str): The user's initial prompt.
621
- discussion_history (Optional[List[Dict[str, str]]]): Previous turns of conversation.
622
- images (Optional[List[str]]): Images provided with the current user prompt.
623
- tools (Optional[List[Dict[str, Any]]]): A list of MCP tool definitions available for this call.
624
- If None, tools will be discovered from the MCP binding.
625
- max_tool_calls (int): Maximum number of distinct tool calls allowed in one interaction turn.
626
- max_llm_iterations (int): Maximum number of times the LLM can decide to call a tool
627
- before being forced to generate a final answer.
628
- tool_call_decision_temperature (float): Temperature for LLM when deciding on tool calls.
629
- final_answer_temperature (float): Temperature for LLM when generating the final answer.
630
- streaming_callback (Optional[Callable]): Callback for streaming LLM responses (tool decisions/final answer).
631
- Signature: (chunk_str, msg_type, metadata_dict, history_list_of_dicts_for_this_turn) -> bool
632
- interactive_tool_execution (bool): If True, ask user for confirmation before executing each tool.
739
+ if not self.binding or not self.mcp:
740
+ return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}
633
741
 
634
- Returns:
635
- Dict[str, Any]: A dictionary containing:
636
- - "final_answer" (str): The LLM's final textual answer.
637
- - "tool_calls" (List[Dict]): A list of tools called, their params, and results.
638
- - "error" (Optional[str]): Error message if something went wrong.
639
- """
640
- if not self.binding:
641
- return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
642
- if not self.mcp:
643
- return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
742
+ turn_history: List[Dict[str, Any]] = []
743
+ conversation_context = prompt
644
744
 
645
- turn_history: List[Dict[str, Any]] = [] # Tracks this specific turn's interactions (LLM thoughts, tool calls, tool results)
646
-
647
- # 1. Discover tools if not provided
648
745
  if tools is None:
649
746
  try:
650
747
  tools = self.mcp.discover_tools(force_refresh=True)
651
- if not tools:
652
- ASCIIColors.warning("No MCP tools discovered by the binding.")
748
+ if not tools: ASCIIColors.warning("No MCP tools discovered.")
653
749
  except Exception as e_disc:
654
750
  return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
655
-
656
- if not tools: # If still no tools after discovery attempt
657
- ASCIIColors.info("No tools available for function calling. Generating direct response.")
658
- final_answer = self.remove_thinking_blocks(self.generate_text(
659
- prompt=prompt,
660
- system_prompt= (discussion_history[0]['content'] if discussion_history and discussion_history[0]['role'] == 'system' else "") + "\nYou are a helpful assistant.", # Basic system prompt
661
- images=images,
662
- stream=streaming_callback is not None, # stream if callback is provided
663
- streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, None, turn_history) if streaming_callback else None, # Adapt callback
664
- temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
665
- **(llm_generation_kwargs or {})
666
- ))
667
- if isinstance(final_answer, dict) and "error" in final_answer: # Handle generation error
668
- return {"final_answer": "", "tool_calls": [], "error": final_answer["error"]}
669
- return {"final_answer": final_answer, "tool_calls": [], "error": None}
670
751
 
752
+ if not tools:
753
+ final_answer_text = self.generate_text(prompt=prompt, system_prompt=system_prompt, stream=streaming_callback is not None, streaming_callback=streaming_callback)
754
+ return {"final_answer": self.remove_thinking_blocks(final_answer_text), "tool_calls": [], "error": None}
671
755
 
672
- formatted_tools_list = "\n".join([
673
- f"- Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
674
- for t in tools
675
- ])
676
-
677
- current_conversation: List[Dict[str, str]] = []
678
- if discussion_history:
679
- current_conversation.extend(discussion_history)
680
- current_conversation.append({"role": "user", "content": prompt})
681
- if images: # Add image representations to the last user message if supported by LLM and chat format
682
- # This part is highly dependent on how the specific LLM binding handles images in chat.
683
- # For simplicity, we'll assume if images are passed, the underlying generate_text handles it.
684
- # A more robust solution would modify current_conversation[-1]['content'] structure.
685
- ASCIIColors.info("Images provided. Ensure LLM binding's generate_text handles them with chat history.")
686
-
756
+ knowledge_scratchpad = "No information gathered yet."
757
+ agent_work_history = []
758
+ formatted_tools_list = "\n".join([f"- Tool: {t.get('name')}\n Description: {t.get('description')}\n Schema: {json.dumps(t.get('input_schema'))}" for t in tools])
759
+
760
+ if streaming_callback:
761
+ streaming_callback("Building/Revising plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "plan_extraction"}, turn_history)
762
+
763
+ obj_prompt = (
764
+ "You are an Intelligent Workflow Planner. Your mission is to create the most efficient plan possible by analyzing the user's request within the context of the full conversation.\n\n"
765
+ "Your Guiding Principle: **Always choose the path of least resistance.**\n\n"
766
+ "**Your Logical Process:**\n"
767
+ "1. **Analyze the Entire Conversation:** Understand the user's ultimate goal based on all interaction so far.\n"
768
+ "2. **Check for a Single-Step Solution:** Scrutinize the available tools. Can a single tool call directly achieve the user's current goal? \n"
769
+ "3. **Formulate a Plan:** Based on your analysis, create a concise, numbered list of steps to achieve the goal. If the goal is simple, this may be only one step. If it is complex or multi-turn, it may be several steps.\n\n"
770
+ "**CRITICAL RULES:**\n"
771
+ "* **MANDATORY: NEVER add steps the user did not ask for.** Do not embellish or add 'nice-to-have' features.\n"
772
+ "* **Focus on the Goal:** Your plan should directly address the user's request as it stands now in the conversation.\n\n"
773
+ "---\n"
774
+ "**Available Tools:**\n"
775
+ f"{formatted_tools_list}\n\n"
776
+ "**Full Conversation History:**\n"
777
+ f'"{conversation_context}"'
778
+ )
779
+ initial_plan_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
780
+ current_plan = self.remove_thinking_blocks(initial_plan_gen).strip()
687
781
 
782
+ if streaming_callback:
783
+ streaming_callback(f"Current plan:\n{current_plan}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "plan_extraction"}, turn_history)
784
+ turn_history.append({"type": "initial_plan", "content": current_plan})
785
+
688
786
  tool_calls_made_this_turn = []
689
787
  llm_iterations = 0
690
788
 
691
789
  while llm_iterations < max_llm_iterations:
692
790
  llm_iterations += 1
791
+ if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
693
792
 
694
- # 2. Construct prompt for LLM to decide on tool call or direct answer
695
- # We need to convert current_conversation into a single string prompt for `generate_code`
696
- # or adapt `generate_code` to take a message list if underlying LLM supports chat for structured output.
697
- # For now, let's assume `generate_code` takes a flat prompt.
698
-
699
- # Create a string representation of the conversation history
700
- history_str = ""
701
- for msg in current_conversation:
702
- role_prefix = self.user_custom_header(msg["role"]) if msg["role"]=="user" else self.ai_custom_header(msg["role"]) if msg["role"]=="assistant" else self.system_custom_header(msg["role"]) if msg["role"]=="system" else "!@>unknown:"
703
- history_str += f"{role_prefix}{msg['content']}\n"
704
-
705
- # Add tool execution results from previous iterations in this turn to the history string
706
- for tc_info in tool_calls_made_this_turn:
707
- if tc_info.get("result"): # Only add if there's a result (successful or error)
708
- history_str += f"{self.ai_full_header}(Executed tool '{tc_info['name']}' with params {tc_info['params']}. Result: {json.dumps(tc_info['result'])})\n"
709
-
710
-
711
- decision_prompt_template = f"""You are an AI assistant that can use tools to answer user requests.
712
- Available tools:
713
- {formatted_tools_list}
714
-
715
- Current conversation:
716
- {history_str}
717
-
718
- Based on the available tools and the current conversation, decide the next step.
719
- Respond with a JSON object containing ONE of the following structures:
720
- 1. If you need to use a tool:
721
- {{"action": "call_tool", "tool_name": "<name_of_tool_to_call>", "tool_params": {{<parameters_for_tool_as_json_object>}}}}
722
- 2. If you can answer directly without using a tool OR if you have sufficient information from previous tool calls:
723
- {{"action": "final_answer"}}
724
- 3. If the user's request is unclear or you need more information before deciding:
725
- {{"action": "clarify", "clarification_request": "<your_question_to_the_user>"}}
726
- """ # No {self.ai_full_header} here, generate_code will get raw JSON
727
-
728
- if streaming_callback:
729
- streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
730
-
731
- # Use generate_code to get structured JSON output from LLM
732
- # Note: generate_code itself uses generate_text. We are asking for JSON here.
733
- raw_llm_decision_json = self.generate_text(
734
- prompt=decision_prompt_template, # This is the full prompt for the LLM
735
- n_predict=512, # Reasonable size for decision JSON
736
- temperature=tool_call_decision_temperature,
737
- images=images
738
- # `images` are part of the history_str if relevant to the binding
739
- # streaming_callback=None, # Decisions are usually not streamed chunk by chunk
793
+ formatted_agent_history = "No actions taken yet in this turn."
794
+ if agent_work_history:
795
+ history_parts = [ f"### Step {i+1}:\n**Thought:** {entry['thought']}\n**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n**Observation (Tool Output):**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```" for i, entry in enumerate(agent_work_history)]
796
+ formatted_agent_history = "\n\n".join(history_parts)
797
+
798
+ llm_decision = None
799
+ current_decision_prompt = self._build_final_decision_prompt(
800
+ formatted_tools_list=formatted_tools_list, formatted_conversation_history=conversation_context,
801
+ current_plan=current_plan, knowledge_scratchpad=knowledge_scratchpad,
802
+ agent_work_history_str=formatted_agent_history, ctx_size=ctx_size
740
803
  )
741
- if streaming_callback:
742
- streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
743
-
744
804
 
745
- if not raw_llm_decision_json:
746
- ASCIIColors.error("LLM failed to provide a decision JSON.")
747
- turn_history.append({"type": "error", "content": "LLM failed to provide a decision."})
748
- return {"final_answer": "I'm sorry, I encountered an issue trying to process your request.", "tool_calls": tool_calls_made_this_turn, "error": "LLM decision JSON was empty."}
749
-
750
- processed_raw_json = raw_llm_decision_json.strip() # Strip whitespace first
751
- try:
752
- llm_decision = json.loads(processed_raw_json)
753
- turn_history.append({"type": "llm_decision", "content": llm_decision})
754
- except json.JSONDecodeError:
755
- ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}")
805
+ for i in range(max_json_retries + 1):
806
+ raw_llm_decision_json = self.generate_text(prompt=current_decision_prompt, n_predict=2048, temperature=tool_call_decision_temperature)
756
807
  try:
757
- decoder = json.JSONDecoder()
758
- # Try to decode the first JSON object from the (stripped) string
759
- llm_decision, end_index = decoder.raw_decode(processed_raw_json)
760
- turn_history.append({"type": "llm_decision_extracted", "content": llm_decision, "raw_trimmed": processed_raw_json[:end_index]})
761
-
762
- remaining_text = processed_raw_json[end_index:].strip()
763
- if remaining_text:
764
- ASCIIColors.warning(f"LLM output contained additional text after the first JSON object: '{remaining_text}'. Processing only the first object.")
765
- turn_history.append({"type": "llm_extra_output_ignored", "content": remaining_text})
766
- except json.JSONDecodeError as e_inner:
767
- ASCIIColors.error(f"Failed to parse LLM decision JSON even after attempting to extract first object: {raw_llm_decision_json}. Error: {e_inner}")
768
- turn_history.append({"type": "error", "content": "Failed to parse LLM decision JSON.", "raw_json": raw_llm_decision_json, "error_details": str(e_inner)})
769
- # Provide a generic error message, as the LLM's output was malformed.
770
- # Adding the raw output or a snippet to the conversation history might help the LLM recover or inform the user.
771
- current_conversation.append({
772
- "role": "assistant",
773
- "content": "(I encountered an internal error trying to understand my next step. I will try to answer directly based on what I have so far.)"
774
- })
775
- break # Break to generate final answer with current info
776
-
777
- if llm_decision is None: # If parsing failed and couldn't recover
778
- return {"final_answer": "I'm sorry, I had trouble understanding the next step due to a formatting issue.", "tool_calls": tool_calls_made_this_turn, "error": "Invalid JSON from LLM for decision."}
779
-
808
+ llm_decision = robust_json_parser(raw_llm_decision_json)
809
+ if "action" not in llm_decision or "action_details" not in llm_decision or "updated_plan" not in llm_decision:
810
+ raise KeyError("The JSON is missing required keys: 'action', 'action_details', or 'updated_plan'.")
811
+ break
812
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
813
+ error_message = f"JSON parsing failed (Attempt {i+1}/{max_json_retries+1}). Error: {e}"
814
+ ASCIIColors.warning(error_message)
815
+ if streaming_callback: streaming_callback(error_message, MSG_TYPE.MSG_TYPE_WARNING, None, turn_history)
816
+ turn_history.append({"type": "error", "content": f"Invalid JSON response: {raw_llm_decision_json}"})
817
+ if i >= max_json_retries:
818
+ ASCIIColors.error("Max JSON retries reached. Aborting agent loop.")
819
+ llm_decision = None
820
+ break
821
+ current_decision_prompt = (
822
+ "You previously failed to generate a valid JSON object. Review the error and your last output, then try again, adhering strictly to the required schema.\n\n"
823
+ "--- ERROR ---\n"
824
+ f"{str(e)}\n\n"
825
+ "--- YOUR PREVIOUS (INVALID) OUTPUT ---\n"
826
+ f"{raw_llm_decision_json}\n\n"
827
+ "--- REQUIRED SCHEMA REMINDER ---\n"
828
+ "Your response MUST be a single JSON object inside a ```json markdown tag. It must contain 'action', 'action_details', and 'updated_plan' keys.\n\n"
829
+ "Now, please re-generate the JSON response correctly."
830
+ )
831
+ if not llm_decision: break
832
+
833
+ turn_history.append({"type": "llm_decision", "content": llm_decision})
834
+ current_plan = llm_decision.get("updated_plan", current_plan)
780
835
  action = llm_decision.get("action")
836
+ action_details = llm_decision.get("action_details", {})
837
+ if streaming_callback: streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
781
838
 
782
839
  if action == "call_tool":
783
840
  if len(tool_calls_made_this_turn) >= max_tool_calls:
784
- ASCIIColors.warning("Maximum tool calls reached for this turn. Forcing final answer.")
785
- current_conversation.append({"role":"assistant", "content":"(Max tool calls reached. I will now try to formulate an answer based on available information.)"})
786
- break # Exit loop to generate final answer
787
-
788
- tool_name = llm_decision.get("tool_name")
789
- tool_params = llm_decision.get("tool_params", {})
790
-
791
- if not tool_name:
792
- ASCIIColors.warning("LLM decided to call a tool but didn't specify tool_name.")
793
- current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
794
- break # Or ask LLM to try again without this faulty decision in history
795
-
796
- tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
797
- turn_history.append(tool_call_info)
798
- if streaming_callback:
799
- streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
800
- streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
801
-
802
- # Interactive execution if enabled
803
- if interactive_tool_execution:
804
- try:
805
- user_confirmation = input(f"AI wants to execute tool '{tool_name}' with params {tool_params}. Allow? (yes/no/details): ").lower()
806
- if user_confirmation == "details":
807
- tool_def_for_details = next((t for t in tools if t.get("name") == tool_name), None)
808
- print(f"Tool details: {json.dumps(tool_def_for_details, indent=2)}")
809
- user_confirmation = input(f"Allow execution of '{tool_name}'? (yes/no): ").lower()
810
-
811
- if user_confirmation != "yes":
812
- ASCIIColors.info("Tool execution cancelled by user.")
813
- tool_result = {"error": "Tool execution cancelled by user."}
814
- # Add this info to conversation for LLM
815
- current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' execution was cancelled by the user. What should I do next?)"})
816
- tool_call_info["result"] = tool_result # Record cancellation
817
- tool_calls_made_this_turn.append(tool_call_info)
818
- continue # Back to LLM for next decision
819
- except Exception as e_input: # Catch issues with input() e.g. in non-interactive env
820
- ASCIIColors.warning(f"Error during interactive confirmation: {e_input}. Proceeding without confirmation.")
821
-
822
-
823
- if streaming_callback:
824
- streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
841
+ ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
842
+ break
843
+ tool_name = action_details.get("tool_name")
844
+ tool_params = action_details.get("tool_params", {})
845
+ if not tool_name or not isinstance(tool_params, dict):
846
+ ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
847
+ break
825
848
 
849
+ if streaming_callback: streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
826
850
  tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
827
-
828
- tool_call_info["result"] = tool_result # Add result to this call's info
829
- tool_calls_made_this_turn.append(tool_call_info) # Log the completed call
830
851
  if streaming_callback:
831
- streaming_callback(f"", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
852
+ streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
853
+ streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
832
854
 
855
+ if streaming_callback: streaming_callback("Synthesizing new knowledge...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"synthesis_step_{llm_iterations}"}, turn_history)
856
+ new_scratchpad = self._synthesize_knowledge(previous_scratchpad=knowledge_scratchpad, tool_name=tool_name, tool_params=tool_params, tool_result=tool_result)
857
+ knowledge_scratchpad = new_scratchpad
833
858
  if streaming_callback:
834
- streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
835
-
836
- # Add tool execution result to conversation for the LLM
837
- # The format of this message can influence how the LLM uses the tool output.
838
- # current_conversation.append({"role": "tool_result", "tool_name": tool_name, "content": json.dumps(tool_result)}) # More structured
839
- current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' executed. Result: {json.dumps(tool_result)})"})
840
-
841
-
859
+ streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"synthesis_step_{llm_iterations}"}, turn_history)
860
+ streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id": "scratchpad_update"}, turn_history)
861
+
862
+ work_entry = { "thought": llm_decision.get("thought", "N/A"), "tool_name": tool_name, "tool_params": tool_params, "tool_result": tool_result, "synthesized_knowledge": knowledge_scratchpad }
863
+ agent_work_history.append(work_entry)
864
+ tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
865
+
842
866
  elif action == "clarify":
843
- clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
844
- if streaming_callback:
845
- streaming_callback(clarification_request, MSG_TYPE.MSG_TYPE_FULL, {"type": "clarification_request"}, turn_history)
846
- turn_history.append({"type":"clarification_request_sent", "content": clarification_request})
847
- return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None}
848
-
867
+ clarification_request = action_details.get("clarification_request", "I need more information to proceed. Could you please clarify?")
868
+ return { "final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True }
869
+
849
870
  elif action == "final_answer":
850
871
  ASCIIColors.info("LLM decided to formulate a final answer.")
851
- current_conversation.append({"role":"assistant", "content":"(I will now formulate the final answer based on the information gathered.)"}) # Inform LLM's "thought process"
852
- break # Exit loop to generate final answer
872
+ break
853
873
 
854
874
  else:
855
- ASCIIColors.warning(f"LLM returned unknown action: {action}")
856
- current_conversation.append({"role":"assistant", "content":f"(Received an unexpected decision: {action}. I will try to answer directly.)"})
857
- break # Exit loop
858
-
859
- # Safety break if too many iterations without reaching final answer or max_tool_calls
860
- if llm_iterations >= max_llm_iterations:
861
- ASCIIColors.warning("Max LLM iterations reached. Forcing final answer.")
862
- current_conversation.append({"role":"assistant", "content":"(Max iterations reached. I will now try to formulate an answer.)"})
875
+ ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
863
876
  break
877
+
878
+ if streaming_callback:
879
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
880
+
881
+ if streaming_callback:
882
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
883
+ if streaming_callback:
884
+ streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
864
885
 
865
- # 3. Generate final answer if LLM decided to, or if loop broke
866
- if streaming_callback:
867
- streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_history)
868
-
869
- # Construct the final prompt string for generate_text from current_conversation
870
- final_prompt_str = ""
871
- final_system_prompt = ""
872
-
873
- # Consolidate system messages if any
874
- interim_history_for_final_answer = []
875
- for msg in current_conversation:
876
- if msg["role"] == "system":
877
- final_system_prompt += msg["content"] + "\n"
878
- else:
879
- interim_history_for_final_answer.append(msg)
880
-
881
- if not any(msg['role'] == 'user' for msg in interim_history_for_final_answer): # Ensure there's a user turn if only system + tool calls
882
- interim_history_for_final_answer.append({'role':'user', 'content': prompt}) # Add original prompt if lost
883
-
884
-
885
- # The generate_text method needs a single prompt and an optional system_prompt.
886
- # We need to format the interim_history_for_final_answer into a single prompt string,
887
- # or modify generate_text to accept a list of messages.
888
- # For now, flatten to string:
889
- current_prompt_for_final_answer = ""
890
- for i, msg in enumerate(interim_history_for_final_answer):
891
- role_prefix = self.user_custom_header(msg["role"]) if msg["role"]=="user" else self.ai_custom_header(msg["role"]) if msg["role"]=="assistant" else f"!@>{msg['role']}:"
892
- current_prompt_for_final_answer += f"{role_prefix}{msg['content']}"
893
- if i < len(interim_history_for_final_answer) -1 : # Add newline separator except for last
894
- current_prompt_for_final_answer += "\n"
895
- # Add AI header to prompt AI to speak
896
- current_prompt_for_final_answer += f"\n{self.ai_full_header}"
897
-
898
-
899
- final_answer_text = self.generate_text(
900
- prompt=current_prompt_for_final_answer, # Pass the conversation history as the prompt
901
- system_prompt=final_system_prompt.strip(),
902
- images=images if not tool_calls_made_this_turn else None, # Only pass initial images if no tool calls happened (context might be lost)
903
- stream=streaming_callback is not None,
904
- streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, {"type":"final_answer_chunk"}, turn_history) if streaming_callback else None,
905
- temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
906
- **(llm_generation_kwargs or {})
886
+ final_answer_prompt = (
887
+ "You are an AI assistant tasked with providing a final, comprehensive answer to the user based on the research performed.\n\n"
888
+ "--- FULL CONVERSATION CONTEXT ---\n"
889
+ f"{conversation_context}\n\n"
890
+ "--- SUMMARY OF FINDINGS (Your Knowledge Scratchpad) ---\n"
891
+ f"{knowledge_scratchpad}\n\n"
892
+ "--- INSTRUCTIONS ---\n"
893
+ "- Synthesize a clear and complete answer for the user based ONLY on the information in the 'Summary of Findings'.\n"
894
+ "- Address the user directly and answer their latest query, considering the full conversation.\n"
895
+ "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
896
+ "- Format your response clearly using markdown where appropriate.\n"
907
897
  )
898
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
899
+
900
+ if streaming_callback:
901
+ streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
908
902
 
909
- if streaming_callback:
910
- streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_history)
911
-
912
- if isinstance(final_answer_text, dict) and "error" in final_answer_text: # Handle generation error
913
- turn_history.append({"type":"error", "content":f"LLM failed to generate final answer: {final_answer_text['error']}"})
914
- return {"final_answer": "", "tool_calls": tool_calls_made_this_turn, "error": final_answer_text["error"]}
903
+ final_answer = self.remove_thinking_blocks(final_answer_text)
904
+ turn_history.append({"type":"final_answer_generated", "content": final_answer})
905
+
906
+ return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
915
907
 
916
- turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
917
- return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}
918
908
 
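
Taken together, the reworked `generate_with_mcp` drops `discussion_history` and `interactive_tool_execution` in favour of `system_prompt`, `ctx_size` and `max_json_retries`. A hedged usage sketch, assuming `client` is an already-configured `LollmsClient` with both an LLM binding and an MCP binding (construction details are outside this diff):

```python
# Hypothetical driver: `client` is assumed to be a LollmsClient already configured
# with an LLM binding and an MCP binding.
def on_event(chunk, msg_type, metadata, turn_history):
    # Matches the callback signature (str, msg_type, dict, list) -> bool used above.
    print(f"[{msg_type}] {chunk}")
    return True  # returning False asks the client to stop streaming

result = client.generate_with_mcp(
    prompt="Find recent papers on agentic RAG and summarize the main ideas.",
    system_prompt="You are a careful research assistant.",
    max_tool_calls=5,
    max_llm_iterations=10,
    ctx_size=8192,          # lets _build_final_decision_prompt truncate the work history
    max_json_retries=1,     # one corrective re-prompt when the decision JSON is invalid
    streaming_callback=on_event,
)
print(result["final_answer"])
for call in result["tool_calls"]:
    print(call["name"], call["params"])
```

When the agent chooses the `clarify` action, the returned dictionary carries the question in `final_answer` together with a `clarification: True` flag instead of a synthesized answer.
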
919
909
  def generate_text_with_rag(
920
910
  self,
921
911
  prompt: str,
922
912
  rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
913
+ system_prompt: str = "",
914
+ objective_extraction_system_prompt="Extract objectives",
923
915
  rag_query_text: Optional[str] = None,
924
916
  rag_vectorizer_name: Optional[str] = None,
925
917
  rag_top_k: int = 5,
926
918
  rag_min_similarity_percent: float = 70.0,
927
- max_rag_hops: int = 0,
919
+ max_rag_hops: int = 3,
928
920
  images: Optional[List[str]] = None,
929
- system_prompt: str = "",
930
921
  n_predict: Optional[int] = None,
931
922
  stream: Optional[bool] = None,
932
923
  temperature: Optional[float] = None,
@@ -943,13 +934,11 @@ Respond with a JSON object containing ONE of the following structures:
943
934
  **llm_generation_kwargs
944
935
  ) -> Dict[str, Any]:
945
936
  """
946
- Enhanced RAG with optional initial objective extraction and automatic intermediate summaries
947
- when context grows beyond ctx_size or self.default_ctx_size.
937
+ Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
948
938
  """
949
939
  if not self.binding:
950
940
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
951
941
 
952
- # Determine effective context size limit
953
942
  effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
954
943
 
955
944
  turn_rag_history_for_callback: List[Dict[str, Any]] = []
@@ -957,183 +946,251 @@ Respond with a JSON object containing ONE of the following structures:
957
946
  all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
958
947
 
959
948
  original_user_prompt = prompt
960
- objectives_text = ""
961
- # 0. Optional Objectives Extraction Step
949
+
950
+ knowledge_scratchpad = "No information gathered yet."
951
+ current_objectives = ""
952
+
962
953
  if extract_objectives:
963
954
  if streaming_callback:
964
- streaming_callback("Extracting and structuring objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
955
+ streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
956
+
965
957
  obj_prompt = (
966
- "You are an expert analyst. "
967
- "Your task is to extract and structure the key objectives from the user's request below. "
968
- "Output a bullet list of objectives only.\n\n"
958
+ "You are an expert analyst. Your task is to extract and structure the key research objectives from the user's request below. "
959
+ "These objectives will guide a research process. Frame them as questions or tasks. "
960
+ "Output a bulleted list of objectives only without a comment.\n\n"
969
961
  f"User request:\n\"{original_user_prompt}\""
970
962
  )
971
- obj_gen = self.generate_text(
972
- prompt=obj_prompt,
973
- system_prompt="Extract objectives",
974
- temperature=0.0,
975
- n_predict=200,
976
- stream=False
977
- )
978
- objectives_text = self.remove_thinking_blocks(obj_gen).strip()
963
+ initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
964
+ current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
965
+
979
966
  if streaming_callback:
980
- streaming_callback(f"Objectives: {objectives_text}", MSG_TYPE.MSG_TYPE_STEP, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
967
+ streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
968
+ else:
969
+ current_objectives = f"Answer the user's request: '{original_user_prompt}'"
981
970
 
982
- if streaming_callback:
983
- streaming_callback(f"Objectives extracted:\n{objectives_text}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
971
+ if streaming_callback:
972
+ streaming_callback("Generating initial search query...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
973
+ if not rag_query_text:
974
+ initial_query_gen_prompt = f"""
975
+ You are a research assistant. Your task is to formulate the first search query for a vector database based on an initial user request and research objectives. The query should be concise and target the most crucial information needed to start.
984
976
 
985
- current_query_for_rag = rag_query_text or None
986
- previous_queries=[]
987
- # 1. RAG Hops
988
- for hop_count in range(max_rag_hops + 1):
989
- if streaming_callback:
990
- streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
991
- txt_previous_queries = f"Previous queries:\n"+'\n'.join(previous_queries)+"\n\n" if len(previous_queries)>0 else ""
992
- txt_informations = f"Information:\n"+'\n'.join([f"(from {chunk['document']}):{chunk['content']}" for _, chunk in all_unique_retrieved_chunks_map.items()]) if len(all_unique_retrieved_chunks_map)>0 else "This is the first request. No data received yet. Build a new query."
993
- txt_sp = (
994
- "Your objective is to analyze the provided chunks of information to determine "
995
- "whether they are sufficient to reach the objective. If not, formulate a refined and focused query "
996
- "that can retrieve more relevant information from a vector database. Ensure the query captures the semantic essence "
997
- "of what is missing, is contextually independent, and is optimized for vector-based similarity search. "
998
- "Do not repeat or rephrase earlier queries—always generate a new, meaningful atomic query targeting the current gap in knowledge."
999
- )
977
+ --- User's Request ---
978
+ {original_user_prompt}
979
+
980
+ --- Initial Research Objectives ---
981
+ {current_objectives}
1000
982
 
1001
- txt_formatting = """The output format must be in form of JSON placed inside a JSON markdown tag. Use the following schema:
983
+ --- INSTRUCTIONS ---
984
+ Generate a single, effective search query.
985
+
986
+ --- OUTPUT FORMAT ---
987
+ Provide your response as a single JSON object with one key, "query".
1002
988
  ```json
1003
- {
1004
- "decision": A boolean indicating your decision (true: more data is needed, false: the current data is sufficient),
1005
- "query": (str, optional, only if decision is true). A new, atomic query suitable for semantic search in a vector database.
1006
- It should capture the missing concept or insight in concise, context-rich language, avoiding reuse of earlier queries.
1007
- }
989
+ {{
990
+ "query": "Your generated search query here."
991
+ }}
1008
992
  ```
1009
993
  """
1010
- p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
1011
- response = self.generate_code(p,system_prompt=txt_sp)
1012
994
  try:
1013
- answer = json.loads(response)
1014
- decision = answer["decision"]
1015
- if not decision:
1016
- if streaming_callback:
1017
- streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1018
-
1019
- break
1020
- else:
1021
- current_query_for_rag = str(answer["query"])
1022
- except Exception as ex:
1023
- trace_exception(ex)
995
+ raw_initial_query_response = self.generate_code(initial_query_gen_prompt, system_prompt="You are a query generation expert.", temperature=0.0)
996
+ initial_plan = json.loads(raw_initial_query_response)
997
+ current_query_for_rag = initial_plan.get("query")
998
+ if not current_query_for_rag:
999
+ raise ValueError("LLM returned an empty initial query.")
1000
+ if streaming_callback:
1001
+ streaming_callback(f"Initial query generated:\n'{current_query_for_rag}'", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
1002
+ except Exception as e:
1003
+ trace_exception(e)
1004
+ current_query_for_rag = original_user_prompt
1005
+ if streaming_callback:
1006
+ streaming_callback(f"Failed to generate initial query, falling back to user prompt. Error: {e}", MSG_TYPE.MSG_TYPE_WARNING, {"id": "initial_query_failure"}, turn_rag_history_for_callback)
1007
+ else:
1008
+ current_query_for_rag=rag_query_text
1009
+
1010
+ previous_queries = []
1024
1011
 
1012
+ for hop_count in range(max_rag_hops):
1025
1013
  if streaming_callback:
1026
- streaming_callback(f"Query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query for hop {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1027
-
1028
- # Retrieve chunks
1014
+ streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
1015
+
1016
+ if streaming_callback:
1017
+ streaming_callback(f"Executing Query:\n{current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query_exec_{hop_count + 1}"}, turn_rag_history_for_callback)
1018
+
1029
1019
  try:
1030
- retrieved = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
1020
+ retrieved_chunks = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
1031
1021
  except Exception as e:
1032
1022
  trace_exception(e)
1033
1023
  return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
1034
1024
 
1035
1025
  hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
1036
1026
  previous_queries.append(current_query_for_rag)
1037
- new_unique = 0
1038
- documents = []
1039
- for chunk in retrieved:
1040
- doc = chunk.get("file_path", "Unknown")
1041
- content = str(chunk.get("chunk_text", ""))
1042
- sim = float(chunk.get("similarity_percent", 0.0))
1043
- detail = {"document": doc, "similarity": sim, "content": content,
1044
- "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
1045
- documents.append(doc)
1046
- hop_details["retrieved_chunks_details"].append(detail)
1047
- key = f"{doc}::{content[:100]}"
1048
- if key not in all_unique_retrieved_chunks_map:
1049
- all_unique_retrieved_chunks_map[key] = detail
1050
- new_unique += 1
1051
- hop_details["status"] = "Completed" if retrieved else "No chunks retrieved"
1052
- if hop_count > 0 and new_unique == 0:
1053
- hop_details["status"] = "No *new* unique chunks retrieved"
1027
+
1028
+ newly_retrieved_text = ""
1029
+ new_chunks_count = 0
1030
+ if retrieved_chunks:
1031
+ for chunk in retrieved_chunks:
1032
+ doc = chunk.get("file_path", "Unknown")
1033
+ content = str(chunk.get("chunk_text", ""))
1034
+ sim = float(chunk.get("similarity_percent", 0.0))
1035
+ detail = {"document": doc, "similarity": sim, "content": content, "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
1036
+ hop_details["retrieved_chunks_details"].append(detail)
1037
+
1038
+ key = f"{doc}::{content[:100]}"
1039
+ if key not in all_unique_retrieved_chunks_map:
1040
+ all_unique_retrieved_chunks_map[key] = detail
1041
+ newly_retrieved_text += f"--- Document: {doc} (Similarity: {sim:.1f}%)\n{content}\n---\n"
1042
+ new_chunks_count += 1
1043
+
1044
+ hop_details["status"] = f"Completed, found {len(retrieved_chunks)} chunks ({new_chunks_count} new)."
1054
1045
  rag_hops_details_list.append(hop_details)
1046
+
1055
1047
  if streaming_callback:
1056
- streaming_callback(f"Retreived {len(retrieved)} data chunks from {set(documents)}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retreival {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1048
+ streaming_callback(f"Retrieved {len(retrieved_chunks)} chunks ({new_chunks_count} new).", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retrieval_info_{hop_count + 1}"}, turn_rag_history_for_callback)
1049
+
1050
+ if new_chunks_count == 0 and hop_count > 0:
1051
+ if streaming_callback:
1052
+ streaming_callback("No new unique information found, stopping RAG hops.", MSG_TYPE.MSG_TYPE_INFO, {"id": "rag_stop_no_new_info"}, turn_rag_history_for_callback)
1053
+ break
1057
1054
 
1058
1055
  if streaming_callback:
1059
- streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1056
+ streaming_callback("Analyzing findings and refining plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{hop_count + 1}"}, turn_rag_history_for_callback)
1057
+
1058
+ planning_system_prompt = (
1059
+ "You are a strategic research agent via multiple hops. Your task is to analyze new information, update your "
1060
+ "understanding, refine your research objectives, and decide on the next best action."
1061
+ )
1062
+
1063
+ planning_prompt = f"""
1064
+ --- Original User Request ---
1065
+ {original_user_prompt}
1066
+
1067
+ --- Objectives you have formulated ---
1068
+ {current_objectives}
1069
+
1070
+ --- Existing Knowledge Scratchpad (Summary of previous findings) ---
1071
+ {knowledge_scratchpad}
1072
+
1073
+ --- Newly Retrieved Documents for this Hop ---
1074
+ {newly_retrieved_text if newly_retrieved_text else "No new documents were found with the last query."}
1075
+
1076
+ --- Previous Queries (for reference, do not repeat) ---
1077
+ - {"- ".join(previous_queries)}
1060
1078
 
1079
+ --- INSTRUCTIONS ---
1080
+ 1. **Analyze & Update Knowledge:** Read the 'Newly Retrieved Documents'. Summarize the most important new facts and insights into a few bullet points for the 'new_notes_for_scratchpad'.
1081
+ 2. **Refine Objectives:** Review the 'Current Research Objectives'. Do the new documents answer any objectives? Do they reveal that some objectives need to be changed or made more specific? Rewrite the complete, updated list of objectives.
1082
+ 3. **Decide & Plan Next Query:** Based on your updated objectives and knowledge, decide if you have enough information to form a final answer.
1083
+ - If YES, set `decision` to `false`.
1084
+ - If NO, set `decision` to `true` and formulate a new, focused `query` to address the most critical remaining gap in your knowledge. The query must be different from previous ones.
1061
1085
 
1062
- # 2. Prepare & Summarize Context
1063
- sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(),
1064
- key=lambda c: c["similarity"], reverse=True)
1086
+ --- OUTPUT FORMAT ---
1087
+ Provide your response as a single JSON object inside a JSON markdown tag. Use this exact schema:
1088
+ ```json
1089
+ {{
1090
+ "updated_objectives": "(string) A bulleted list of the new, refined objectives based on the latest information.",
1091
+ "new_notes_for_scratchpad": "(string) A concise summary in bullet points of key findings from the new documents.",
1092
+ "decision": "boolean (true if you need to query again, false if you are done).",
1093
+ "query": "(string, optional) The next query for the vector database if decision is true."
1094
+ }}
1095
+ ```
1096
+ """
1097
+ raw_planning_response = self.generate_code(planning_prompt, system_prompt=planning_system_prompt, temperature=0.0)
1098
+
1099
+ try:
1100
+ plan = robust_json_parser(raw_planning_response)
1101
+
1102
+ raw_notes = plan.get("new_notes_for_scratchpad")
1103
+ if isinstance(raw_notes, list):
1104
+ notes_from_hop = "\n".join(str(item) for item in raw_notes if item).strip()
1105
+ elif isinstance(raw_notes, str):
1106
+ notes_from_hop = raw_notes.strip()
1107
+ else:
1108
+ notes_from_hop = ""
1109
+
1110
+ if notes_from_hop:
1111
+ if knowledge_scratchpad == "No information gathered yet.":
1112
+ knowledge_scratchpad = f"Findings from Hop {hop_count + 1}:\n{notes_from_hop}"
1113
+ else:
1114
+ knowledge_scratchpad += f"\n\nFindings from Hop {hop_count + 1}:\n{notes_from_hop}"
1115
+
1116
+ raw_objectives = plan.get("updated_objectives")
1117
+ if isinstance(raw_objectives, list):
1118
+ current_objectives = "\n".join(str(item) for item in raw_objectives if item).strip()
1119
+ elif isinstance(raw_objectives, str) and raw_objectives.strip():
1120
+ current_objectives = raw_objectives.strip()
1121
+
1122
+ if streaming_callback:
1123
+ streaming_callback(f"Refined Objectives:\n{current_objectives}\n\nNew Learnings:\n{notes_from_hop}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"planning_output_{hop_count + 1}"}, turn_rag_history_for_callback)
1124
+
1125
+ if not plan.get("decision", False):
1126
+ if streaming_callback:
1127
+ streaming_callback("LLM decided it has enough information.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
1128
+ break
1129
+ else:
1130
+ next_query = plan.get("query")
1131
+ if not next_query:
1132
+ if streaming_callback:
1133
+ streaming_callback("LLM decided to continue but provided no query. Stopping.", MSG_TYPE.MSG_TYPE_WARNING, {"id": "rag_stop_no_query"}, turn_rag_history_for_callback)
1134
+ break
1135
+ current_query_for_rag = next_query
1136
+
1137
+ except Exception as ex:
1138
+ trace_exception(ex)
1139
+ if streaming_callback:
1140
+ streaming_callback(f"Error processing planning step: {ex}. Stopping RAG.", MSG_TYPE.MSG_TYPE_EXCEPTION, {"id": f"planning_error_{hop_count + 1}"}, turn_rag_history_for_callback)
1141
+ break
1142
+
1143
+ if streaming_callback:
1144
+ streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
1145
+
1146
+ sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(), key=lambda c: c["similarity"], reverse=True)
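
Each hop ends by asking the model for a planning JSON that both rewrites the objectives and contributes notes to the scratchpad; the loop stops when `decision` is false, when no new chunks arrive, or when no follow-up query is supplied. A purely illustrative reduction of that per-hop state update (key names follow the schema requested above; parsing of the raw reply is left to `robust_json_parser`):

```python
def apply_hop_plan(plan: dict, objectives: str, scratchpad: str, hop: int):
    """Update (objectives, scratchpad) from one planning JSON and report whether to keep querying."""
    raw_notes = plan.get("new_notes_for_scratchpad")
    if isinstance(raw_notes, list):
        notes = "\n".join(str(n) for n in raw_notes if n).strip()
    elif isinstance(raw_notes, str):
        notes = raw_notes.strip()
    else:
        notes = ""
    if notes:
        entry = f"Findings from Hop {hop}:\n{notes}"
        scratchpad = entry if scratchpad == "No information gathered yet." else f"{scratchpad}\n\n{entry}"
    raw_objectives = plan.get("updated_objectives")
    if isinstance(raw_objectives, list):
        objectives = "\n".join(str(o) for o in raw_objectives if o).strip() or objectives
    elif isinstance(raw_objectives, str) and raw_objectives.strip():
        objectives = raw_objectives.strip()
    return objectives, scratchpad, bool(plan.get("decision", False)), plan.get("query")

plan = {"updated_objectives": "- find benchmark numbers for method X",
        "new_notes_for_scratchpad": ["paper X reports 85% accuracy on benchmark Y"],
        "decision": True, "query": "benchmark accuracy reported for method X"}
print(apply_hop_plan(plan, "- find relevant papers", "No information gathered yet.", 1))
```
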
1065
1147
  context_lines = []
1066
1148
  total_chars = 0
1067
1149
  for c in sorted_chunks:
1068
- snippet = (
1069
- f"Source: {c['document']} (Sim: {c['similarity']:.1f}%, "
1070
- f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
1071
- f"{c['content']}\n---\n"
1072
- )
1073
- if total_chars + len(snippet) > max_rag_context_characters:
1074
- break
1150
+ snippet = (f"Source: {c['document']} (Sim: {c['similarity']:.1f}%)\n{c['content']}\n---\n")
1151
+ if total_chars + len(snippet) > max_rag_context_characters: break
1075
1152
  context_lines.append(snippet)
1076
1153
  total_chars += len(snippet)
1077
-
1078
1154
  accumulated_context = "".join(context_lines)
1079
1155
 
1080
- # If context exceeds our effective limit, summarize it
1081
1156
  if self.count_tokens(accumulated_context) > effective_ctx_size:
1082
- if streaming_callback:
1083
- streaming_callback("Context too large, performing intermediate summary...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
1084
- summary_prompt = (
1085
- "Summarize the following gathered context into a concise form "
1086
- "that preserves all key facts and sources needed to answer the user's request:\n\n"
1087
- f"{accumulated_context}"
1088
- )
1089
- summary = self.generate_text(
1090
- prompt=summary_prompt,
1091
- system_prompt="Intermediate summary",
1092
- temperature=0.0,
1093
- n_predict= n_predict or 512,
1094
- stream=False
1095
- )
1096
- accumulated_context = self.remove_thinking_blocks(summary).strip()
1097
- if streaming_callback:
1098
- streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
1157
+ pass  # NOTE: the oversized-context branch is currently a no-op; no intermediate summary is generated
1158
+
1159
+ if streaming_callback:
1160
+ streaming_callback("Compiling final answer from all findings...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_generation"}, turn_rag_history_for_callback)
1099
1161
 
1100
- # 3. Final Answer Generation
1101
- final_prompt = [
1102
- f"Original request: {original_user_prompt}"
1162
+ final_prompt_parts = [
1163
+ f"**User's Original Request:**\n{original_user_prompt}\n",
1164
+ f"**Final Research Objectives:**\n{current_objectives}\n",
1165
+ f"**Knowledge Scratchpad (Summary of Findings):**\n{knowledge_scratchpad}\n",
1103
1166
  ]
1104
- if objectives_text:
1105
- final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
1106
1167
  if accumulated_context:
1107
- final_prompt.append(
1108
- "\nBased on the gathered context:\n---\n"
1109
- f"{accumulated_context}\n---"
1168
+ final_prompt_parts.append(
1169
+ "**Supporting Raw Context from Retrieved Documents:**\n---\n"
1170
+ f"{accumulated_context}\n---\n"
1110
1171
  )
1111
1172
  else:
1112
- final_prompt.append("\n(No relevant context retrieved.)")
1113
- final_prompt.append(
1114
- "\nProvide a comprehensive answer using ONLY the above context. "
1115
- "If context is insufficient, state so clearly."
1173
+ final_prompt_parts.append("**Supporting Raw Context:**\n(No relevant documents were retrieved.)\n")
1174
+
1175
+ final_prompt_parts.append(
1176
+ "**Final Instruction:**\nSynthesize a comprehensive answer to the user's original request. "
1177
+ "Use the 'Knowledge Scratchpad' as your primary source of information and the 'Supporting Raw Context' for specific details and quotes. "
1178
+ "Adhere strictly to the information provided. If the information is insufficient to fully answer, state what is missing based on your 'Final Research Objectives'."
1116
1179
  )
1117
- final_prompt.append(self.ai_full_header)
1180
+ final_prompt_parts.append(self.ai_full_header)
1118
1181
 
1119
1182
  final_answer = self.generate_text(
1120
- prompt="\n".join(final_prompt),
1121
- images=images,
1122
- system_prompt=system_prompt,
1123
- n_predict=n_predict,
1124
- stream=stream,
1125
- temperature=temperature,
1126
- top_k=top_k,
1127
- top_p=top_p,
1128
- repeat_penalty=repeat_penalty,
1129
- repeat_last_n=repeat_last_n,
1130
- seed=seed,
1131
- n_threads=n_threads,
1132
- ctx_size=ctx_size,
1183
+ prompt="\n".join(final_prompt_parts),
1184
+ images=images, system_prompt=system_prompt, n_predict=n_predict, stream=stream,
1185
+ temperature=temperature, top_k=top_k, top_p=top_p, repeat_penalty=repeat_penalty,
1186
+ repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads, ctx_size=ctx_size,
1133
1187
  streaming_callback=streaming_callback if stream else None,
1134
1188
  **llm_generation_kwargs
1135
1189
  )
1136
1190
  answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer
1191
+
1192
+ if streaming_callback:
1193
+ streaming_callback("Final answer generated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_generation"}, turn_rag_history_for_callback)
1137
1194
 
1138
1195
  return {
1139
1196
  "final_answer": answer_text,
@@ -1141,8 +1198,304 @@ Respond with a JSON object containing ONE of the following structures:
1141
1198
  "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
1142
1199
  "error": None
1143
1200
  }
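As a rough consumption sketch (the enclosing multi-hop RAG method's full signature is not visible in this hunk, so only the returned keys shown above are used), a caller might handle the result like this:

```python
# Sketch of consuming the dict returned above; `result` stands for the value
# returned by the enclosing multi-hop RAG method.
if result["error"] is None:
    print(result["final_answer"])
    for chunk in result["all_retrieved_sources"]:
        # Each chunk carries at least 'document', 'similarity' and 'content',
        # matching the fields used when the context snippets are assembled above.
        print(f"{chunk['document']} ({chunk['similarity']:.1f}%)")
```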
1201
+
1202
+ def generate_with_mcp_rag(
1203
+ self,
1204
+ prompt: str,
1205
+ rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
1206
+ system_prompt: Optional[str] = None,
1207
+ objective_extraction_system_prompt="Extract objectives",
1208
+ images: Optional[List[str]] = None,
1209
+ tools: Optional[List[Dict[str, Any]]] = None,
1210
+ max_tool_calls: int = 10,
1211
+ max_llm_iterations: int = 15,
1212
+ tool_call_decision_temperature: float = 0.0,
1213
+ final_answer_temperature: Optional[float] = None,
1214
+ streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
1215
+ build_plan: bool = True,
1216
+ rag_vectorizer_name: Optional[str] = None,
1217
+ rag_top_k: int = 5,
1218
+ rag_min_similarity_percent: float = 70.0,
1219
+ **llm_generation_kwargs
1220
+ ) -> Dict[str, Any]:
1221
+ """
1222
+ Generates a response using a stateful agent that can choose between calling standard
1223
+ MCP tools and querying a RAG database, all within a unified reasoning loop.
1224
+ """
1225
+ if not self.binding:
1226
+ return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
1227
+ if not self.mcp:
1228
+ return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
1229
+
1230
+ # --- Initialize Agent State ---
1231
+ turn_history: List[Dict[str, Any]] = []
1232
+ original_user_prompt = prompt
1233
+ knowledge_scratchpad = "No information gathered yet."
1234
+ current_objectives = ""
1235
+ agent_work_history = []
1236
+ tool_calls_made_this_turn = []
1237
+ llm_iterations = 0
1238
+
1239
+ # --- 1. Discover MCP Tools and Inject the RAG Tool ---
1240
+ if tools is None:
1241
+ try:
1242
+ mcp_tools = self.mcp.discover_tools(force_refresh=True)
1243
+ if not mcp_tools: ASCIIColors.warning("No MCP tools discovered.")
1244
+ except Exception as e_disc:
1245
+ return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
1246
+ else:
1247
+ mcp_tools = tools
1248
+
1249
+ # Define the RAG tool and add it to the list
1250
+ rag_tool_definition = {
1251
+ "name": "research::query_database",
1252
+ "description": (
1253
+ "Queries a vector database to find relevant text chunks based on a natural language query. "
1254
+ "Use this to gather information, answer questions, or find context for a task before using other tools."
1255
+ ),
1256
+ "input_schema": {
1257
+ "type": "object",
1258
+ "properties": {
1259
+ "query": {
1260
+ "type": "string",
1261
+ "description": "The natural language query to search for. Be specific to get the best results."
1262
+ }
1263
+ },
1264
+ "required": ["query"]
1265
+ }
1266
+ }
1267
+ available_tools = [rag_tool_definition] + mcp_tools
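A minimal sketch of a compatible `rag_query_function` is shown below. It follows the positional signature declared in the method parameters (query, vectorizer name, top-k, minimum similarity); `my_vector_store` and its attributes are placeholders, and the chunk dictionaries are forwarded to the model as raw JSON, so their exact keys are up to the caller.

```python
from typing import Any, Dict, List, Optional

def my_rag_query(query: str,
                 vectorizer_name: Optional[str],
                 top_k: int,
                 min_similarity_percent: float) -> List[Dict[str, Any]]:
    # `my_vector_store` is a placeholder for whatever retrieval backend you use.
    hits = my_vector_store.search(query, top_k=top_k)
    return [
        {"document": h.source, "similarity": h.score * 100, "content": h.text}
        for h in hits
        if h.score * 100 >= min_similarity_percent
    ]
```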
1268
+
1269
+ # --- 2. Optional Initial Objectives Extraction ---
1270
+ formatted_tools_list = "\n".join([
1271
+ f"- Full Tool Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
1272
+ for t in available_tools
1273
+ ])
1274
+ if build_plan:
1275
+ if streaming_callback:
1276
+ streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_history)
1277
+
1278
+ # Build the objective-extraction (planning) prompt.
1279
+ # The available tool list and the user's request are interpolated below.
1280
+
1281
+ obj_prompt = (
1282
+ "You are a hyper-efficient and logical project planner. Your sole purpose is to analyze the user's request and create a concise, numbered list of actionable steps to fulfill it.\n\n"
1283
+ "Your plan must be the most direct and minimal path to the user's goal.\n\n"
1284
+ "**Your Core Directives:**\n\n"
1285
+ "1. **Analyze the Request:** Break down the user's prompt into the essential, core tasks required.\n"
1286
+ "2. **Evaluate Tools with Extreme Scrutiny:** For each task, determine if a tool is **absolutely necessary**. Do not suggest a tool unless the task is impossible without it.\n"
1287
+ "3. **Prioritize Simplicity:** If the request can be answered directly without any tools (e.g., it's a simple question or requires a creative response), your entire plan should be a single step: \"1. Formulate a direct answer to the user's request.\"\n\n"
1288
+ "**CRITICAL RULES:**\n"
1289
+ "* **DO NOT** add any steps, objectives, or tool uses that were not explicitly required by the user.\n"
1290
+ "* **DO NOT** attempt to use a tool just because it is available. Most requests will not require any tools.\n"
1291
+ "* **DO NOT** add \"nice-to-have\" or \"extra\" tasks. Stick strictly to the request.\n\n"
1292
+ "Your final output must be a short, numbered list of steps. Do not call any tools in this planning phase.\n\n"
1293
+ "---\n"
1294
+ "**Available Tools:**\n"
1295
+ f"{formatted_tools_list}\n\n"
1296
+ "**User Request:**\n"
1297
+ f'"{original_user_prompt}"'
1298
+ )
1299
+ initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
1300
+ current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
1301
+
1302
+ if streaming_callback:
1303
+ streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_history)
1304
+ else:
1305
+ current_objectives = f"Fulfill the user's request: '{original_user_prompt}'"
1306
+
1307
+ turn_history.append({"type": "initial_objectives", "content": current_objectives})
1308
+
1144
1309
 
1145
1310
 
1311
+ # --- 3. Main Agent Loop ---
1312
+ while llm_iterations < max_llm_iterations:
1313
+ llm_iterations += 1
1314
+ if streaming_callback:
1315
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
1316
+
1317
+ # Format agent history for the prompt
1318
+ formatted_agent_history = "No actions taken yet."
1319
+ if agent_work_history:
1320
+ history_parts = []
1321
+ for i, entry in enumerate(agent_work_history):
1322
+ history_parts.append(
1323
+ f"### Step {i+1}:\n"
1324
+ f"**Thought:** {entry['thought']}\n"
1325
+ f"**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n"
1326
+ f"**Observation:**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```"
1327
+ )
1328
+ formatted_agent_history = "\n\n".join(history_parts)
1329
+
1330
+ # Construct the "Thinking & Planning" prompt
1331
+ decision_prompt_template = f"""You are a strategic AI assistant. Your goal is to achieve a set of objectives by intelligently using research and system tools.
1332
+
1333
+ --- AVAILABLE TOOLS ---
1334
+ {formatted_tools_list}
1335
+
1336
+ --- CURRENT STATE ---
1337
+ Original User Request: {original_user_prompt}
1338
+ Current Research Objectives:
1339
+ {current_objectives}
1340
+
1341
+ Knowledge Scratchpad (our current understanding):
1342
+ {knowledge_scratchpad}
1343
+
1344
+ --- AGENT WORK HISTORY (previous steps in this turn) ---
1345
+ {formatted_agent_history}
1346
+
1347
+ --- INSTRUCTIONS ---
1348
+ 1. **Analyze:** Review the entire work history, objectives, and scratchpad.
1349
+ 2. **Update State:** Based on the latest observations, update the scratchpad and refine the objectives. The scratchpad should be a comprehensive summary of ALL knowledge gathered.
1350
+ 3. **Decide Next Action:** Choose ONE of the following: `call_tool`, `final_answer`, or `clarify`. Always prefer to gather information with `research::query_database` before attempting to use other tools if you lack context.
1351
+
1352
+ --- OUTPUT FORMAT ---
1353
+ Respond with a single JSON object inside a JSON markdown tag. Use this exact schema:
1354
+ ```json
1355
+ {{
1356
+ "thought": "Your reasoning for the chosen action, analyzing how the work history informs your next step. Explain why you are choosing a specific tool (or to answer).",
1357
+ "updated_scratchpad": "The new, complete, and comprehensive summary of all knowledge gathered. Integrate new findings with old ones. if no new knowledge is gathered, this should be an empty string.",
1358
+ "updated_objectives": "The full, potentially revised, list of objectives. If no change, repeat the current list.",
1359
+ "action": "The chosen action: 'call_tool', 'final_answer', or 'clarify'.",
1360
+ "tool_name": "(string, if action is 'call_tool') The full 'alias::tool_name' of the tool to use.",
1361
+ "tool_params": {{"query": "...", "param2": "..."}},
1362
+ "clarification_request": "(string, if action is 'clarify') Your question to the user."
1363
+ }}
1364
+ ```
1365
+ """
1366
+ raw_llm_decision_json = self.generate_text(
1367
+ prompt=decision_prompt_template, n_predict=2048, temperature=tool_call_decision_temperature
1368
+ )
1369
+
1370
+ # --- 4. Parse LLM's plan and update state ---
1371
+ try:
1372
+ llm_decision = robust_json_parser(raw_llm_decision_json)
1373
+ turn_history.append({"type": "llm_plan", "content": llm_decision})
1374
+
1375
+ current_objectives = llm_decision.get("updated_objectives", current_objectives)
1376
+ new_scratchpad = llm_decision.get("updated_scratchpad")
1377
+
1378
+ if new_scratchpad and new_scratchpad != knowledge_scratchpad:
1379
+ knowledge_scratchpad = new_scratchpad
1380
+ if streaming_callback:
1381
+ streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP, {"id": "scratchpad_update"}, turn_history)
1382
+ streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id":"scratch_pad_update"}, turn_history)
1383
+
1384
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
1385
+ ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}. Error: {e}")
1386
+ turn_history.append({"type": "error", "content": f"Failed to parse LLM plan: {raw_llm_decision_json}"})
1387
+ break
1388
+
1389
+ if streaming_callback:
1390
+ streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
1391
+
1392
+ # --- 5. Execute the chosen action ---
1393
+ action = llm_decision.get("action")
1394
+ tool_result = None
1395
+
1396
+ if action == "call_tool":
1397
+ if len(tool_calls_made_this_turn) >= max_tool_calls:
1398
+ ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
1399
+ break
1400
+
1401
+ tool_name = llm_decision.get("tool_name")
1402
+ tool_params = llm_decision.get("tool_params", {})
1403
+
1404
+ if not tool_name or not isinstance(tool_params, dict):
1405
+ ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
1406
+ break
1407
+
1408
+ if streaming_callback:
1409
+ streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
1410
+
1411
+ try:
1412
+ # ** DYNAMIC TOOL/RAG DISPATCH **
1413
+ if tool_name == "research::query_database":
1414
+ query = tool_params.get("query")
1415
+ if not query:
1416
+ tool_result = {"error": "RAG tool called without a 'query' parameter."}
1417
+ else:
1418
+ retrieved_chunks = rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
1419
+ if not retrieved_chunks:
1420
+ tool_result = {"summary": "No relevant documents found for the query.", "chunks": []}
1421
+ else:
1422
+ tool_result = {
1423
+ "summary": f"Found {len(retrieved_chunks)} relevant document chunks.",
1424
+ "chunks": retrieved_chunks
1425
+ }
1426
+ else:
1427
+ # Standard MCP tool execution
1428
+ tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
1429
+
1430
+ except Exception as e_exec:
1431
+ trace_exception(e_exec)
1432
+ tool_result = {"error": f"An exception occurred while executing tool '{tool_name}': {e_exec}"}
1433
+
1434
+ # Record the work cycle in the agent's history
1435
+ work_entry = {
1436
+ "thought": llm_decision.get("thought", "N/A"),
1437
+ "tool_name": tool_name,
1438
+ "tool_params": tool_params,
1439
+ "tool_result": tool_result
1440
+ }
1441
+ agent_work_history.append(work_entry)
1442
+ tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
1443
+
1444
+ if streaming_callback:
1445
+ streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
1446
+ streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
1447
+
1448
+ elif action == "clarify":
1449
+ clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
1450
+ return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True}
1451
+
1452
+ elif action == "final_answer":
1453
+ ASCIIColors.info("LLM decided to formulate a final answer.")
1454
+ break
1455
+
1456
+ else:
1457
+ ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
1458
+ break
1459
+ if streaming_callback:
1460
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
1461
+
1462
+ if streaming_callback:
1463
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
1464
+ # --- 6. Generate Final Answer ---
1465
+ if streaming_callback:
1466
+ streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
1467
+
1468
+ final_answer_prompt = f"""You are an AI assistant providing a final, comprehensive answer based on research and tool use.
1469
+
1470
+ --- CONTEXT ---
1471
+ Original User Request: "{original_user_prompt}"
1472
+
1473
+ --- SUMMARY OF FINDINGS (Knowledge Scratchpad) ---
1474
+ {knowledge_scratchpad}
1475
+
1476
+ --- INSTRUCTIONS ---
1477
+ - Synthesize a clear, complete answer for the user based ONLY on the information in the 'Summary of Findings'.
1478
+ - Address the user directly and answer their original request.
1479
+ - Do not make up information. If the findings are insufficient, state what you found and what remains unanswered.
1480
+ """
1481
+ final_answer_text = self.generate_text(
1482
+ prompt=final_answer_prompt,
1483
+ system_prompt=system_prompt,
1484
+ images=images,
1485
+ stream=streaming_callback is not None,
1486
+ streaming_callback=streaming_callback,
1487
+ temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
1488
+ **(llm_generation_kwargs or {})
1489
+ )
1490
+
1491
+ if streaming_callback:
1492
+ streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
1493
+
1494
+ final_answer = self.remove_thinking_blocks(final_answer_text)
1495
+ turn_history.append({"type":"final_answer_generated", "content": final_answer})
1496
+
1497
+ return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
1498
+
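A rough usage sketch of the new agent entry point follows. It assumes `lc` is a configured LollmsClient with both an LLM binding and an MCP binding, and reuses the illustrative `my_rag_query` and `my_streaming_callback` helpers sketched earlier in this section; only keyword arguments visible in the signature above are used.

```python
# Usage sketch for generate_with_mcp_rag (helper names are assumptions).
result = lc.generate_with_mcp_rag(
    prompt="What do the project docs say about release 0.20.9, and is any regression mentioned?",
    rag_query_function=my_rag_query,
    streaming_callback=my_streaming_callback,
    rag_top_k=5,
    rag_min_similarity_percent=70.0,
)
if result.get("clarification"):
    print("Agent asked for clarification:", result["final_answer"])
elif result["error"] is None:
    print(result["final_answer"])
    print(f"{len(result['tool_calls'])} tool call(s) were made this turn.")
```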
1146
1499
  def generate_code(
1147
1500
  self,
1148
1501
  prompt,