lollms-client 0.20.6__py3-none-any.whl → 0.20.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -2,7 +2,7 @@
2
2
  import requests
3
3
  from ascii_colors import ASCIIColors, trace_exception
4
4
  from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
5
- from lollms_client.lollms_utilities import encode_image # Keep utilities needed by core
5
+ from lollms_client.lollms_utilities import robust_json_parser # Keep utilities needed by core
6
6
  from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
7
7
  # Import new Abstract Base Classes and Managers
8
8
  from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
@@ -597,336 +597,237 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
597
597
  response_full += response
598
598
  codes = self.extract_code_blocks(response, format=code_tag_format)
599
599
  return codes
600
+ def _build_final_decision_prompt(
601
+ self,
602
+ formatted_tools_list: str,
603
+ formatted_conversation_history: str,
604
+ current_plan: str,
605
+ knowledge_scratchpad: str,
606
+ agent_work_history_str: str,
607
+ ctx_size: Optional[int],
608
+ ) -> str:
609
+ # This helper function for building the main decision prompt remains the same.
610
+ # It is already robust and follows all formatting constraints.
611
+ final_agent_history = agent_work_history_str
612
+ if ctx_size:
613
+ get_token_count = len
614
+ # This is a simplified representation of the static prompt for size calculation
615
+ static_parts_text = "You are a task-oriented AI assistant..."
616
+ fixed_parts_size = get_token_count(static_parts_text)
617
+ available_space_for_history = ctx_size - fixed_parts_size - 100
618
+ if get_token_count(agent_work_history_str) > available_space_for_history:
619
+ if available_space_for_history > 0:
620
+ truncation_point = len(agent_work_history_str) - available_space_for_history
621
+ final_agent_history = ("[...history truncated due to context size...]\n" + agent_work_history_str[truncation_point:])
622
+ ASCIIColors.warning("Agent history was truncated to fit the context window.")
623
+ else:
624
+ final_agent_history = "[...history truncated due to context size...]"
625
+ return (
626
+ "You are a task-oriented AI assistant. Your goal is to follow a plan to fulfill a user's request, using tools and asking for clarification when needed.\n\n"
627
+ "--- AVAILABLE TOOLS ---\n"
628
+ f"{formatted_tools_list}\n\n"
629
+ "--- CONVERSATION HISTORY ---\n"
630
+ f"{formatted_conversation_history}\n\n"
631
+ "--- CURRENT PLAN & KNOWLEDGE ---\n"
632
+ f"Current Plan:\n{current_plan}\n\n"
633
+ f"Knowledge Scratchpad (summary of all findings so far):\n{knowledge_scratchpad}\n\n"
634
+ "--- YOUR WORK SO FAR (in this turn) ---\n"
635
+ f"{final_agent_history}\n\n"
636
+ "--- YOUR TASK ---\n"
637
+ "1. **Analyze the Full Context:** Review the entire conversation, your plan, the scratchpad, and your work history.\n"
638
+                "2. **Update Your State:** Based on the latest tool observation, update the scratchpad to synthesize ALL knowledge gathered. Update the plan by marking completed steps or refining next steps.\n"
+                "3. **Choose Your Next Action:** Select exactly ONE of the following actions:\n"
639
+ " - `call_tool`: If the next step in the plan requires a tool.\n"
640
+ " - `clarify`: If you are blocked, the user's request is ambiguous, or you need more information to proceed. Ask a specific, targeted question.\n"
641
+ " - `final_answer`: If all steps in the plan are complete and you have enough information to answer the user's request.\n\n"
642
+ "--- OUTPUT FORMAT ---\n"
643
+ "Respond with a single JSON object inside a ```json markdown tag.\n"
644
+ "```json\n"
645
+ "{\n"
646
+ ' "thought": "Your reasoning. Analyze the latest observation and decide what to do next based on the plan and history. State which step of the plan you are working on.",\n'
647
+ ' "updated_scratchpad": "The new, complete summary of all knowledge gathered so far. Integrate the latest findings.",\n'
648
+ ' "updated_plan": "The new, remaining plan. Remove steps that are now complete. Refine next steps if needed.",\n'
649
+ ' "action": "The chosen action: \'call_tool\', \'clarify\', or \'final_answer\'.",\n'
650
+ ' "action_details": {\n'
651
+ ' "tool_name": "(Required if action is \'call_tool\') The full \'alias::tool_name\' of the tool to use.",\n'
652
+ ' "tool_params": {},\n'
653
+ ' "clarification_request": "(Required if action is \'clarify\') Your specific question to the user."\n'
654
+ " }\n"
655
+ "}\n"
656
+ "```"
657
+ )
658
+
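
The decision prompt built above asks the model for one JSON object with `thought`, `updated_scratchpad`, `updated_plan`, `action`, and `action_details`. The following is a minimal, hypothetical sketch of a well-formed decision and of the kind of validation a caller could apply before acting on it; only the key names and action values come from the prompt, the tool name and all other values are illustrative.

```python
import json

# Illustrative decision object matching the schema the prompt requests.
example_decision = {
    "thought": "Step 1 of the plan needs the forecast tool.",
    "updated_scratchpad": "No data gathered yet; weather lookup pending.",
    "updated_plan": "1. Look up the forecast.\n2. Summarize it for the user.",
    "action": "call_tool",
    "action_details": {
        "tool_name": "weather::get_forecast",      # hypothetical alias::tool_name
        "tool_params": {"city": "Paris"},
        "clarification_request": ""
    },
}

def validate_decision(decision: dict) -> None:
    """Mirror of the checks the agent loop performs on the parsed JSON."""
    for key in ("action", "action_details"):
        if key not in decision:
            raise KeyError(f"Missing required key: {key}")
    if decision["action"] not in ("call_tool", "clarify", "final_answer"):
        raise ValueError(f"Unknown action: {decision['action']}")

validate_decision(example_decision)
print(json.dumps(example_decision, indent=2))
```
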
600
659
 
601
- # --- Function Calling with MCP ---
602
660
  def generate_with_mcp(
603
661
  self,
604
662
  prompt: str,
605
- discussion_history: Optional[List[Dict[str, str]]] = None, # e.g. [{"role":"user", "content":"..."}, {"role":"assistant", "content":"..."}]
663
+ system_prompt:str = None,
664
+ objective_extraction_system_prompt="Build a plan",
606
665
  images: Optional[List[str]] = None,
607
- tools: Optional[List[Dict[str, Any]]] = None, # List of MCP tool definitions
666
+ tools: Optional[List[Dict[str, Any]]] = None,
608
667
  max_tool_calls: int = 5,
609
- max_llm_iterations: int = 10, # Safety break for LLM deciding to call tools repeatedly
610
- tool_call_decision_temperature: float = 0.1, # Lower temp for more deterministic decision making
611
- final_answer_temperature: float = None, # Use instance default if None
612
- streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
613
- interactive_tool_execution: bool = False, # If true, prompts user before executing a tool
668
+ max_llm_iterations: int = 10,
669
+ ctx_size: Optional[int] = None,
670
+ max_json_retries: int = 1,
671
+ tool_call_decision_temperature: float = 0.0,
672
+ final_answer_temperature: float = None,
673
+ streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
674
+ # The `extract_plan` parameter has been removed.
614
675
  **llm_generation_kwargs
615
676
  ) -> Dict[str, Any]:
616
- """
617
- Generates a response that may involve calling one or more tools via MCP.
677
+ if not self.binding or not self.mcp:
678
+ return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}
618
679
 
619
- Args:
620
- prompt (str): The user's initial prompt.
621
- discussion_history (Optional[List[Dict[str, str]]]): Previous turns of conversation.
622
- images (Optional[List[str]]): Images provided with the current user prompt.
623
- tools (Optional[List[Dict[str, Any]]]): A list of MCP tool definitions available for this call.
624
- If None, tools will be discovered from the MCP binding.
625
- max_tool_calls (int): Maximum number of distinct tool calls allowed in one interaction turn.
626
- max_llm_iterations (int): Maximum number of times the LLM can decide to call a tool
627
- before being forced to generate a final answer.
628
- tool_call_decision_temperature (float): Temperature for LLM when deciding on tool calls.
629
- final_answer_temperature (float): Temperature for LLM when generating the final answer.
630
- streaming_callback (Optional[Callable]): Callback for streaming LLM responses (tool decisions/final answer).
631
- Signature: (chunk_str, msg_type, metadata_dict, history_list_of_dicts_for_this_turn) -> bool
632
- interactive_tool_execution (bool): If True, ask user for confirmation before executing each tool.
680
+ turn_history: List[Dict[str, Any]] = []
681
+ # Renamed for clarity: `prompt` is the full conversation context.
682
+ conversation_context = prompt
633
683
 
634
- Returns:
635
- Dict[str, Any]: A dictionary containing:
636
- - "final_answer" (str): The LLM's final textual answer.
637
- - "tool_calls" (List[Dict]): A list of tools called, their params, and results.
638
- - "error" (Optional[str]): Error message if something went wrong.
639
- """
640
- if not self.binding:
641
- return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
642
- if not self.mcp:
643
- return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
644
-
645
- turn_history: List[Dict[str, Any]] = [] # Tracks this specific turn's interactions (LLM thoughts, tool calls, tool results)
646
-
647
- # 1. Discover tools if not provided
648
684
  if tools is None:
649
685
  try:
650
686
  tools = self.mcp.discover_tools(force_refresh=True)
651
- if not tools:
652
- ASCIIColors.warning("No MCP tools discovered by the binding.")
687
+ if not tools: ASCIIColors.warning("No MCP tools discovered.")
653
688
  except Exception as e_disc:
654
689
  return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
655
-
656
- if not tools: # If still no tools after discovery attempt
657
- ASCIIColors.info("No tools available for function calling. Generating direct response.")
658
- final_answer = self.remove_thinking_blocks(self.generate_text(
659
- prompt=prompt,
660
- system_prompt= (discussion_history[0]['content'] if discussion_history and discussion_history[0]['role'] == 'system' else "") + "\nYou are a helpful assistant.", # Basic system prompt
661
- images=images,
662
- stream=streaming_callback is not None, # stream if callback is provided
663
- streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, None, turn_history) if streaming_callback else None, # Adapt callback
664
- temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
665
- **(llm_generation_kwargs or {})
666
- ))
667
- if isinstance(final_answer, dict) and "error" in final_answer: # Handle generation error
668
- return {"final_answer": "", "tool_calls": [], "error": final_answer["error"]}
669
- return {"final_answer": final_answer, "tool_calls": [], "error": None}
670
690
 
691
+ if not tools:
692
+ final_answer_text = self.generate_text(prompt=prompt, system_prompt=system_prompt, stream=streaming_callback is not None, streaming_callback=streaming_callback)
693
+ return {"final_answer": self.remove_thinking_blocks(final_answer_text), "tool_calls": [], "error": None}
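
For orientation, here is a hedged usage sketch of the reworked `generate_with_mcp` signature (no more `discussion_history` or `interactive_tool_execution`; new `system_prompt`, `ctx_size`, and `max_json_retries` parameters). It assumes `client` is an already-configured client instance from this package with both an LLM binding and an MCP binding attached; the callback body and all argument values are illustrative.

```python
from typing import Any, Dict, List, Optional

def ask_agent(client, user_request: str) -> str:
    """Sketch of driving the MCP agent loop from application code."""
    def on_event(message: str, msg_type, metadata: Optional[Dict[str, Any]] = None,
                 history: Optional[List[Dict[str, Any]]] = None) -> bool:
        # Print every agent event; returning True lets generation continue.
        print(f"[agent] {message}")
        return True

    result = client.generate_with_mcp(
        prompt=user_request,
        system_prompt="You are a helpful assistant.",
        ctx_size=8192,            # lets the agent truncate its work history when needed
        max_json_retries=2,       # extra attempts at producing valid decision JSON
        streaming_callback=on_event,
    )
    if result["error"]:
        raise RuntimeError(result["error"])
    return result["final_answer"]
```
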
671
694
 
672
- formatted_tools_list = "\n".join([
673
- f"- Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
674
- for t in tools
675
- ])
676
-
677
- current_conversation: List[Dict[str, str]] = []
678
- if discussion_history:
679
- current_conversation.extend(discussion_history)
680
- current_conversation.append({"role": "user", "content": prompt})
681
- if images: # Add image representations to the last user message if supported by LLM and chat format
682
- # This part is highly dependent on how the specific LLM binding handles images in chat.
683
- # For simplicity, we'll assume if images are passed, the underlying generate_text handles it.
684
- # A more robust solution would modify current_conversation[-1]['content'] structure.
685
- ASCIIColors.info("Images provided. Ensure LLM binding's generate_text handles them with chat history.")
695
+ # --- Agent State Initialization ---
696
+ knowledge_scratchpad = "No information gathered yet."
697
+ agent_work_history = []
698
+ formatted_tools_list = "\n".join([f"- Tool: {t.get('name')}\n Description: {t.get('description')}\n Schema: {json.dumps(t.get('input_schema'))}" for t in tools])
699
+
700
+ # --- Unconditional Plan Generation ---
701
+ # This step now runs at the beginning of every call.
702
+ if streaming_callback:
703
+ streaming_callback("Building/Revising plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "plan_extraction"}, turn_history)
704
+
705
+ obj_prompt = (
706
+ "You are an Intelligent Workflow Planner. Your mission is to create the most efficient plan possible by analyzing the user's request within the context of the full conversation.\n\n"
707
+ "Your Guiding Principle: **Always choose the path of least resistance.**\n\n"
708
+ "**Your Logical Process:**\n"
709
+ "1. **Analyze the Entire Conversation:** Understand the user's ultimate goal based on all interaction so far.\n"
710
+ "2. **Check for a Single-Step Solution:** Scrutinize the available tools. Can a single tool call directly achieve the user's current goal? \n"
711
+ "3. **Formulate a Plan:** Based on your analysis, create a concise, numbered list of steps to achieve the goal. If the goal is simple, this may be only one step. If it is complex or multi-turn, it may be several steps.\n\n"
712
+ "**CRITICAL RULES:**\n"
713
+ "* **MANDATORY: NEVER add steps the user did not ask for.** Do not embellish or add 'nice-to-have' features.\n"
714
+ "* **Focus on the Goal:** Your plan should directly address the user's request as it stands now in the conversation.\n\n"
715
+ "---\n"
716
+ "**Available Tools:**\n"
717
+ f"{formatted_tools_list}\n\n"
718
+ "**Full Conversation History:**\n"
719
+ f'"{conversation_context}"'
720
+ )
721
+ initial_plan_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
722
+ current_plan = self.remove_thinking_blocks(initial_plan_gen).strip()
686
723
 
724
+ if streaming_callback:
725
+ streaming_callback(f"Current plan:\n{current_plan}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "plan_extraction"}, turn_history)
687
726
 
727
+ turn_history.append({"type": "initial_plan", "content": current_plan})
728
+
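
Each phase of the loop is reported to the streaming callback as a `MSG_TYPE_STEP_START` / `MSG_TYPE_STEP_END` pair sharing an `id` in the metadata (`plan_extraction`, `planning_step_N`, `tool_exec_N`, `final_answer_synthesis`). A small, hypothetical callback that tracks those pairs might look like the sketch below; it assumes the `MSG_TYPE` members compare as shown and that returning `True` means "continue".

```python
from typing import Any, Dict, List, Optional
from lollms_client.lollms_types import MSG_TYPE  # same import used at the top of this module

open_steps: Dict[str, str] = {}

def step_tracking_callback(message: str,
                           msg_type,
                           metadata: Optional[Dict[str, Any]] = None,
                           history: Optional[List[Dict[str, Any]]] = None) -> bool:
    """Logs step boundaries keyed by the 'id' field the agent puts in metadata."""
    step_id = (metadata or {}).get("id", "unnamed")
    if msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
        open_steps[step_id] = message
        print(f"[start] {step_id}: {message}")
    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
        open_steps.pop(step_id, None)
        print(f"[done ] {step_id}: {message}")
    else:
        print(f"[event] {message}")
    return True
```
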
729
+ # --- Main Agent Loop ---
688
730
  tool_calls_made_this_turn = []
689
731
  llm_iterations = 0
690
732
 
691
733
  while llm_iterations < max_llm_iterations:
692
734
  llm_iterations += 1
693
-
694
- # 2. Construct prompt for LLM to decide on tool call or direct answer
695
- # We need to convert current_conversation into a single string prompt for `generate_code`
696
- # or adapt `generate_code` to take a message list if underlying LLM supports chat for structured output.
697
- # For now, let's assume `generate_code` takes a flat prompt.
698
-
699
- # Create a string representation of the conversation history
700
- history_str = ""
701
- for msg in current_conversation:
702
- role_prefix = self.user_custom_header(msg["role"]) if msg["role"]=="user" else self.ai_custom_header(msg["role"]) if msg["role"]=="assistant" else self.system_custom_header(msg["role"]) if msg["role"]=="system" else "!@>unknown:"
703
- history_str += f"{role_prefix}{msg['content']}\n"
704
-
705
- # Add tool execution results from previous iterations in this turn to the history string
706
- for tc_info in tool_calls_made_this_turn:
707
- if tc_info.get("result"): # Only add if there's a result (successful or error)
708
- history_str += f"{self.ai_full_header}(Executed tool '{tc_info['name']}' with params {tc_info['params']}. Result: {json.dumps(tc_info['result'])})\n"
709
-
710
-
711
- decision_prompt_template = f"""You are an AI assistant that can use tools to answer user requests.
712
- Available tools:
713
- {formatted_tools_list}
714
-
715
- Current conversation:
716
- {history_str}
717
-
718
- Based on the available tools and the current conversation, decide the next step.
719
- Respond with a JSON object containing ONE of the following structures:
720
- 1. If you need to use a tool:
721
- {{"action": "call_tool", "tool_name": "<name_of_tool_to_call>", "tool_params": {{<parameters_for_tool_as_json_object>}}}}
722
- 2. If you can answer directly without using a tool OR if you have sufficient information from previous tool calls:
723
- {{"action": "final_answer"}}
724
- 3. If the user's request is unclear or you need more information before deciding:
725
- {{"action": "clarify", "clarification_request": "<your_question_to_the_user>"}}
726
- """ # No {self.ai_full_header} here, generate_code will get raw JSON
727
-
728
- if streaming_callback:
729
- streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
730
-
731
- # Use generate_code to get structured JSON output from LLM
732
- # Note: generate_code itself uses generate_text. We are asking for JSON here.
733
- raw_llm_decision_json = self.generate_text(
734
- prompt=decision_prompt_template, # This is the full prompt for the LLM
735
- n_predict=512, # Reasonable size for decision JSON
736
- temperature=tool_call_decision_temperature,
737
- images=images
738
- # `images` are part of the history_str if relevant to the binding
739
- # streaming_callback=None, # Decisions are usually not streamed chunk by chunk
735
+ # ... The self-correction and action execution loop remains the same ...
736
+ if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
737
+ formatted_agent_history = "No actions taken yet in this turn."
738
+ if agent_work_history:
739
+ history_parts = [ f"### Step {i+1}:\n**Thought:** {entry['thought']}\n**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n**Observation (Tool Output):**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```" for i, entry in enumerate(agent_work_history)]
740
+ formatted_agent_history = "\n\n".join(history_parts)
741
+
742
+ llm_decision = None
743
+ current_decision_prompt = self._build_final_decision_prompt(
744
+ formatted_tools_list=formatted_tools_list, formatted_conversation_history=conversation_context,
745
+ current_plan=current_plan, knowledge_scratchpad=knowledge_scratchpad,
746
+ agent_work_history_str=formatted_agent_history, ctx_size=ctx_size
740
747
  )
741
- if streaming_callback:
742
- streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
743
748
 
744
-
745
- if not raw_llm_decision_json:
746
- ASCIIColors.error("LLM failed to provide a decision JSON.")
747
- turn_history.append({"type": "error", "content": "LLM failed to provide a decision."})
748
- return {"final_answer": "I'm sorry, I encountered an issue trying to process your request.", "tool_calls": tool_calls_made_this_turn, "error": "LLM decision JSON was empty."}
749
-
750
- processed_raw_json = raw_llm_decision_json.strip() # Strip whitespace first
751
- try:
752
- llm_decision = json.loads(processed_raw_json)
753
- turn_history.append({"type": "llm_decision", "content": llm_decision})
754
- except json.JSONDecodeError:
755
- ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}")
749
+ for i in range(max_json_retries + 1):
750
+ raw_llm_decision_json = self.generate_text(prompt=current_decision_prompt, n_predict=2048, temperature=tool_call_decision_temperature)
756
751
  try:
757
- decoder = json.JSONDecoder()
758
- # Try to decode the first JSON object from the (stripped) string
759
- llm_decision, end_index = decoder.raw_decode(processed_raw_json)
760
- turn_history.append({"type": "llm_decision_extracted", "content": llm_decision, "raw_trimmed": processed_raw_json[:end_index]})
761
-
762
- remaining_text = processed_raw_json[end_index:].strip()
763
- if remaining_text:
764
- ASCIIColors.warning(f"LLM output contained additional text after the first JSON object: '{remaining_text}'. Processing only the first object.")
765
- turn_history.append({"type": "llm_extra_output_ignored", "content": remaining_text})
766
- except json.JSONDecodeError as e_inner:
767
- ASCIIColors.error(f"Failed to parse LLM decision JSON even after attempting to extract first object: {raw_llm_decision_json}. Error: {e_inner}")
768
- turn_history.append({"type": "error", "content": "Failed to parse LLM decision JSON.", "raw_json": raw_llm_decision_json, "error_details": str(e_inner)})
769
- # Provide a generic error message, as the LLM's output was malformed.
770
- # Adding the raw output or a snippet to the conversation history might help the LLM recover or inform the user.
771
- current_conversation.append({
772
- "role": "assistant",
773
- "content": "(I encountered an internal error trying to understand my next step. I will try to answer directly based on what I have so far.)"
774
- })
775
- break # Break to generate final answer with current info
776
-
777
- if llm_decision is None: # If parsing failed and couldn't recover
778
- return {"final_answer": "I'm sorry, I had trouble understanding the next step due to a formatting issue.", "tool_calls": tool_calls_made_this_turn, "error": "Invalid JSON from LLM for decision."}
779
-
752
+ llm_decision = robust_json_parser(raw_llm_decision_json)
753
+ if "action" not in llm_decision or "action_details" not in llm_decision:
754
+ raise KeyError("The JSON is missing required keys: 'action' and/or 'action_details'.")
755
+ break
756
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
757
+ error_message = f"JSON parsing failed (Attempt {i+1}/{max_json_retries+1}). Error: {e}"
758
+ ASCIIColors.warning(error_message)
759
+ if streaming_callback: streaming_callback(error_message, MSG_TYPE.MSG_TYPE_WARNING, None, turn_history)
760
+ turn_history.append({"type": "error", "content": f"Invalid JSON response: {raw_llm_decision_json}"})
761
+ if i >= max_json_retries:
762
+ ASCIIColors.error("Max JSON retries reached. Aborting agent loop.")
763
+ llm_decision = None
764
+ break
765
+ current_decision_prompt = ( "You previously failed..." ) # Self-correction prompt
766
+ if not llm_decision: break
767
+
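
The retry block above regenerates the decision when parsing or validation fails, allowing up to `max_json_retries` extra attempts and swapping in a self-correction prompt between attempts. The same pattern, reduced to a standalone sketch (with `json.loads` standing in for the package's `robust_json_parser` and a toy generator so the snippet runs on its own):

```python
import json
from typing import Callable, Optional

def ask_for_json(generate: Callable[[str], str], prompt: str, max_retries: int = 1) -> Optional[dict]:
    """Parse-validate-retry loop mirroring the agent's self-correction logic."""
    current_prompt = prompt
    for attempt in range(max_retries + 1):
        raw = generate(current_prompt)
        try:
            decision = json.loads(raw)  # the package uses robust_json_parser here
            if "action" not in decision or "action_details" not in decision:
                raise KeyError("missing 'action' or 'action_details'")
            return decision
        except (json.JSONDecodeError, KeyError) as err:
            if attempt >= max_retries:
                return None
            current_prompt = (
                f"{prompt}\n\nYour previous reply was not valid JSON ({err}). "
                "Respond again with a single JSON object only."
            )
    return None

# Toy generator: fails once, then produces a valid decision.
replies = iter(['not json', '{"action": "final_answer", "action_details": {}}'])
print(ask_for_json(lambda _: next(replies), "decide", max_retries=1))
```
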
768
+ turn_history.append({"type": "llm_decision", "content": llm_decision})
769
+ current_plan = llm_decision.get("updated_plan", current_plan)
770
+ knowledge_scratchpad = llm_decision.get("updated_scratchpad", knowledge_scratchpad)
780
771
  action = llm_decision.get("action")
772
+ action_details = llm_decision.get("action_details", {})
773
+ if streaming_callback: streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
781
774
 
782
775
  if action == "call_tool":
783
- if len(tool_calls_made_this_turn) >= max_tool_calls:
784
- ASCIIColors.warning("Maximum tool calls reached for this turn. Forcing final answer.")
785
- current_conversation.append({"role":"assistant", "content":"(Max tool calls reached. I will now try to formulate an answer based on available information.)"})
786
- break # Exit loop to generate final answer
787
-
788
- tool_name = llm_decision.get("tool_name")
789
- tool_params = llm_decision.get("tool_params", {})
790
-
791
- if not tool_name:
792
- ASCIIColors.warning("LLM decided to call a tool but didn't specify tool_name.")
793
- current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
794
- break # Or ask LLM to try again without this faulty decision in history
795
-
796
- tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
797
- turn_history.append(tool_call_info)
798
- if streaming_callback:
799
- streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
800
- streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
801
-
802
- # Interactive execution if enabled
803
- if interactive_tool_execution:
804
- try:
805
- user_confirmation = input(f"AI wants to execute tool '{tool_name}' with params {tool_params}. Allow? (yes/no/details): ").lower()
806
- if user_confirmation == "details":
807
- tool_def_for_details = next((t for t in tools if t.get("name") == tool_name), None)
808
- print(f"Tool details: {json.dumps(tool_def_for_details, indent=2)}")
809
- user_confirmation = input(f"Allow execution of '{tool_name}'? (yes/no): ").lower()
810
-
811
- if user_confirmation != "yes":
812
- ASCIIColors.info("Tool execution cancelled by user.")
813
- tool_result = {"error": "Tool execution cancelled by user."}
814
- # Add this info to conversation for LLM
815
- current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' execution was cancelled by the user. What should I do next?)"})
816
- tool_call_info["result"] = tool_result # Record cancellation
817
- tool_calls_made_this_turn.append(tool_call_info)
818
- continue # Back to LLM for next decision
819
- except Exception as e_input: # Catch issues with input() e.g. in non-interactive env
820
- ASCIIColors.warning(f"Error during interactive confirmation: {e_input}. Proceeding without confirmation.")
821
-
822
-
823
- if streaming_callback:
824
- streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
825
-
776
+ if len(tool_calls_made_this_turn) >= max_tool_calls: break
777
+ tool_name = action_details.get("tool_name")
778
+ tool_params = action_details.get("tool_params", {})
779
+ if not tool_name or not isinstance(tool_params, dict): break
780
+ if streaming_callback: streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
826
781
  tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
827
-
828
- tool_call_info["result"] = tool_result # Add result to this call's info
829
- tool_calls_made_this_turn.append(tool_call_info) # Log the completed call
782
+ work_entry = { "thought": llm_decision.get("thought", "N/A"), "tool_name": tool_name, "tool_params": tool_params, "tool_result": tool_result }
783
+ agent_work_history.append(work_entry)
784
+ tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
830
785
  if streaming_callback:
831
- streaming_callback(f"", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
832
-
833
- if streaming_callback:
834
- streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
835
-
836
- # Add tool execution result to conversation for the LLM
837
- # The format of this message can influence how the LLM uses the tool output.
838
- # current_conversation.append({"role": "tool_result", "tool_name": tool_name, "content": json.dumps(tool_result)}) # More structured
839
- current_conversation.append({"role": "assistant", "content": f"(Tool '{tool_name}' executed. Result: {json.dumps(tool_result)})"})
840
-
841
-
786
+ streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
787
+ streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
842
788
  elif action == "clarify":
843
- clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
844
- if streaming_callback:
845
- streaming_callback(clarification_request, MSG_TYPE.MSG_TYPE_FULL, {"type": "clarification_request"}, turn_history)
846
- turn_history.append({"type":"clarification_request_sent", "content": clarification_request})
847
- return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None}
848
-
789
+ clarification_request = action_details.get("clarification_request", "I need more information.")
790
+ return { "final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True }
849
791
  elif action == "final_answer":
850
792
  ASCIIColors.info("LLM decided to formulate a final answer.")
851
- current_conversation.append({"role":"assistant", "content":"(I will now formulate the final answer based on the information gathered.)"}) # Inform LLM's "thought process"
852
- break # Exit loop to generate final answer
853
-
854
- else:
855
- ASCIIColors.warning(f"LLM returned unknown action: {action}")
856
- current_conversation.append({"role":"assistant", "content":f"(Received an unexpected decision: {action}. I will try to answer directly.)"})
857
- break # Exit loop
858
-
859
- # Safety break if too many iterations without reaching final answer or max_tool_calls
860
- if llm_iterations >= max_llm_iterations:
861
- ASCIIColors.warning("Max LLM iterations reached. Forcing final answer.")
862
- current_conversation.append({"role":"assistant", "content":"(Max iterations reached. I will now try to formulate an answer.)"})
863
793
  break
864
-
865
- # 3. Generate final answer if LLM decided to, or if loop broke
866
- if streaming_callback:
867
- streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_history)
868
-
869
- # Construct the final prompt string for generate_text from current_conversation
870
- final_prompt_str = ""
871
- final_system_prompt = ""
872
-
873
- # Consolidate system messages if any
874
- interim_history_for_final_answer = []
875
- for msg in current_conversation:
876
- if msg["role"] == "system":
877
- final_system_prompt += msg["content"] + "\n"
878
794
  else:
879
- interim_history_for_final_answer.append(msg)
880
-
881
- if not any(msg['role'] == 'user' for msg in interim_history_for_final_answer): # Ensure there's a user turn if only system + tool calls
882
- interim_history_for_final_answer.append({'role':'user', 'content': prompt}) # Add original prompt if lost
883
-
884
-
885
- # The generate_text method needs a single prompt and an optional system_prompt.
886
- # We need to format the interim_history_for_final_answer into a single prompt string,
887
- # or modify generate_text to accept a list of messages.
888
- # For now, flatten to string:
889
- current_prompt_for_final_answer = ""
890
- for i, msg in enumerate(interim_history_for_final_answer):
891
- role_prefix = self.user_custom_header(msg["role"]) if msg["role"]=="user" else self.ai_custom_header(msg["role"]) if msg["role"]=="assistant" else f"!@>{msg['role']}:"
892
- current_prompt_for_final_answer += f"{role_prefix}{msg['content']}"
893
- if i < len(interim_history_for_final_answer) -1 : # Add newline separator except for last
894
- current_prompt_for_final_answer += "\n"
895
- # Add AI header to prompt AI to speak
896
- current_prompt_for_final_answer += f"\n{self.ai_full_header}"
897
-
898
-
899
- final_answer_text = self.generate_text(
900
- prompt=current_prompt_for_final_answer, # Pass the conversation history as the prompt
901
- system_prompt=final_system_prompt.strip(),
902
- images=images if not tool_calls_made_this_turn else None, # Only pass initial images if no tool calls happened (context might be lost)
903
- stream=streaming_callback is not None,
904
- streaming_callback=lambda chunk, msg_type: streaming_callback(chunk, msg_type, {"type":"final_answer_chunk"}, turn_history) if streaming_callback else None,
905
- temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
906
- **(llm_generation_kwargs or {})
795
+ ASCIIColors.warning(f"LLM returned unknown action: '{action}'. Forcing final answer.")
796
+ break
797
+ if streaming_callback: streaming_callback(f"LLM reasoning step (iteration {llm_iterations}) complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
798
+
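
After every successful tool call the loop appends a work entry (`thought`, `tool_name`, `tool_params`, `tool_result`) and re-renders the whole history into the next decision prompt. A simplified, self-contained illustration of that record and its rendering (the real code additionally wraps the observation in a fenced JSON block; the entry values are made up):

```python
import json

# Illustrative work-history entry; field names mirror the loop above.
work_entry = {
    "thought": "The plan's first step needs a database lookup.",
    "tool_name": "research::query_database",        # hypothetical alias::tool_name
    "tool_params": {"query": "latest sales figures"},
    "tool_result": {"status": "ok", "rows": 3},
}

step_text = (
    f"### Step 1:\n"
    f"**Thought:** {work_entry['thought']}\n"
    f"**Action:** Called tool `{work_entry['tool_name']}` "
    f"with parameters `{json.dumps(work_entry['tool_params'])}`\n"
    f"**Observation (Tool Output):** {json.dumps(work_entry['tool_result'])}"
)
print(step_text)
```
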
799
+ # --- Final Answer Synthesis ---
800
+ # This part remains the same.
801
+ if streaming_callback: streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
802
+ final_answer_prompt = (
803
+ "You are an AI assistant tasked with providing a final, comprehensive answer to the user based on the research performed.\n\n"
804
+ "--- FULL CONVERSATION CONTEXT ---\n"
805
+ f"{conversation_context}\n\n"
806
+ "--- SUMMARY OF FINDINGS (Your Knowledge Scratchpad) ---\n"
807
+ f"{knowledge_scratchpad}\n\n"
808
+ "--- INSTRUCTIONS ---\n"
809
+ "- Synthesize a clear and complete answer for the user based ONLY on the information in the 'Summary of Findings'.\n"
810
+ "- Address the user directly and answer their latest query, considering the full conversation.\n"
811
+ "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
812
+ "- Format your response clearly using markdown where appropriate.\n"
907
813
  )
908
-
909
- if streaming_callback:
910
- streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_history)
911
-
912
- if isinstance(final_answer_text, dict) and "error" in final_answer_text: # Handle generation error
913
- turn_history.append({"type":"error", "content":f"LLM failed to generate final answer: {final_answer_text['error']}"})
914
- return {"final_answer": "", "tool_calls": tool_calls_made_this_turn, "error": final_answer_text["error"]}
915
-
916
- turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
917
- return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}
918
-
814
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
815
+ if streaming_callback: streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
816
+ final_answer = self.remove_thinking_blocks(final_answer_text)
817
+ turn_history.append({"type":"final_answer_generated", "content": final_answer})
818
+ return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}
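
Callers receive a dictionary with `final_answer`, `tool_calls`, and `error`; when the agent chose the `clarify` action the answer is the question itself and an extra `clarification` flag is set. A hedged sketch of handling those cases (the result values shown are made up):

```python
result = {
    "final_answer": "Here is what I found...",
    "tool_calls": [{"name": "research::query_database",
                    "params": {"query": "sales"},
                    "result": {"rows": 3}}],
    "error": None,
}

if result.get("error"):
    print(f"Agent failed: {result['error']}")
elif result.get("clarification"):
    # The clarify branch returns its question in final_answer and sets this flag.
    print(f"Agent needs more information: {result['final_answer']}")
else:
    print(result["final_answer"])
    for call in result["tool_calls"]:
        print(f"- used {call['name']} with {call['params']}")
```
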
919
819
  def generate_text_with_rag(
920
820
  self,
921
821
  prompt: str,
922
822
  rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
823
+ system_prompt: str = "",
824
+ objective_extraction_system_prompt="Extract objectives",
923
825
  rag_query_text: Optional[str] = None,
924
826
  rag_vectorizer_name: Optional[str] = None,
925
827
  rag_top_k: int = 5,
926
828
  rag_min_similarity_percent: float = 70.0,
927
- max_rag_hops: int = 0,
829
+ max_rag_hops: int = 3,
928
830
  images: Optional[List[str]] = None,
929
- system_prompt: str = "",
930
831
  n_predict: Optional[int] = None,
931
832
  stream: Optional[bool] = None,
932
833
  temperature: Optional[float] = None,
@@ -943,13 +844,11 @@ Respond with a JSON object containing ONE of the following structures:
943
844
  **llm_generation_kwargs
944
845
  ) -> Dict[str, Any]:
945
846
  """
946
- Enhanced RAG with optional initial objective extraction and automatic intermediate summaries
947
- when context grows beyond ctx_size or self.default_ctx_size.
847
+ Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
948
848
  """
949
849
  if not self.binding:
950
850
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
951
851
 
952
- # Determine effective context size limit
953
852
  effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
954
853
 
955
854
  turn_rag_history_for_callback: List[Dict[str, Any]] = []
@@ -957,183 +856,251 @@ Respond with a JSON object containing ONE of the following structures:
957
856
  all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
958
857
 
959
858
  original_user_prompt = prompt
960
- objectives_text = ""
961
- # 0. Optional Objectives Extraction Step
859
+
860
+ knowledge_scratchpad = "No information gathered yet."
861
+ current_objectives = ""
862
+
962
863
  if extract_objectives:
963
864
  if streaming_callback:
964
- streaming_callback("Extracting and structuring objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
865
+ streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
866
+
965
867
  obj_prompt = (
966
- "You are an expert analyst. "
967
- "Your task is to extract and structure the key objectives from the user's request below. "
968
- "Output a bullet list of objectives only.\n\n"
868
+ "You are an expert analyst. Your task is to extract and structure the key research objectives from the user's request below. "
869
+ "These objectives will guide a research process. Frame them as questions or tasks. "
870
+                "Output a bulleted list of objectives only, with no additional commentary.\n\n"
969
871
  f"User request:\n\"{original_user_prompt}\""
970
872
  )
971
- obj_gen = self.generate_text(
972
- prompt=obj_prompt,
973
- system_prompt="Extract objectives",
974
- temperature=0.0,
975
- n_predict=200,
976
- stream=False
977
- )
978
- objectives_text = self.remove_thinking_blocks(obj_gen).strip()
873
+ initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
874
+ current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
875
+
979
876
  if streaming_callback:
980
- streaming_callback(f"Objectives: {objectives_text}", MSG_TYPE.MSG_TYPE_STEP, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
877
+ streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
878
+ else:
879
+ current_objectives = f"Answer the user's request: '{original_user_prompt}'"
981
880
 
982
- if streaming_callback:
983
- streaming_callback(f"Objectives extracted:\n{objectives_text}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_rag_history_for_callback)
881
+ if streaming_callback:
882
+ streaming_callback("Generating initial search query...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
883
+ if not rag_query_text:
884
+ initial_query_gen_prompt = f"""
885
+ You are a research assistant. Your task is to formulate the first search query for a vector database based on an initial user request and research objectives. The query should be concise and target the most crucial information needed to start.
984
886
 
985
- current_query_for_rag = rag_query_text or None
986
- previous_queries=[]
987
- # 1. RAG Hops
988
- for hop_count in range(max_rag_hops + 1):
989
- if streaming_callback:
990
- streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
991
- txt_previous_queries = f"Previous queries:\n"+'\n'.join(previous_queries)+"\n\n" if len(previous_queries)>0 else ""
992
- txt_informations = f"Information:\n"+'\n'.join([f"(from {chunk['document']}):{chunk['content']}" for _, chunk in all_unique_retrieved_chunks_map.items()]) if len(all_unique_retrieved_chunks_map)>0 else "This is the first request. No data received yet. Build a new query."
993
- txt_sp = (
994
- "Your objective is to analyze the provided chunks of information to determine "
995
- "whether they are sufficient to reach the objective. If not, formulate a refined and focused query "
996
- "that can retrieve more relevant information from a vector database. Ensure the query captures the semantic essence "
997
- "of what is missing, is contextually independent, and is optimized for vector-based similarity search. "
998
- "Do not repeat or rephrase earlier queries—always generate a new, meaningful atomic query targeting the current gap in knowledge."
999
- )
887
+ --- User's Request ---
888
+ {original_user_prompt}
889
+
890
+ --- Initial Research Objectives ---
891
+ {current_objectives}
892
+
893
+ --- INSTRUCTIONS ---
894
+ Generate a single, effective search query.
1000
895
 
1001
- txt_formatting = """The output format must be in form of JSON placed inside a JSON markdown tag. Use the following schema:
896
+ --- OUTPUT FORMAT ---
897
+ Provide your response as a single JSON object with one key, "query".
1002
898
  ```json
1003
- {
1004
- "decision": A boolean indicating your decision (true: more data is needed, false: the current data is sufficient),
1005
- "query": (str, optional, only if decision is true). A new, atomic query suitable for semantic search in a vector database.
1006
- It should capture the missing concept or insight in concise, context-rich language, avoiding reuse of earlier queries.
1007
- }
899
+ {{
900
+ "query": "Your generated search query here."
901
+ }}
1008
902
  ```
1009
903
  """
1010
- p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
1011
- response = self.generate_code(p,system_prompt=txt_sp)
1012
904
  try:
1013
- answer = json.loads(response)
1014
- decision = answer["decision"]
1015
- if not decision:
1016
- if streaming_callback:
1017
- streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1018
-
1019
- break
1020
- else:
1021
- current_query_for_rag = str(answer["query"])
1022
- except Exception as ex:
1023
- trace_exception(ex)
905
+ raw_initial_query_response = self.generate_code(initial_query_gen_prompt, system_prompt="You are a query generation expert.", temperature=0.0)
906
+ initial_plan = json.loads(raw_initial_query_response)
907
+ current_query_for_rag = initial_plan.get("query")
908
+ if not current_query_for_rag:
909
+ raise ValueError("LLM returned an empty initial query.")
910
+ if streaming_callback:
911
+ streaming_callback(f"Initial query generated:\n'{current_query_for_rag}'", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "initial_query_generation"}, turn_rag_history_for_callback)
912
+ except Exception as e:
913
+ trace_exception(e)
914
+ current_query_for_rag = original_user_prompt
915
+ if streaming_callback:
916
+ streaming_callback(f"Failed to generate initial query, falling back to user prompt. Error: {e}", MSG_TYPE.MSG_TYPE_WARNING, {"id": "initial_query_failure"}, turn_rag_history_for_callback)
917
+ else:
918
+ current_query_for_rag=rag_query_text
919
+
920
+ previous_queries = []
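
The hop loop below calls `rag_query_function(query, vectorizer_name, top_k, min_similarity_percent)` and reads `file_path`, `chunk_text`, and `similarity_percent` from each returned chunk. A stub with that contract, for reference (paths and contents are illustrative; a real implementation would run a similarity search against your vector store):

```python
from typing import Any, Dict, List, Optional

def my_rag_query(query: str,
                 vectorizer_name: Optional[str],
                 top_k: int,
                 min_similarity_percent: float) -> List[Dict[str, Any]]:
    """Stub matching the signature and chunk keys the hop loop expects."""
    return [
        {
            "file_path": "docs/report_2024.md",
            "chunk_text": f"Example passage relevant to: {query}",
            "similarity_percent": 83.0,
        }
    ][:top_k]
```
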
1024
921
 
922
+ for hop_count in range(max_rag_hops):
1025
923
  if streaming_callback:
1026
- streaming_callback(f"Query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query for hop {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
1027
-
1028
- # Retrieve chunks
924
+ streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
925
+
926
+ if streaming_callback:
927
+ streaming_callback(f"Executing Query:\n{current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"query_exec_{hop_count + 1}"}, turn_rag_history_for_callback)
928
+
1029
929
  try:
1030
- retrieved = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
930
+ retrieved_chunks = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
1031
931
  except Exception as e:
1032
932
  trace_exception(e)
1033
933
  return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
1034
934
 
1035
935
  hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
1036
936
  previous_queries.append(current_query_for_rag)
1037
- new_unique = 0
1038
- documents = []
1039
- for chunk in retrieved:
1040
- doc = chunk.get("file_path", "Unknown")
1041
- content = str(chunk.get("chunk_text", ""))
1042
- sim = float(chunk.get("similarity_percent", 0.0))
1043
- detail = {"document": doc, "similarity": sim, "content": content,
1044
- "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
1045
- documents.append(doc)
1046
- hop_details["retrieved_chunks_details"].append(detail)
1047
- key = f"{doc}::{content[:100]}"
1048
- if key not in all_unique_retrieved_chunks_map:
1049
- all_unique_retrieved_chunks_map[key] = detail
1050
- new_unique += 1
1051
- hop_details["status"] = "Completed" if retrieved else "No chunks retrieved"
1052
- if hop_count > 0 and new_unique == 0:
1053
- hop_details["status"] = "No *new* unique chunks retrieved"
937
+
938
+ newly_retrieved_text = ""
939
+ new_chunks_count = 0
940
+ if retrieved_chunks:
941
+ for chunk in retrieved_chunks:
942
+ doc = chunk.get("file_path", "Unknown")
943
+ content = str(chunk.get("chunk_text", ""))
944
+ sim = float(chunk.get("similarity_percent", 0.0))
945
+ detail = {"document": doc, "similarity": sim, "content": content, "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
946
+ hop_details["retrieved_chunks_details"].append(detail)
947
+
948
+ key = f"{doc}::{content[:100]}"
949
+ if key not in all_unique_retrieved_chunks_map:
950
+ all_unique_retrieved_chunks_map[key] = detail
951
+ newly_retrieved_text += f"--- Document: {doc} (Similarity: {sim:.1f}%)\n{content}\n---\n"
952
+ new_chunks_count += 1
953
+
954
+ hop_details["status"] = f"Completed, found {len(retrieved_chunks)} chunks ({new_chunks_count} new)."
1054
955
  rag_hops_details_list.append(hop_details)
956
+
1055
957
  if streaming_callback:
1056
- streaming_callback(f"Retreived {len(retrieved)} data chunks from {set(documents)}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retreival {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
958
+ streaming_callback(f"Retrieved {len(retrieved_chunks)} chunks ({new_chunks_count} new).", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retrieval_info_{hop_count + 1}"}, turn_rag_history_for_callback)
959
+
960
+ if new_chunks_count == 0 and hop_count > 0:
961
+ if streaming_callback:
962
+ streaming_callback("No new unique information found, stopping RAG hops.", MSG_TYPE.MSG_TYPE_INFO, {"id": "rag_stop_no_new_info"}, turn_rag_history_for_callback)
963
+ break
1057
964
 
1058
965
  if streaming_callback:
1059
- streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
966
+ streaming_callback("Analyzing findings and refining plan...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{hop_count + 1}"}, turn_rag_history_for_callback)
967
+
968
+ planning_system_prompt = (
969
+            "You are a strategic research agent operating over multiple retrieval hops. Your task is to analyze new information, update your "
970
+ "understanding, refine your research objectives, and decide on the next best action."
971
+ )
972
+
973
+ planning_prompt = f"""
974
+ --- Original User Request ---
975
+ {original_user_prompt}
976
+
977
+ --- Objectives you have formulated ---
978
+ {current_objectives}
979
+
980
+ --- Existing Knowledge Scratchpad (Summary of previous findings) ---
981
+ {knowledge_scratchpad}
982
+
983
+ --- Newly Retrieved Documents for this Hop ---
984
+ {newly_retrieved_text if newly_retrieved_text else "No new documents were found with the last query."}
985
+
986
+ --- Previous Queries (for reference, do not repeat) ---
987
+ - {"- ".join(previous_queries)}
988
+
989
+ --- INSTRUCTIONS ---
990
+ 1. **Analyze & Update Knowledge:** Read the 'Newly Retrieved Documents'. Summarize the most important new facts and insights into a few bullet points for the 'new_notes_for_scratchpad'.
991
+ 2. **Refine Objectives:** Review the 'Current Research Objectives'. Do the new documents answer any objectives? Do they reveal that some objectives need to be changed or made more specific? Rewrite the complete, updated list of objectives.
992
+ 3. **Decide & Plan Next Query:** Based on your updated objectives and knowledge, decide if you have enough information to form a final answer.
993
+ - If YES, set `decision` to `false`.
994
+ - If NO, set `decision` to `true` and formulate a new, focused `query` to address the most critical remaining gap in your knowledge. The query must be different from previous ones.
995
+
996
+ --- OUTPUT FORMAT ---
997
+ Provide your response as a single JSON object inside a JSON markdown tag. Use this exact schema:
998
+ ```json
999
+ {{
1000
+ "updated_objectives": "(string) A bulleted list of the new, refined objectives based on the latest information.",
1001
+ "new_notes_for_scratchpad": "(string) A concise summary in bullet points of key findings from the new documents.",
1002
+ "decision": "boolean (true if you need to query again, false if you are done).",
1003
+ "query": "(string, optional) The next query for the vector database if decision is true."
1004
+ }}
1005
+ ```
1006
+ """
1007
+ raw_planning_response = self.generate_code(planning_prompt, system_prompt=planning_system_prompt, temperature=0.0)
1008
+
1009
+ try:
1010
+ plan = robust_json_parser(raw_planning_response)
1011
+
1012
+ raw_notes = plan.get("new_notes_for_scratchpad")
1013
+ if isinstance(raw_notes, list):
1014
+ notes_from_hop = "\n".join(str(item) for item in raw_notes if item).strip()
1015
+ elif isinstance(raw_notes, str):
1016
+ notes_from_hop = raw_notes.strip()
1017
+ else:
1018
+ notes_from_hop = ""
1019
+
1020
+ if notes_from_hop:
1021
+ if knowledge_scratchpad == "No information gathered yet.":
1022
+ knowledge_scratchpad = f"Findings from Hop {hop_count + 1}:\n{notes_from_hop}"
1023
+ else:
1024
+ knowledge_scratchpad += f"\n\nFindings from Hop {hop_count + 1}:\n{notes_from_hop}"
1025
+
1026
+ raw_objectives = plan.get("updated_objectives")
1027
+ if isinstance(raw_objectives, list):
1028
+ current_objectives = "\n".join(str(item) for item in raw_objectives if item).strip()
1029
+ elif isinstance(raw_objectives, str) and raw_objectives.strip():
1030
+ current_objectives = raw_objectives.strip()
1031
+
1032
+ if streaming_callback:
1033
+ streaming_callback(f"Refined Objectives:\n{current_objectives}\n\nNew Learnings:\n{notes_from_hop}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"planning_output_{hop_count + 1}"}, turn_rag_history_for_callback)
1060
1034
 
1035
+ if not plan.get("decision", False):
1036
+ if streaming_callback:
1037
+ streaming_callback("LLM decided it has enough information.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
1038
+ break
1039
+ else:
1040
+ next_query = plan.get("query")
1041
+ if not next_query:
1042
+ if streaming_callback:
1043
+ streaming_callback("LLM decided to continue but provided no query. Stopping.", MSG_TYPE.MSG_TYPE_WARNING, {"id": "rag_stop_no_query"}, turn_rag_history_for_callback)
1044
+ break
1045
+ current_query_for_rag = next_query
1046
+
1047
+ except Exception as ex:
1048
+ trace_exception(ex)
1049
+ if streaming_callback:
1050
+ streaming_callback(f"Error processing planning step: {ex}. Stopping RAG.", MSG_TYPE.MSG_TYPE_EXCEPTION, {"id": f"planning_error_{hop_count + 1}"}, turn_rag_history_for_callback)
1051
+ break
1061
1052
 
1062
- # 2. Prepare & Summarize Context
1063
- sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(),
1064
- key=lambda c: c["similarity"], reverse=True)
1053
+ if streaming_callback:
1054
+ streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}"}, turn_rag_history_for_callback)
1055
+
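
Each hop ends with a planning response following the schema above (`updated_objectives`, `new_notes_for_scratchpad`, `decision`, `query`), whose notes are folded into the scratchpad. An illustrative planner output and the same folding logic, runnable on its own:

```python
# Made-up planner output following the requested schema.
plan = {
    "updated_objectives": "- Identify the 2024 revenue figure\n- Find the main cost drivers",
    "new_notes_for_scratchpad": "- 2024 revenue was reported as 4.2M\n- Cost breakdown not yet found",
    "decision": True,
    "query": "2024 cost breakdown by department",
}

knowledge_scratchpad = "No information gathered yet."
hop_count = 0
notes = plan["new_notes_for_scratchpad"].strip()
if notes:
    if knowledge_scratchpad == "No information gathered yet.":
        knowledge_scratchpad = f"Findings from Hop {hop_count + 1}:\n{notes}"
    else:
        knowledge_scratchpad += f"\n\nFindings from Hop {hop_count + 1}:\n{notes}"
print(knowledge_scratchpad)
```
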
1056
+ sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(), key=lambda c: c["similarity"], reverse=True)
1065
1057
  context_lines = []
1066
1058
  total_chars = 0
1067
1059
  for c in sorted_chunks:
1068
- snippet = (
1069
- f"Source: {c['document']} (Sim: {c['similarity']:.1f}%, "
1070
- f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
1071
- f"{c['content']}\n---\n"
1072
- )
1073
- if total_chars + len(snippet) > max_rag_context_characters:
1074
- break
1060
+ snippet = (f"Source: {c['document']} (Sim: {c['similarity']:.1f}%)\n{c['content']}\n---\n")
1061
+ if total_chars + len(snippet) > max_rag_context_characters: break
1075
1062
  context_lines.append(snippet)
1076
1063
  total_chars += len(snippet)
1077
-
1078
1064
  accumulated_context = "".join(context_lines)
1079
1065
 
1080
- # If context exceeds our effective limit, summarize it
1081
1066
  if self.count_tokens(accumulated_context) > effective_ctx_size:
1082
- if streaming_callback:
1083
- streaming_callback("Context too large, performing intermediate summary...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
1084
- summary_prompt = (
1085
- "Summarize the following gathered context into a concise form "
1086
- "that preserves all key facts and sources needed to answer the user's request:\n\n"
1087
- f"{accumulated_context}"
1088
- )
1089
- summary = self.generate_text(
1090
- prompt=summary_prompt,
1091
- system_prompt="Intermediate summary",
1092
- temperature=0.0,
1093
- n_predict= n_predict or 512,
1094
- stream=False
1095
- )
1096
- accumulated_context = self.remove_thinking_blocks(summary).strip()
- if streaming_callback:
- streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "intermediate_summary"}, turn_rag_history_for_callback)
+ pass

- # 3. Final Answer Generation
- final_prompt = [
- f"Original request: {original_user_prompt}"
+ if streaming_callback:
+ streaming_callback("Compiling final answer from all findings...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_generation"}, turn_rag_history_for_callback)
+
+ final_prompt_parts = [
+ f"**User's Original Request:**\n{original_user_prompt}\n",
+ f"**Final Research Objectives:**\n{current_objectives}\n",
+ f"**Knowledge Scratchpad (Summary of Findings):**\n{knowledge_scratchpad}\n",
  ]
- if objectives_text:
- final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
  if accumulated_context:
- final_prompt.append(
- "\nBased on the gathered context:\n---\n"
- f"{accumulated_context}\n---"
+ final_prompt_parts.append(
+ "**Supporting Raw Context from Retrieved Documents:**\n---\n"
+ f"{accumulated_context}\n---\n"
  )
  else:
- final_prompt.append("\n(No relevant context retrieved.)")
- final_prompt.append(
- "\nProvide a comprehensive answer using ONLY the above context. "
- "If context is insufficient, state so clearly."
+ final_prompt_parts.append("**Supporting Raw Context:**\n(No relevant documents were retrieved.)\n")
+
+ final_prompt_parts.append(
+ "**Final Instruction:**\nSynthesize a comprehensive answer to the user's original request. "
+ "Use the 'Knowledge Scratchpad' as your primary source of information and the 'Supporting Raw Context' for specific details and quotes. "
+ "Adhere strictly to the information provided. If the information is insufficient to fully answer, state what is missing based on your 'Final Research Objectives'."
  )
- final_prompt.append(self.ai_full_header)
+ final_prompt_parts.append(self.ai_full_header)

  final_answer = self.generate_text(
- prompt="\n".join(final_prompt),
- images=images,
- system_prompt=system_prompt,
- n_predict=n_predict,
- stream=stream,
- temperature=temperature,
- top_k=top_k,
- top_p=top_p,
- repeat_penalty=repeat_penalty,
- repeat_last_n=repeat_last_n,
- seed=seed,
- n_threads=n_threads,
- ctx_size=ctx_size,
+ prompt="\n".join(final_prompt_parts),
+ images=images, system_prompt=system_prompt, n_predict=n_predict, stream=stream,
+ temperature=temperature, top_k=top_k, top_p=top_p, repeat_penalty=repeat_penalty,
+ repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads, ctx_size=ctx_size,
  streaming_callback=streaming_callback if stream else None,
  **llm_generation_kwargs
  )
  answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer
+
+ if streaming_callback:
+ streaming_callback("Final answer generated.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_generation"}, turn_rag_history_for_callback)

  return {
  "final_answer": answer_text,
@@ -1141,7 +1108,303 @@ Respond with a JSON object containing ONE of the following structures:
  "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
  "error": None
  }
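The return structure shown above keeps the same keys (`final_answer`, `all_retrieved_sources`, `error`), so callers do not need to change. A minimal, hedged sketch of consuming it; `result` simply stands for the enclosing RAG method's return value, whose name lies outside this hunk:

```python
# Hedged sketch: consuming the dictionary assembled above.
# `result` stands for the return value of the enclosing RAG method (not named in this hunk).
result = {"final_answer": "...", "all_retrieved_sources": [], "error": None}  # illustrative shape only

if result["error"] is None:
    print(result["final_answer"])
    for chunk in result["all_retrieved_sources"]:
        # each entry is one of the unique retrieved chunks collected during the turn
        print(chunk)
else:
    print(f"RAG generation failed: {result['error']}")
```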
+
+ def generate_with_mcp_rag(
+ self,
+ prompt: str,
+ rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
+ system_prompt: str = None,
+ objective_extraction_system_prompt="Extract objectives",
+ images: Optional[List[str]] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ max_tool_calls: int = 10,
+ max_llm_iterations: int = 15,
+ tool_call_decision_temperature: float = 0.0,
+ final_answer_temperature: float = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
+ build_plan: bool = True,
+ rag_vectorizer_name: Optional[str] = None,
+ rag_top_k: int = 5,
+ rag_min_similarity_percent: float = 70.0,
+ **llm_generation_kwargs
+ ) -> Dict[str, Any]:
+ """
+ Generates a response using a stateful agent that can choose between calling standard
+ MCP tools and querying a RAG database, all within a unified reasoning loop.
+ """
+ if not self.binding:
+ return {"final_answer": "", "tool_calls": [], "error": "LLM binding not initialized."}
+ if not self.mcp:
+ return {"final_answer": "", "tool_calls": [], "error": "MCP binding not initialized."}
+
+ # --- Initialize Agent State ---
+ turn_history: List[Dict[str, Any]] = []
+ original_user_prompt = prompt
+ knowledge_scratchpad = "No information gathered yet."
+ current_objectives = ""
+ agent_work_history = []
+ tool_calls_made_this_turn = []
+ llm_iterations = 0
+
+ # --- 1. Discover MCP Tools and Inject the RAG Tool ---
+ if tools is None:
+ try:
+ mcp_tools = self.mcp.discover_tools(force_refresh=True)
+ if not mcp_tools: ASCIIColors.warning("No MCP tools discovered.")
+ except Exception as e_disc:
+ return {"final_answer": "", "tool_calls": [], "error": f"Failed to discover MCP tools: {e_disc}"}
+ else:
+ mcp_tools = tools
+
+ # Define the RAG tool and add it to the list
+ rag_tool_definition = {
+ "name": "research::query_database",
+ "description": (
+ "Queries a vector database to find relevant text chunks based on a natural language query. "
+ "Use this to gather information, answer questions, or find context for a task before using other tools."
+ ),
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "The natural language query to search for. Be specific to get the best results."
+ }
+ },
+ "required": ["query"]
+ }
+ }
+ available_tools = [rag_tool_definition] + mcp_tools
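The injected `research::query_database` tool is ultimately served by the caller-supplied `rag_query_function`, typed above as `Callable[[str, Optional[str], int, float], List[Dict[str, Any]]]` and invoked later in this method as `rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)`. A minimal sketch of a compatible callable follows; the chunk keys (`text`, `similarity`, `source`) are illustrative assumptions, since the agent only requires a (possibly empty) list of dictionaries:

```python
from typing import Any, Dict, List, Optional

def my_rag_query(query: str,
                 vectorizer_name: Optional[str],
                 top_k: int,
                 min_similarity_percent: float) -> List[Dict[str, Any]]:
    """Hedged example backend for the injected RAG tool.

    A real implementation would query a vector store; here a canned hit is
    filtered the same way a real backend might filter by similarity.
    """
    hits = [
        {"text": "Example chunk content relevant to the query.",  # assumed key
         "similarity": 92.5,                                       # assumed key
         "source": "docs/example.md"},                             # assumed key
    ]
    return [h for h in hits if h["similarity"] >= min_similarity_percent][:top_k]
```

Returning an empty list is handled gracefully by the dispatch code further down: the observation becomes "No relevant documents found for the query." and the agent keeps reasoning.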
+
+ # --- 2. Optional Initial Objectives Extraction ---
+ formatted_tools_list = "\n".join([
+ f"- Full Tool Name: {t.get('name')}\n Description: {t.get('description')}\n Input Schema: {json.dumps(t.get('input_schema'))}"
+ for t in available_tools
+ ])
+ if build_plan:
+ if streaming_callback:
+ streaming_callback("Extracting initial objectives...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "objectives_extraction"}, turn_history)
+
+ # The enhanced prompt is placed inside the original parenthesis format.
+ # The f-strings for tool lists and user prompts are preserved.
+
+ obj_prompt = (
+ "You are a hyper-efficient and logical project planner. Your sole purpose is to analyze the user's request and create a concise, numbered list of actionable steps to fulfill it.\n\n"
+ "Your plan must be the most direct and minimal path to the user's goal.\n\n"
+ "**Your Core Directives:**\n\n"
+ "1. **Analyze the Request:** Break down the user's prompt into the essential, core tasks required.\n"
+ "2. **Evaluate Tools with Extreme Scrutiny:** For each task, determine if a tool is **absolutely necessary**. Do not suggest a tool unless the task is impossible without it.\n"
+ "3. **Prioritize Simplicity:** If the request can be answered directly without any tools (e.g., it's a simple question or requires a creative response), your entire plan should be a single step: \"1. Formulate a direct answer to the user's request.\"\n\n"
+ "**CRITICAL RULES:**\n"
+ "* **DO NOT** add any steps, objectives, or tool uses that were not explicitly required by the user.\n"
+ "* **DO NOT** attempt to use a tool just because it is available. Most requests will not require any tools.\n"
+ "* **DO NOT** add \"nice-to-have\" or \"extra\" tasks. Stick strictly to the request.\n\n"
+ "Your final output must be a short, numbered list of steps. Do not call any tools in this planning phase.\n\n"
+ "---\n"
+ "**Available Tools:**\n"
+ f"{formatted_tools_list}\n\n"
+ "**User Request:**\n"
+ f'"{original_user_prompt}"'
+ )
+ initial_objectives_gen = self.generate_text(prompt=obj_prompt, system_prompt=objective_extraction_system_prompt, temperature=0.0, stream=False)
+ current_objectives = self.remove_thinking_blocks(initial_objectives_gen).strip()
+
+ if streaming_callback:
+ streaming_callback(f"Initial Objectives:\n{current_objectives}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "objectives_extraction"}, turn_history)
+ else:
+ current_objectives = f"Fulfill the user's request: '{original_user_prompt}'"

+ turn_history.append({"type": "initial_objectives", "content": current_objectives})
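For readers skimming the diff, the planner above is instructed to emit a short numbered list, so `current_objectives` typically ends up holding something like the invented example below (not taken from an actual run):

```python
# Invented illustration of a planner result; the format follows the rules in obj_prompt above.
current_objectives_example = (
    "1. Query the knowledge base for background on the user's topic.\n"
    "2. Formulate a direct answer to the user's request."
)
```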
+
+
+
+ # --- 3. Main Agent Loop ---
+ while llm_iterations < max_llm_iterations:
+ llm_iterations += 1
+ if streaming_callback:
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+
+ # Format agent history for the prompt
+ formatted_agent_history = "No actions taken yet."
+ if agent_work_history:
+ history_parts = []
+ for i, entry in enumerate(agent_work_history):
+ history_parts.append(
+ f"### Step {i+1}:\n"
+ f"**Thought:** {entry['thought']}\n"
+ f"**Action:** Called tool `{entry['tool_name']}` with parameters `{json.dumps(entry['tool_params'])}`\n"
+ f"**Observation:**\n```json\n{json.dumps(entry['tool_result'], indent=2)}\n```"
+ )
+ formatted_agent_history = "\n\n".join(history_parts)
+
+ # Construct the "Thinking & Planning" prompt
+ decision_prompt_template = f"""You are a strategic AI assistant. Your goal is to achieve a set of objectives by intelligently using research and system tools.
+
+ --- AVAILABLE TOOLS ---
+ {formatted_tools_list}
+
+ --- CURRENT STATE ---
+ Original User Request: {original_user_prompt}
+ Current Research Objectives:
+ {current_objectives}
+
+ Knowledge Scratchpad (our current understanding):
+ {knowledge_scratchpad}
+
+ --- AGENT WORK HISTORY (previous steps in this turn) ---
+ {formatted_agent_history}
+
+ --- INSTRUCTIONS ---
+ 1. **Analyze:** Review the entire work history, objectives, and scratchpad.
+ 2. **Update State:** Based on the latest observations, update the scratchpad and refine the objectives. The scratchpad should be a comprehensive summary of ALL knowledge gathered.
+ 3. **Decide Next Action:** Choose ONE of the following: `call_tool`, `final_answer`, or `clarify`. Always prefer to gather information with `research::query_database` before attempting to use other tools if you lack context.
+
+ --- OUTPUT FORMAT ---
+ Respond with a single JSON object inside a JSON markdown tag. Use this exact schema:
+ ```json
+ {{
+ "thought": "Your reasoning for the chosen action, analyzing how the work history informs your next step. Explain why you are choosing a specific tool (or to answer).",
+ "updated_scratchpad": "The new, complete, and comprehensive summary of all knowledge gathered. Integrate new findings with old ones. if no new knowledge is gathered, this should be an empty string.",
+ "updated_objectives": "The full, potentially revised, list of objectives. If no change, repeat the current list.",
+ "action": "The chosen action: 'call_tool', 'final_answer', or 'clarify'.",
+ "tool_name": "(string, if action is 'call_tool') The full 'alias::tool_name' of the tool to use.",
+ "tool_params": {{"query": "...", "param2": "..."}},
+ "clarification_request": "(string, if action is 'clarify') Your question to the user."
+ }}
+ ```
+ """
+ raw_llm_decision_json = self.generate_text(
+ prompt=decision_prompt_template, n_predict=2048, temperature=tool_call_decision_temperature
+ )
+
+ # --- 4. Parse LLM's plan and update state ---
+ try:
+ llm_decision = robust_json_parser(raw_llm_decision_json)
+ turn_history.append({"type": "llm_plan", "content": llm_decision})
+
+ current_objectives = llm_decision.get("updated_objectives", current_objectives)
+ new_scratchpad = llm_decision.get("updated_scratchpad")
+
+ if new_scratchpad and new_scratchpad != knowledge_scratchpad:
+ knowledge_scratchpad = new_scratchpad
+ if streaming_callback:
+ streaming_callback(f"Knowledge scratchpad updated.", MSG_TYPE.MSG_TYPE_STEP, {"id": "scratchpad_update"}, turn_history)
+ streaming_callback(f"New Scratchpad:\n{knowledge_scratchpad}", MSG_TYPE.MSG_TYPE_INFO, {"id":"scratch_pad_update"}, turn_history)
+
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
+ ASCIIColors.error(f"Failed to parse LLM decision JSON: {raw_llm_decision_json}. Error: {e}")
+ turn_history.append({"type": "error", "content": f"Failed to parse LLM plan: {raw_llm_decision_json}"})
+ break
+
+ if streaming_callback:
+ streaming_callback(f"LLM thought: {llm_decision.get('thought', 'N/A')}", MSG_TYPE.MSG_TYPE_INFO, {"id": "llm_thought"}, turn_history)
+
+ # --- 5. Execute the chosen action ---
+ action = llm_decision.get("action")
+ tool_result = None
+
+ if action == "call_tool":
+ if len(tool_calls_made_this_turn) >= max_tool_calls:
+ ASCIIColors.warning("Max tool calls reached. Forcing final answer.")
+ break
+
+ tool_name = llm_decision.get("tool_name")
+ tool_params = llm_decision.get("tool_params", {})
+
+ if not tool_name or not isinstance(tool_params, dict):
+ ASCIIColors.error(f"Invalid tool call from LLM: name={tool_name}, params={tool_params}")
+ break
+
+ if streaming_callback:
+ streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
+
+ try:
+ # ** DYNAMIC TOOL/RAG DISPATCH **
+ if tool_name == "research::query_database":
+ query = tool_params.get("query")
+ if not query:
+ tool_result = {"error": "RAG tool called without a 'query' parameter."}
+ else:
+ retrieved_chunks = rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
+ if not retrieved_chunks:
+ tool_result = {"summary": "No relevant documents found for the query.", "chunks": []}
+ else:
+ tool_result = {
+ "summary": f"Found {len(retrieved_chunks)} relevant document chunks.",
+ "chunks": retrieved_chunks
+ }
+ else:
+ # Standard MCP tool execution
+ tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
+
+ except Exception as e_exec:
+ trace_exception(e_exec)
+ tool_result = {"error": f"An exception occurred while executing tool '{tool_name}': {e_exec}"}
+
+ # Record the work cycle in the agent's history
+ work_entry = {
+ "thought": llm_decision.get("thought", "N/A"),
+ "tool_name": tool_name,
+ "tool_params": tool_params,
+ "tool_result": tool_result
+ }
+ agent_work_history.append(work_entry)
+ tool_calls_made_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
+
+ if streaming_callback:
+ streaming_callback(f"Tool {tool_name} finished.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"tool_exec_{llm_iterations}"}, turn_history)
+ streaming_callback(json.dumps(tool_result, indent=2), MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
+
+ elif action == "clarify":
+ clarification_request = llm_decision.get("clarification_request", "I need more information. Could you please clarify?")
+ return {"final_answer": clarification_request, "tool_calls": tool_calls_made_this_turn, "error": None, "clarification": True}
+
+ elif action == "final_answer":
+ ASCIIColors.info("LLM decided to formulate a final answer.")
+ break
+
+ else:
+ ASCIIColors.warning(f"LLM returned unknown or missing action: '{action}'. Forcing final answer.")
+ break
+ if streaming_callback:
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+
+ if streaming_callback:
+ streaming_callback(f"LLM reasoning step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"planning_step_{llm_iterations}"}, turn_history)
+ # --- 6. Generate Final Answer ---
+ if streaming_callback:
+ streaming_callback("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "final_answer_synthesis"}, turn_history)
+
+ final_answer_prompt = f"""You are an AI assistant providing a final, comprehensive answer based on research and tool use.
+
+ --- CONTEXT ---
+ Original User Request: "{original_user_prompt}"
+
+ --- SUMMARY OF FINDINGS (Knowledge Scratchpad) ---
+ {knowledge_scratchpad}
+
+ --- INSTRUCTIONS ---
+ - Synthesize a clear, complete answer for the user based ONLY on the information in the 'Summary of Findings'.
+ - Address the user directly and answer their original request.
+ - Do not make up information. If the findings are insufficient, state what you found and what remains unanswered.
+ """
+ final_answer_text = self.generate_text(
+ prompt=final_answer_prompt,
+ system_prompt=system_prompt,
+ images=images,
+ stream=streaming_callback is not None,
+ streaming_callback=streaming_callback,
+ temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature,
+ **(llm_generation_kwargs or {})
+ )
+
+ if streaming_callback:
+ streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history)
+
+ final_answer = self.remove_thinking_blocks(final_answer_text)
+ turn_history.append({"type":"final_answer_generated", "content": final_answer})
+
+ return {"final_answer": final_answer, "tool_calls": tool_calls_made_this_turn, "error": None}

  def generate_code(
  self,