mbxai 2.2.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mbxai/agent/client.py CHANGED
@@ -1,8 +1,8 @@
 """
-Agent client implementation for MBX AI.
+Enhanced Agent client implementation for MBX AI with human-in-the-loop capabilities.
 """
 
-from typing import Any, Union, Type, Callable
+from typing import Any, Union, Type, Callable, Optional
 import logging
 import json
 from pydantic import BaseModel
@@ -10,49 +10,241 @@ from pydantic import BaseModel
 from ..openrouter import OpenRouterClient
 from ..tools import ToolClient
 from ..mcp import MCPClient
-from .models import AgentResponse, Question, QuestionList, AnswerList, Result, QualityCheck, TokenUsage, TokenSummary
+from .models import (
+    AgentResponse, AgentState, RequirementAnalysis, ToolAnalysis, TodoList, Task, TaskStatus,
+    HumanInLoopRequest, HumanInLoopResponse, HumanInLoopResponseBatch, HumanInteractionType, DialogOption,
+    GoalEvaluation, TokenUsage, TokenSummary, Result, SessionHandler, InMemorySessionHandler
+)
 
 logger = logging.getLogger(__name__)
 
 
-class AgentClient:
-    """
-    Agent client that wraps other AI clients with a dialog-based thinking process.
+class TaskManager:
+    """Manages task generation and execution for the agent."""
+
+    def __init__(self, ai_client):
+        self.ai_client = ai_client
+
+    def generate_todo_list(
+        self,
+        requirement_analysis: RequirementAnalysis,
+        tool_analysis: ToolAnalysis,
+        conversation_history: list[dict[str, Any]] = None
+    ) -> tuple[TodoList, TokenUsage]:
+        """Generate a todo list based on requirement and tool analysis."""
+        if conversation_history is None:
+            conversation_history = []
+
+        # Format available tools
+        tools_text = ""
+        if tool_analysis.relevant_tools:
+            tools_text = "\n\nAvailable Tools:\n"
+            for tool_name in tool_analysis.relevant_tools:
+                purpose = tool_analysis.tool_mapping.get(tool_name, "No description available")
+                tools_text += f"- {tool_name}: {purpose}\n"
+
+        # Format missing capabilities
+        missing_text = ""
+        if tool_analysis.missing_capabilities:
+            missing_text = "\n\nMissing Capabilities (to be handled manually):\n"
+            for capability in tool_analysis.missing_capabilities:
+                missing_text += f"- {capability}\n"
+
+        prompt = f"""
+        Based on this requirement analysis:
+        Goal: {requirement_analysis.goal}
+        Sub-goals: {', '.join(requirement_analysis.sub_goals)}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
+        Constraints: {', '.join(requirement_analysis.constraints)}
+        Complexity: {requirement_analysis.complexity_estimate}/10
+        {tools_text}{missing_text}
+
+        Create a detailed todo list with specific, actionable tasks to achieve the goal.
+        Each task should be concrete and measurable. Consider dependencies between tasks.
+        Assign the appropriate tools to tasks that need them.
+        Estimate complexity for each task (1-5 scale).
+        Provide an estimated total time to complete all tasks.
+
+        Break down complex goals into smaller, manageable tasks that can be executed step by step.
+        """
+
+        messages = [{"role": "user", "content": prompt}]
+
+        try:
+            response = self.ai_client.parse(conversation_history + messages, TodoList)
+            todo_list = self._extract_parsed_content(response, TodoList)
+            token_usage = self._extract_token_usage(response)
+
+            # Validate and fix todo list
+            self._validate_todo_list(todo_list)
+
+            logger.info(f"Generated todo list with {len(todo_list.tasks)} tasks")
+            return todo_list, token_usage
+
+        except Exception as e:
+            logger.error(f"Failed to generate todo list: {e}")
+            # Return a basic todo list
+            basic_task = Task(
+                title="Complete the requirement",
+                description=requirement_analysis.goal,
+                estimated_complexity=requirement_analysis.complexity_estimate
+            )
+            return TodoList(tasks=[basic_task], estimated_total_time="Unknown"), TokenUsage()
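The method returns a `(TodoList, TokenUsage)` tuple and, on any parse failure, degrades to a single catch-all task instead of raising, so callers always get something executable. A hedged usage sketch (the `ai_client` here stands for any object exposing `.parse(messages, response_format)`, which is all `TaskManager` requires):

```python
# Usage sketch, assuming requirement_analysis and tool_analysis were produced
# by the step-1/step-2 calls shown further down in this diff.
manager = TaskManager(ai_client)
todo_list, usage = manager.generate_todo_list(requirement_analysis, tool_analysis)

for task in todo_list.tasks:
    print(task.id, task.title, task.dependencies)  # Task fields used by _validate_todo_list
print(f"todo generation cost: {usage.total_tokens} tokens")
```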
+
+    def _validate_todo_list(self, todo_list: TodoList):
+        """Validate and fix the todo list."""
+        # Ensure all tasks have valid IDs
+        task_ids = set()
+        for task in todo_list.tasks:
+            if not task.id or task.id in task_ids:
+                task.id = str(__import__("uuid").uuid4())
+            task_ids.add(task.id)
+
+        # Validate dependencies exist
+        for task in todo_list.tasks:
+            valid_deps = [dep for dep in task.dependencies if dep in task_ids]
+            task.dependencies = valid_deps
+
+    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
+        """Extract the parsed content from the AI response."""
+        if hasattr(response, 'choices') and len(response.choices) > 0:
+            choice = response.choices[0]
+            if hasattr(choice.message, 'parsed') and choice.message.parsed:
+                return choice.message.parsed
+            elif hasattr(choice.message, 'content'):
+                try:
+                    content_dict = json.loads(choice.message.content)
+                    return response_format(**content_dict)
+                except (json.JSONDecodeError, TypeError):
+                    if response_format == TodoList:
+                        return TodoList(tasks=[])
+                    else:
+                        return response_format()
+
+        # Fallback
+        if response_format == TodoList:
+            return TodoList(tasks=[])
+        else:
+            return response_format()
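`_extract_parsed_content` tries three sources in order: the SDK-style pre-parsed model on `choice.message.parsed`, a JSON decode of the raw `message.content`, and finally an empty default instance. A hedged illustration, reusing the `manager` from the sketch above and hand-rolled `SimpleNamespace` stand-ins invented for the example:

```python
from types import SimpleNamespace

# Mimics a response whose structured parse failed, leaving raw JSON in content.
message = SimpleNamespace(parsed=None, content='{"tasks": []}')
response = SimpleNamespace(choices=[SimpleNamespace(message=message)])

todo = manager._extract_parsed_content(response, TodoList)
# parsed is falsy, so the JSON branch runs and yields TodoList(tasks=[]).
```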
+
+    def _extract_token_usage(self, response: Any) -> TokenUsage:
+        """Extract token usage information from an AI response."""
+        try:
+            if hasattr(response, 'usage') and response.usage:
+                usage = response.usage
+                return TokenUsage(
+                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
+                    completion_tokens=getattr(usage, 'completion_tokens', 0),
+                    total_tokens=getattr(usage, 'total_tokens', 0)
+                )
+        except (AttributeError, TypeError) as e:
+            logger.debug(f"Could not extract token usage: {e}")
+
+        return TokenUsage()
+
+
+class DialogHandler:
+    """Handles human-in-the-loop interactions."""
+
+    def __init__(self, ai_client):
+        self.ai_client = ai_client
 
-    The agent follows a multi-step process:
-    1. Analyze the prompt and generate clarifying questions (if ask_questions=True)
-    2. Wait for user answers or auto-answer questions
-    3. Process the prompt with available information
-    4. Quality check the result and iterate if needed
-    5. Generate final response in the requested format
+    def create_human_interaction_request(
+        self,
+        interaction_type: HumanInteractionType,
+        context: str,
+        task: Optional[Task] = None,
+        available_dialog_options: list[DialogOption] = None
+    ) -> HumanInLoopRequest:
+        """Create a human interaction request based on the context."""
+        if available_dialog_options is None:
+            available_dialog_options = []
+
+        if interaction_type == HumanInteractionType.DECISION:
+            return self._create_decision_request(context, task)
+        elif interaction_type == HumanInteractionType.QUESTION:
+            return self._create_question_request(context, task)
+        elif interaction_type == HumanInteractionType.DIALOG_OPTION:
+            return self._create_dialog_option_request(context, task, available_dialog_options)
+        else:
+            # Default question request
+            return HumanInLoopRequest(
+                interaction_type=HumanInteractionType.QUESTION,
+                prompt=f"I need your input for: {context}",
+                context=context
+            )
 
-    Requirements:
-    - The wrapped AI client MUST have a 'parse' method for structured responses
-    - All AI interactions use structured Pydantic models for reliable parsing
-    - Supports OpenRouterClient, ToolClient, and MCPClient (all have parse methods)
+    def _create_decision_request(self, context: str, task: Optional[Task]) -> HumanInLoopRequest:
+        """Create a decision request."""
+        task_info = f" for task '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.DECISION,
+            prompt=f"I need you to make a decision{task_info}: {context}",
+            options=["proceed", "skip", "modify", "abort"],
+            context=context
+        )
 
-    Tool Registration:
-    - Provides proxy methods for tool registration when supported by the underlying client
-    - register_tool(): Available with ToolClient and MCPClient
-    - register_mcp_server(): Available with MCPClient only
-    - Throws AttributeError for unsupported clients (e.g., OpenRouterClient)
+    def _create_question_request(self, context: str, task: Optional[Task]) -> HumanInLoopRequest:
+        """Create a question request."""
+        task_info = f" while working on '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.QUESTION,
+            prompt=f"I have a question{task_info}: {context}",
+            context=context
+        )
 
-    Configuration:
-    - max_iterations: Controls how many times the agent will iterate to improve results (default: 2)
-    - Set to 0 to disable quality improvement iterations
+    def _create_dialog_option_request(
+        self,
+        context: str,
+        task: Optional[Task],
+        available_dialog_options: list[DialogOption]
+    ) -> HumanInLoopRequest:
+        """Create a dialog option request."""
+        task_info = f" for task '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.DIALOG_OPTION,
+            prompt=f"Please select an action{task_info}: {context}",
+            dialog_options=available_dialog_options,
+            context=context
+        )
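Each private helper binds one `HumanInteractionType` to a pre-worded `HumanInLoopRequest`; only the DECISION variant carries a fixed option set, and unknown types fall back to a plain question. A short hedged sketch of the dispatch:

```python
handler = DialogHandler(ai_client)

request = handler.create_human_interaction_request(
    interaction_type=HumanInteractionType.DECISION,
    context="Overwrite the existing report file?",
)
assert request.options == ["proceed", "skip", "modify", "abort"]
# A DIALOG_OPTION request would instead carry the caller's DialogOption list.
```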
+
+
+class AgentClient:
+    """
+    Enhanced Agent client that follows a structured 6-step process:
+    1. Understand the requirement - what is the expected goal?
+    2. Which tools do I have to help me reach that goal?
+    3. Think about a todo list - what is required to reach the goal?
+    4. Work step by step on the todo list
+    5. When human-in-the-loop is active, hold a dialog with the user
+    6. Last step - is the goal reached? If not, create a new todo list; if yes, return the answer
+
+    The agent supports:
+    - A prompt/requirement/task
+    - A list of tools
+    - A list of dialog options
+    - Human in the loop - yes or no
+    - Possible question types for human in the loop (decision, question, dialog_option)
+    - A Pydantic model for the final response
     """
 
     def __init__(
-        self,
+        self,
         ai_client: Union[OpenRouterClient, ToolClient, MCPClient],
-        max_iterations: int = 2
+        human_in_loop: bool = False,
+        dialog_options: list[DialogOption] = None,
+        max_task_iterations: int = 10,
+        session_handler: SessionHandler = None
     ) -> None:
         """
         Initialize the AgentClient.
 
         Args:
             ai_client: The underlying AI client (OpenRouterClient, ToolClient, or MCPClient)
-            max_iterations: Maximum number of quality improvement iterations (default: 2)
+            human_in_loop: Whether to enable human-in-the-loop interactions
+            dialog_options: Available dialog options for human interactions
+            max_task_iterations: Maximum number of task execution iterations
+            session_handler: Custom session storage handler (defaults to InMemorySessionHandler)
 
         Raises:
             ValueError: If the client doesn't support structured responses (no parse method)
@@ -63,12 +255,20 @@ class AgentClient:
                 f"The provided client {type(ai_client).__name__} does not have a parse method."
             )
 
-        if max_iterations < 0:
-            raise ValueError("max_iterations must be non-negative")
+        if max_task_iterations < 1:
+            raise ValueError("max_task_iterations must be positive")
 
         self._ai_client = ai_client
-        self._max_iterations = max_iterations
-        self._agent_sessions: dict[str, dict[str, Any]] = {}
+        self._human_in_loop = human_in_loop
+        self._dialog_options = dialog_options or []
+        self._max_task_iterations = max_task_iterations
+
+        # Initialize session handler (default to in-memory if none provided)
+        self._session_handler = session_handler or InMemorySessionHandler()
+
+        # Create helper components
+        self._task_manager = TaskManager(ai_client)
+        self._dialog_handler = DialogHandler(ai_client)
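A hedged construction sketch tying the constructor arguments together (the `OpenRouterClient` arguments are elided because they are not part of this diff; any of the three supported clients works as long as it exposes `parse`):

```python
from mbxai.openrouter import OpenRouterClient
from mbxai.agent.client import AgentClient
from mbxai.agent.models import InMemorySessionHandler

ai_client = OpenRouterClient(...)  # constructor arguments omitted here

agent = AgentClient(
    ai_client,
    human_in_loop=True,    # pause for user input when a task needs it
    max_task_iterations=10,  # hard cap on step-4 execution passes
    session_handler=InMemorySessionHandler(),  # swap in a persistent SessionHandler to survive restarts
)
```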
 
     def register_tool(
         self,
@@ -80,17 +280,14 @@ class AgentClient:
         """
         Register a new tool with the underlying AI client.
 
-        This method proxies to the register_tool method of ToolClient or MCPClient.
-
         Args:
             name: The name of the tool
             description: A description of what the tool does
             function: The function to call when the tool is used
-            schema: The JSON schema for the tool's parameters. If None or empty,
-                will be automatically generated from the function signature.
+            schema: The JSON schema for the tool's parameters
 
         Raises:
-            AttributeError: If the underlying client doesn't support tool registration (e.g., OpenRouterClient)
+            AttributeError: If the underlying client doesn't support tool registration
         """
         if hasattr(self._ai_client, 'register_tool'):
             self._ai_client.register_tool(name, description, function, schema)
@@ -105,700 +302,714 @@ class AgentClient:
         """
         Register an MCP server and load its tools.
 
-        This method proxies to the register_mcp_server method of MCPClient.
-
         Args:
             name: The name of the MCP server
             base_url: The base URL of the MCP server
 
         Raises:
-            AttributeError: If the underlying client doesn't support MCP server registration (e.g., OpenRouterClient, ToolClient)
+            AttributeError: If the underlying client doesn't support MCP server registration
         """
         if hasattr(self._ai_client, 'register_mcp_server'):
             self._ai_client.register_mcp_server(name, base_url)
-            logger.debug(f"Registered MCP server '{name}' at {base_url} with {type(self._ai_client).__name__}")
+            logger.debug(f"Registered MCP server '{name}' at {base_url}")
         else:
             raise AttributeError(
                 f"MCP server registration is not supported by {type(self._ai_client).__name__}. "
                 f"Use MCPClient to register MCP servers."
             )
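Both proxies use the same capability check: `hasattr` on the wrapped client, then delegate or raise `AttributeError`. A hedged sketch of guarding a registration when the concrete client type is not known up front (the weather tool is invented for illustration):

```python
def get_weather(city: str) -> str:
    # Invented example tool.
    return f"Sunny in {city}"

try:
    agent.register_tool(
        name="get_weather",
        description="Return the current weather for a city",
        function=get_weather,
        schema={"type": "object", "properties": {"city": {"type": "string"}}},
    )
except AttributeError:
    pass  # wrapped client is a bare OpenRouterClient without tool support
```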
 
-    def _call_ai_parse(self, messages: list[dict[str, Any]], response_format: Type[BaseModel], conversation_history: list[dict[str, Any]] = None) -> Any:
-        """Call the parse method on the AI client with optional conversation history."""
-        # Combine conversation history with new messages
-        if conversation_history:
-            full_messages = conversation_history + messages
-            logger.debug(f"🔗 AI call with {len(conversation_history)} history messages + {len(messages)} new messages = {len(full_messages)} total")
-        else:
-            full_messages = messages
-            logger.debug(f"🔗 AI call with {len(messages)} messages (no history)")
-        return self._ai_client.parse(full_messages, response_format)
-
-    def _validate_answers(self, answers: Any) -> bool:
-        """
-        Validate that answers parameter is a proper AnswerList with content.
-
-        Args:
-            answers: The answers parameter to validate
-
-        Returns:
-            True if answers is valid and has content, False otherwise
-        """
-        # Check if answers is the correct type
-        if not isinstance(answers, AnswerList):
-            logger.warning(f"Invalid answers type: {type(answers)}. Expected AnswerList, treating as no answers.")
-            return False
-
-        # Check if answers has content
-        if not hasattr(answers, 'answers') or not answers.answers:
-            logger.info(f"Empty answers list provided, proceeding without answers processing.")
-            return False
-
-        # Check if answers list contains valid Answer objects
-        for answer in answers.answers:
-            if not hasattr(answer, 'key') or not hasattr(answer, 'answer'):
-                logger.warning(f"Invalid answer object in list: {answer}. Treating as no answers.")
-                return False
-
-        logger.debug(f"Validated {len(answers.answers)} answers")
-        return True
-
-    def _extract_token_usage(self, response: Any) -> TokenUsage:
-        """Extract token usage information from an AI response."""
-        try:
-            if hasattr(response, 'usage') and response.usage:
-                usage = response.usage
-                return TokenUsage(
-                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
-                    completion_tokens=getattr(usage, 'completion_tokens', 0),
-                    total_tokens=getattr(usage, 'total_tokens', 0)
-                )
-        except (AttributeError, TypeError) as e:
-            logger.debug(f"Could not extract token usage: {e}")
-
-        return TokenUsage()  # Return empty usage if extraction fails
-
-    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
-        """Extract the parsed content from the AI response."""
-        if hasattr(response, 'choices') and len(response.choices) > 0:
-            choice = response.choices[0]
-            if hasattr(choice.message, 'parsed') and choice.message.parsed:
-                return choice.message.parsed
-            elif hasattr(choice.message, 'content'):
-                # Try to parse the content as JSON
-                try:
-                    content_dict = json.loads(choice.message.content)
-                    return response_format(**content_dict)
-                except (json.JSONDecodeError, TypeError):
-                    # If parsing fails, create a default response
-                    if response_format == QuestionList:
-                        return QuestionList(questions=[])
-                    elif response_format == Result:
-                        return Result(result=choice.message.content)
-                    elif response_format == QualityCheck:
-                        return QualityCheck(is_good=True, feedback="")
-                    else:
-                        # For other formats, try to create with content
-                        return response_format(result=choice.message.content)
-
-        # Fallback - create empty/default response
-        if response_format == QuestionList:
-            return QuestionList(questions=[])
-        elif response_format == Result:
-            return Result(result="No response generated")
-        elif response_format == QualityCheck:
-            return QualityCheck(is_good=True, feedback="")
-        else:
-            return response_format()
-
     def agent(
         self,
-        prompt: str = None,
-        final_response_structure: Type[BaseModel] = None,
-        ask_questions: bool = True,
+        prompt: str,
+        final_response_structure: Type[BaseModel],
+        tools: list[str] = None,
+        dialog_options: list[DialogOption] = None,
+        human_in_loop: bool = None,
         agent_id: str = None,
-        answers: AnswerList | None = None
+        human_response: Union[HumanInLoopResponse, HumanInLoopResponseBatch, list[HumanInLoopResponse]] = None
     ) -> AgentResponse:
         """
-        Process a prompt through the agent's thinking process.
+        Process a prompt through the enhanced 6-step agent process.
 
         Args:
-            prompt: The prompt from the user (optional if agent_id exists with history)
-            final_response_structure: Pydantic model defining the expected final response format (required for new sessions)
-            ask_questions: Whether to ask clarifying questions (default: True)
+            prompt: The user's prompt/requirement/task
+            final_response_structure: Pydantic model defining the expected final response format
+            tools: List of tool names to use (if None, uses all available tools)
+            dialog_options: Available dialog options for this session
+            human_in_loop: Whether to enable human-in-the-loop (overrides the constructor default)
            agent_id: Optional agent session ID to continue an existing conversation
-            answers: Optional answers to questions (when continuing a conversation with questions)
+            human_response: Response(s) from the human when continuing an interaction - can be a single response, a batch, or a list
 
         Returns:
-            AgentResponse containing either questions to ask or the final response
-
-        Raises:
-            ValueError: If neither prompt nor agent_id with history is provided, or if final_response_structure is missing for new sessions
+            AgentResponse containing the current state and any required interactions
         """
-        # Validate inputs and determine session type
-        is_existing_session = agent_id is not None and agent_id in self._agent_sessions
-        existing_session = self._agent_sessions.get(agent_id, {}) if agent_id else {}
-        conversation_history = existing_session.get("conversation_history", []).copy()
-
-        # Validation logic
+        # Setup session
+        is_existing_session = agent_id is not None and self._session_handler.session_exists(agent_id)
         if not is_existing_session:
-            # New session - both prompt and final_response_structure are required
-            if not prompt:
-                raise ValueError("Prompt is required when starting a new agent session")
-            if not final_response_structure:
-                raise ValueError("final_response_structure is required when starting a new agent session")
-
-            # Create new agent_id if not provided
             if agent_id is None:
                 agent_id = str(__import__("uuid").uuid4())
-            logger.info(f"🚀 Starting new agent process (ID: {agent_id}) with prompt: {prompt[:100]}...")
+            logger.info(f"🚀 Starting new agent process (ID: {agent_id})")
        else:
-            # Existing session - use previous final_response_structure if not provided
-            if not final_response_structure:
-                final_response_structure = existing_session.get("final_response_structure")
-                if not final_response_structure:
-                    raise ValueError("final_response_structure not found in existing session and not provided")
-
-            # Handle optional prompt for existing sessions
-            if not prompt:
-                # Use conversation history to continue without explicit prompt
-                prompt = "[Continue conversation based on history]"
-                logger.info(f"🔄 Continuing agent process (ID: {agent_id}) without explicit prompt (using history)")
-            else:
-                logger.info(f"🔄 Continuing agent process (ID: {agent_id}) with prompt: {prompt[:100]}...")
-
-        # Initialize token summary
-        token_summary = TokenSummary()
-
-        if conversation_history:
-            logger.info(f"📜 Agent {agent_id}: Loaded conversation history with {len(conversation_history)} messages")
-
-        # Store conversation history for AI calls (don't include current prompt yet)
-        history_for_ai = conversation_history.copy()
+            logger.info(f"🔄 Continuing agent process (ID: {agent_id})")
+
+        # Initialize or get session data
+        session = self._session_handler.get_session(agent_id) or {
+            "original_prompt": prompt,
+            "final_response_structure": final_response_structure,
+            "human_in_loop": human_in_loop if human_in_loop is not None else self._human_in_loop,
+            "dialog_options": dialog_options or self._dialog_options,
+            "conversation_history": [],
+            "token_summary": TokenSummary(),
+            "state": AgentState.ANALYZING_REQUIREMENT,
+            "requirement_analysis": None,
+            "tool_analysis": None,
+            "todo_list": None,
+            "current_task_index": 0,
+            "iteration_count": 0
+        }
+
+        # Handle human response(s) if provided
+        if human_response:
+            session = self._handle_human_responses(session, human_response)
+
+        # Store session
+        self._session_handler.set_session(agent_id, session)
+
+        # Process based on current state
+        return self._process_agent_state(agent_id, session)
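Because `agent()` is re-entrant - each call loads the session by `agent_id` and resumes from the stored state - a human-in-the-loop caller drives it as a loop. A hedged sketch continuing the construction example above (`ask_user` is a hypothetical UI hook that turns a `HumanInLoopRequest` into a `HumanInLoopResponse`; that model's fields live in `.models` and are not shown in this diff):

```python
from pydantic import BaseModel

class ReportSummary(BaseModel):  # caller-defined final response structure
    text: str

response = agent.agent(
    prompt="Summarize last week's sales numbers",
    final_response_structure=ReportSummary,
    human_in_loop=True,
)

while response.state == AgentState.WAITING_FOR_HUMAN:
    answer = ask_user(response.human_interaction_request)  # hypothetical UI hook
    response = agent.agent(
        prompt="Summarize last week's sales numbers",
        final_response_structure=ReportSummary,
        agent_id=response.agent_id,
        human_response=answer,
    )

if response.state == AgentState.COMPLETED:
    print(response.final_response)
```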
+
+    def _process_agent_state(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Process the agent based on its current state."""
+        state = session["state"]
+        token_summary = session["token_summary"]
 
-        # Add current prompt to full conversation history for session storage
-        conversation_history.append({"role": "user", "content": prompt})
-
-        # Handle answers provided (skip question generation and process directly)
-        if answers is not None:
-            if self._validate_answers(answers):
-                logger.info(f"📝 Agent {agent_id}: Processing with provided answers, skipping question generation")
-                return self._process_answers_directly(agent_id, prompt, final_response_structure, answers, token_summary, history_for_ai)
+        try:
+            if state == AgentState.ANALYZING_REQUIREMENT:
+                return self._step1_analyze_requirement(agent_id, session)
+            elif state == AgentState.ANALYZING_TOOLS:
+                return self._step2_analyze_tools(agent_id, session)
+            elif state == AgentState.GENERATING_TODO:
+                return self._step3_generate_todo(agent_id, session)
+            elif state == AgentState.EXECUTING_TASKS:
+                return self._step4_execute_tasks(agent_id, session)
+            elif state == AgentState.WAITING_FOR_HUMAN:
+                return self._step5_handle_human_interaction(agent_id, session)
+            elif state == AgentState.EVALUATING_GOAL:
+                return self._step6_evaluate_goal(agent_id, session)
+            elif state == AgentState.COMPLETED:
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.COMPLETED,
+                    final_response=session.get("final_response"),
+                    token_summary=token_summary
+                )
             else:
-                logger.info(f"📝 Agent {agent_id}: Invalid or empty answers provided, proceeding with normal flow")
+                # Unknown state, reset to beginning
+                session["state"] = AgentState.ANALYZING_REQUIREMENT
+                return self._step1_analyze_requirement(agent_id, session)
+
+        except Exception as e:
+            logger.error(f"Error in agent state {state}: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Agent failed with error: {str(e)}",
+                token_summary=token_summary
+            )
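The ladder re-enters the step methods, so any step can redirect the flow simply by mutating `session["state"]` and returning. A behavior-equivalent dispatch-table sketch (not the package's code) makes the state-to-handler mapping explicit:

```python
# Sketch only: the same dispatch expressed as a table inside _process_agent_state.
# (The COMPLETED branch would still need its own early return.)
handlers = {
    AgentState.ANALYZING_REQUIREMENT: self._step1_analyze_requirement,
    AgentState.ANALYZING_TOOLS: self._step2_analyze_tools,
    AgentState.GENERATING_TODO: self._step3_generate_todo,
    AgentState.EXECUTING_TASKS: self._step4_execute_tasks,
    AgentState.WAITING_FOR_HUMAN: self._step5_handle_human_interaction,
    AgentState.EVALUATING_GOAL: self._step6_evaluate_goal,
}
# Unknown states fall back to step 1, mirroring the else branch above.
return handlers.get(state, self._step1_analyze_requirement)(agent_id, session)
```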
+
+    def _step1_analyze_requirement(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 1: Understand the requirement - What is the expected goal."""
+        logger.info(f"📋 Agent {agent_id}: Step 1 - Analyzing requirement")
+
+        prompt = session["original_prompt"]
+        conversation_history = session["conversation_history"]
 
-        # Step 1: Generate questions (if ask_questions is True)
-        if ask_questions:
-            logger.info(f"❓ Agent {agent_id}: Analyzing prompt and generating clarifying questions")
-            questions_prompt = f"""
-            Understand this prompt and what the user wants to achieve by it:
+        analysis_prompt = f"""
+        Analyze this user requirement and understand what they want to achieve:
         ==========
         {prompt}
         ==========
 
-            Think about useful steps and which information are required for it. First ask for required information and details to improve that process, when that is useful for the given case. When it's not useful, return an empty list of questions.
-            Use available tools to gather information or perform actions that would improve your response.
-            Analyze the prompt carefully and determine if additional information would significantly improve the quality of the response. Only ask questions that are truly necessary and would materially impact the outcome.
+        Break down the requirement into:
+        1. The main goal the user wants to achieve
+        2. Sub-goals that contribute to the main goal
+        3. Success criteria to determine if the goal is achieved
+        4. Any constraints or limitations to consider
+        5. Complexity estimate (1-10 scale, where 1 is trivial and 10 is extremely complex)
 
-            IMPORTANT: For each question, provide a technical key identifier that:
-            - Uses only alphanumeric characters and underscores
-            - Starts with a letter
-            - Is descriptive but concise (e.g., "user_name", "email_address", "preferred_genre", "budget_range")
-            - Contains no spaces, hyphens, or special characters like ?, !, @, etc.
+        Provide a comprehensive analysis of what the user wants to accomplish.
         """
+
+        messages = [{"role": "user", "content": analysis_prompt}]
+
+        try:
+            response = self._ai_client.parse(conversation_history + messages, RequirementAnalysis)
+            requirement_analysis = self._extract_parsed_content(response, RequirementAnalysis)
+            token_usage = self._extract_token_usage(response)
 
-            messages = [{"role": "user", "content": questions_prompt}]
+            # Update session
+            session["requirement_analysis"] = requirement_analysis
+            session["token_summary"].requirement_analysis = token_usage
+            session["state"] = AgentState.ANALYZING_TOOLS
 
-            try:
-                response = self._call_ai_parse(messages, QuestionList, history_for_ai)
-                question_list = self._extract_parsed_content(response, QuestionList)
-
-                # Extract token usage for question generation
-                token_summary.question_generation = self._extract_token_usage(response)
-
-                logger.info(f"❓ Agent {agent_id}: Generated {len(question_list.questions)} questions (tokens: {token_summary.question_generation.total_tokens})")
-
-                # If we have questions, return them to the user
-                if question_list.questions:
-                    agent_response = AgentResponse(agent_id=agent_id, questions=question_list.questions, token_summary=token_summary)
-                    # Store the session for continuation
-                    self._agent_sessions[agent_response.agent_id] = {
-                        "original_prompt": prompt,
-                        "final_response_structure": final_response_structure,
-                        "questions": question_list.questions,
-                        "step": "waiting_for_answers",
-                        "token_summary": token_summary,
-                        "conversation_history": history_for_ai  # Include history without current prompt
-                    }
-                    logger.info(f"📋 Agent {agent_id}: Waiting for user answers to {len(question_list.questions)} questions")
-                    return agent_response
-
-            except Exception as e:
-                logger.warning(f"Failed to generate questions: {e}. Proceeding without questions.")
-
-        # Step 2 & 3: No questions or ask_questions=False - proceed directly
-        logger.info(f"⚡ Agent {agent_id}: No questions needed, proceeding directly to processing")
-        return self._process_with_answers(prompt, final_response_structure, [], agent_id, token_summary, history_for_ai)
-
-    def _process_answers_directly(
-        self,
-        agent_id: str,
-        prompt: str,
-        final_response_structure: Type[BaseModel],
-        answers: AnswerList,
-        token_summary: TokenSummary,
-        conversation_history: list[dict[str, Any]]
-    ) -> AgentResponse:
-        """
-        Process answers directly without going through question generation.
-
-        Args:
-            agent_id: The agent session identifier
-            prompt: The current prompt
-            final_response_structure: Expected response structure
-            answers: Provided answers
-            token_summary: Current token usage summary
-            conversation_history: Conversation history
+            logger.info(f"📋 Agent {agent_id}: Requirement analysis completed - Goal: {requirement_analysis.goal}")
 
-        Returns:
-            AgentResponse with the final result
-        """
-        # Check if we have a session with questions to match against
-        session = self._agent_sessions.get(agent_id, {})
-        questions = session.get("questions", [])
-
-        if not questions:
-            # No previous questions - treat as simple additional context
-            logger.info(f"📝 Agent {agent_id}: No previous questions found, treating answers as additional context")
-            answer_dict = {answer.key: answer.answer for answer in answers.answers}
-            qa_pairs = []
-            for answer in answers.answers:
-                qa_pairs.append({
-                    "question": f"Information about {answer.key}",
-                    "key": answer.key,
-                    "answer": answer.answer,
-                    "required": True
-                })
+            # Continue to next step
+            return self._step2_analyze_tools(agent_id, session)
+
+        except Exception as e:
+            logger.error(f"Failed to analyze requirement: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to analyze requirement: {str(e)}",
+                token_summary=session["token_summary"]
+            )
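`RequirementAnalysis` itself is defined in `.models` and not shown in this diff; judging from the fields read in this file (`goal`, `sub_goals`, `success_criteria`, `constraints`, `complexity_estimate`), its shape is presumably close to this hedged reconstruction:

```python
from pydantic import BaseModel

class RequirementAnalysis(BaseModel):  # reconstruction from usage, not the shipped definition
    goal: str
    sub_goals: list[str]
    success_criteria: list[str]
    constraints: list[str]
    complexity_estimate: int  # 1-10, per the analysis prompt above
```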
+
+    def _step2_analyze_tools(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 2: Which tools do I have to help me reach that goal."""
+        logger.info(f"🔧 Agent {agent_id}: Step 2 - Analyzing available tools")
+
+        requirement_analysis = session["requirement_analysis"]
+        conversation_history = session["conversation_history"]
+
+        # Get available tools
+        available_tools = self._get_available_tools()
+
+        tools_text = "Available Tools:\n"
+        if available_tools:
+            for tool_name, tool_desc in available_tools.items():
+                tools_text += f"- {tool_name}: {tool_desc}\n"
         else:
-            # Match answers with previous questions
-            logger.info(f"📝 Agent {agent_id}: Matching {len(answers.answers)} answers with previous questions")
-            answer_dict = {answer.key: answer.answer for answer in answers.answers}
-
-            # Create question-answer pairs for better context
-            qa_pairs = []
-            for question in questions:
-                answer_text = answer_dict.get(question.key, "No answer provided")
-                qa_pairs.append({
-                    "question": question.question,
-                    "key": question.key,
-                    "answer": answer_text,
-                    "required": question.required
-                })
-
-            # Process with the provided answers and question context
-            result = self._process_with_answers(
-                prompt,
-                final_response_structure,
-                qa_pairs,
-                agent_id,
-                token_summary,
-                conversation_history
-            )
+            tools_text += "No tools are currently available.\n"
 
-        # Note: History management is now handled in _process_with_answers
-        # No need to duplicate history management here
-        return result
+        analysis_prompt = f"""
+        Given this goal analysis:
+        Goal: {requirement_analysis.goal}
+        Sub-goals: {', '.join(requirement_analysis.sub_goals)}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
 
-    def _format_qa_context_for_quality_check(self, answers: Union[list, dict[str, str]]) -> str:
-        """
-        Format question-answer context for quality check and improvement prompts.
-
-        Args:
-            answers: Question-answer pairs or simple answers
-
-        Returns:
-            Formatted context text
-        """
-        if not answers:
-            return ""
-
-        if isinstance(answers, list) and answers:
-            # Check if it's a list of question-answer pairs (enhanced format)
-            if isinstance(answers[0], dict) and "question" in answers[0]:
-                context_text = "\nContext Information (Questions & Answers):\n"
-                context_text += "The response was generated with the following additional context:\n\n"
-                for i, qa_pair in enumerate(answers, 1):
-                    question = qa_pair.get("question", "Unknown question")
-                    answer = qa_pair.get("answer", "No answer provided")
-                    required = qa_pair.get("required", True)
-
-                    status_marker = "🔴 REQUIRED" if required else "🟡 OPTIONAL"
-                    context_text += f"{i}. {status_marker} Q: {question}\n"
-                    context_text += f"   A: {answer}\n\n"
-                return context_text
-            else:
-                # Legacy format - simple list
-                return f"\nAdditional context: {', '.join(str(a) for a in answers)}\n\n"
-        elif isinstance(answers, dict) and answers:
-            # Legacy format - simple dict
-            context_text = "\nAdditional context provided:\n"
-            for key, answer in answers.items():
-                context_text += f"- {key}: {answer}\n"
-            return context_text + "\n"
-
-        return ""
-
-    def _process_with_answers(
-        self,
-        prompt: str,
-        final_response_structure: Type[BaseModel],
-        answers: Union[list, dict[str, str]],
-        agent_id: str,
-        token_summary: TokenSummary,
-        conversation_history: list[dict[str, Any]] = None
-    ) -> AgentResponse:
-        """
-        Process the prompt with answers through the thinking pipeline.
+        And these available tools:
+        {tools_text}
 
-        Args:
-            prompt: The original prompt
-            final_response_structure: Expected final response structure
-            answers: Question-answer pairs or simple answers (empty if no questions were asked)
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+        Analyze which tools are relevant for achieving this goal:
+        1. List the relevant tools and explain how each helps achieve the goal
+        2. Map each tool to its specific purpose for this goal
+        3. Identify any missing capabilities that aren't covered by available tools
 
-        Returns:
-            AgentResponse with the final result
-        """
-        if conversation_history is None:
-            conversation_history = []
-
-        # Step 3: Process the prompt with thinking
-        logger.info(f"🧠 Agent {agent_id}: Processing prompt and generating initial response")
-        result = self._think_and_process(prompt, answers, agent_id, token_summary, conversation_history)
+        Provide a comprehensive tool analysis.
+        """
 
-        # Step 4: Quality check and iteration
-        final_result = self._quality_check_and_iterate(prompt, result, answers, agent_id, token_summary, conversation_history)
+        messages = [{"role": "user", "content": analysis_prompt}]
 
-        # Step 5: Generate final answer in requested format
-        logger.info(f"📝 Agent {agent_id}: Generating final structured response")
-        final_response = self._generate_final_response(prompt, final_result, final_response_structure, agent_id, token_summary, conversation_history)
-
-        # Update session with the final response in conversation history
-        if agent_id in self._agent_sessions:
-            # Update conversation history with assistant response
-            updated_history = conversation_history.copy()
-            updated_history.append({"role": "assistant", "content": str(final_response)})
-
-            self._agent_sessions[agent_id]["conversation_history"] = updated_history
-            self._agent_sessions[agent_id]["step"] = "completed"
-            self._agent_sessions[agent_id]["token_summary"] = token_summary
-            self._agent_sessions[agent_id]["final_response_structure"] = final_response_structure
-            logger.info(f"💾 Agent {agent_id}: Updated session with conversation history ({len(updated_history)} messages)")
-        else:
-            # Create new session if it doesn't exist
-            updated_history = conversation_history.copy()
-            updated_history.append({"role": "assistant", "content": str(final_response)})
+        try:
+            response = self._ai_client.parse(conversation_history + messages, ToolAnalysis)
+            tool_analysis = self._extract_parsed_content(response, ToolAnalysis)
+            token_usage = self._extract_token_usage(response)
+
+            # Update session
+            session["tool_analysis"] = tool_analysis
+            session["token_summary"].tool_analysis = token_usage
+            session["state"] = AgentState.GENERATING_TODO
+
+            logger.info(f"🔧 Agent {agent_id}: Tool analysis completed - {len(tool_analysis.relevant_tools)} relevant tools")
+
+            # Continue to next step
+            return self._step3_generate_todo(agent_id, session)
 
-            self._agent_sessions[agent_id] = {
-                "step": "completed",
-                "conversation_history": updated_history,
-                "token_summary": token_summary,
-                "final_response_structure": final_response_structure
-            }
-            logger.info(f"💾 Agent {agent_id}: Created new session with conversation history ({len(updated_history)} messages)")
+        except Exception as e:
+            logger.error(f"Failed to analyze tools: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to analyze tools: {str(e)}",
+                token_summary=session["token_summary"]
+            )
+ )
531
+
532
+ def _step3_generate_todo(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
533
+ """Step 3: Think about a todo list - What is required to reach the goal."""
534
+ logger.info(f"📝 Agent {agent_id}: Step 3 - Generating todo list")
511
535
 
512
- # Log final token summary
513
- logger.info(f"📊 Agent {agent_id}: Token usage summary - Total: {token_summary.total_tokens} "
514
- f"(Prompt: {token_summary.total_prompt_tokens}, Completion: {token_summary.total_completion_tokens})")
536
+ requirement_analysis = session["requirement_analysis"]
537
+ tool_analysis = session["tool_analysis"]
538
+ conversation_history = session["conversation_history"]
515
539
 
516
- return AgentResponse(agent_id=agent_id, final_response=final_response, token_summary=token_summary)
540
+ try:
541
+ todo_list, token_usage = self._task_manager.generate_todo_list(
542
+ requirement_analysis,
543
+ tool_analysis,
544
+ conversation_history
545
+ )
546
+
547
+ # Update session
548
+ session["todo_list"] = todo_list
549
+ session["token_summary"].todo_generation = token_usage
550
+ session["current_task_index"] = 0
551
+ session["state"] = AgentState.EXECUTING_TASKS
552
+
553
+ logger.info(f"📝 Agent {agent_id}: Todo list generated with {len(todo_list.tasks)} tasks")
554
+
555
+ # Return current state for visibility
556
+ return AgentResponse(
557
+ agent_id=agent_id,
558
+ state=AgentState.EXECUTING_TASKS,
559
+ requirement_analysis=requirement_analysis,
560
+ tool_analysis=tool_analysis,
561
+ todo_list=todo_list,
562
+ token_summary=session["token_summary"]
563
+ )
564
+
565
+ except Exception as e:
566
+ logger.error(f"Failed to generate todo list: {e}")
567
+ session["state"] = AgentState.FAILED
568
+ return AgentResponse(
569
+ agent_id=agent_id,
570
+ state=AgentState.FAILED,
571
+ final_response=f"Failed to generate todo list: {str(e)}",
572
+ token_summary=session["token_summary"]
573
+ )
517
574
 
518
- def _think_and_process(self, prompt: str, answers: Union[list, dict[str, str]], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> str:
519
- """
520
- Process the prompt with thinking.
575
+ def _step4_execute_tasks(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
576
+ """Step 4: Work step by step on the todo list."""
577
+ logger.info(f"⚡ Agent {agent_id}: Step 4 - Executing tasks")
578
+
579
+ todo_list = session["todo_list"]
580
+ conversation_history = session["conversation_history"]
581
+
582
+ # Check if we've exceeded iteration limit
583
+ session["iteration_count"] = session.get("iteration_count", 0) + 1
584
+ if session["iteration_count"] > self._max_task_iterations:
585
+ logger.warning(f"Agent {agent_id}: Reached max task iterations, moving to goal evaluation")
586
+ session["state"] = AgentState.EVALUATING_GOAL
587
+ return self._step6_evaluate_goal(agent_id, session)
588
+
589
+ # Get next task to execute
590
+ next_task = todo_list.get_next_task()
591
+
592
+ if not next_task:
593
+ # No more tasks, move to goal evaluation
594
+ logger.info(f"⚡ Agent {agent_id}: All tasks completed, moving to goal evaluation")
595
+ session["state"] = AgentState.EVALUATING_GOAL
596
+ return self._step6_evaluate_goal(agent_id, session)
597
+
598
+ # Mark task as in progress
599
+ next_task.status = TaskStatus.IN_PROGRESS
600
+ session["current_task"] = next_task
601
+
602
+ logger.info(f"⚡ Agent {agent_id}: Executing task '{next_task.title}'")
603
+
604
+ # Check if human interaction is needed for this task
605
+ if session["human_in_loop"] and self._should_request_human_interaction(next_task, session):
606
+ session["state"] = AgentState.WAITING_FOR_HUMAN
607
+ return self._step5_handle_human_interaction(agent_id, session)
608
+
609
+ # Execute the task
610
+ try:
611
+ result, token_usage = self._execute_task(next_task, session, conversation_history)
612
+
613
+ # Update task status
614
+ next_task.status = TaskStatus.COMPLETED
615
+ next_task.result = result
616
+
617
+ # Track token usage
618
+ session["token_summary"].task_execution.append(token_usage)
619
+
620
+ logger.info(f"⚡ Agent {agent_id}: Task '{next_task.title}' completed")
621
+
622
+ # Continue with next task
623
+ return self._step4_execute_tasks(agent_id, session)
624
+
625
+ except Exception as e:
626
+ logger.error(f"Task execution failed: {e}")
627
+ next_task.status = TaskStatus.FAILED
628
+ next_task.error_message = str(e)
629
+
630
+ # Continue with next task (skip failed one)
631
+ return self._step4_execute_tasks(agent_id, session)
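Note that step 4 advances by calling itself after every task, deepening the call stack by one frame per task; `max_task_iterations` is what bounds it. An equivalent iterative formulation (a sketch, not the package's code) would be:

```python
# Iterative sketch of the same loop, avoiding the recursion.
while session["iteration_count"] < self._max_task_iterations:
    session["iteration_count"] += 1
    next_task = todo_list.get_next_task()
    if next_task is None:
        break  # all tasks done
    execute_or_hand_off(next_task)  # placeholder for the try/except body above
session["state"] = AgentState.EVALUATING_GOAL
return self._step6_evaluate_goal(agent_id, session)
```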
 
-        Args:
-            prompt: The original prompt
-            answers: Question-answer pairs or simple answers
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+    def _step5_handle_human_interaction(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 5: When human in the loop is active, have a dialog with the user."""
+        logger.info(f"👤 Agent {agent_id}: Step 5 - Handling human interaction")
+
+        current_task = session.get("current_task")
+        dialog_options = session.get("dialog_options", [])
+
+        # Create human interaction request
+        context = f"I need your input for the task: {current_task.title if current_task else 'General question'}"
+        if current_task:
+            context += f"\nTask description: {current_task.description}"
+
+        # Determine interaction type based on task and context
+        interaction_type = self._determine_interaction_type(current_task, session)
+
+        human_request = self._dialog_handler.create_human_interaction_request(
+            interaction_type=interaction_type,
+            context=context,
+            task=current_task,
+            available_dialog_options=dialog_options
+        )
+
+        session["pending_human_request"] = human_request
+
+        return AgentResponse(
+            agent_id=agent_id,
+            state=AgentState.WAITING_FOR_HUMAN,
+            current_task=current_task,
+            human_interaction_request=human_request,
+            todo_list=session["todo_list"],
+            token_summary=session["token_summary"]
+        )
 
-        Returns:
-            The AI's result
-        """
-        if conversation_history is None:
-            conversation_history = []
-        # Format answers for the prompt with enhanced context
-        answers_text = ""
-        if isinstance(answers, list) and answers:
-            # Check if it's a list of question-answer pairs (enhanced format)
-            if answers and isinstance(answers[0], dict) and "question" in answers[0]:
-                answers_text = "\n\nQuestion-Answer Context:\n"
-                answers_text += "The following questions were asked to gather more information, along with the answers provided:\n\n"
-                for i, qa_pair in enumerate(answers, 1):
-                    question = qa_pair.get("question", "Unknown question")
-                    answer = qa_pair.get("answer", "No answer provided")
-                    key = qa_pair.get("key", "")
-                    required = qa_pair.get("required", True)
-
-                    status_marker = "🔴 REQUIRED" if required else "🟡 OPTIONAL"
-                    answers_text += f"{i}. {status_marker} Question: {question}\n"
-                    answers_text += f"   Answer: {answer}\n"
-                    if key:
-                        answers_text += f"   (Key: {key})\n"
-                    answers_text += "\n"
-            else:
-                # Legacy format - simple list
-                answers_text = f"\n\nAdditional information: {', '.join(str(a) for a in answers)}\n"
-        elif isinstance(answers, dict) and answers:
-            # Legacy format - simple dict
-            answers_text = "\n\nAdditional information provided:\n"
-            for key, answer in answers.items():
-                answers_text += f"- {key}: {answer}\n"
-
-        thinking_prompt = f"""
-        Think about this prompt, the goal and the steps required to fulfill it:
-        ==========
-        {prompt}
-        ==========
-        {answers_text}
+    def _step6_evaluate_goal(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 6: Is the goal reached? If not, create a new todo list; if yes, return the answer."""
+        logger.info(f"🎯 Agent {agent_id}: Step 6 - Evaluating goal achievement")
+
+        requirement_analysis = session["requirement_analysis"]
+        todo_list = session["todo_list"]
+        conversation_history = session["conversation_history"]
+
+        # Prepare task results summary
+        completed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.COMPLETED]
+        failed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.FAILED]
+
+        tasks_summary = "Completed Tasks:\n"
+        for task in completed_tasks:
+            tasks_summary += f"- {task.title}: {task.result or 'Completed'}\n"
+
+        if failed_tasks:
+            tasks_summary += "\nFailed Tasks:\n"
+            for task in failed_tasks:
+                tasks_summary += f"- {task.title}: {task.error_message or 'Failed'}\n"
+
+        evaluation_prompt = f"""
+        Evaluate whether the original goal has been achieved based on the work completed:
 
-        Consider the prompt carefully, analyze what the user wants to achieve, and think through the best approach to provide a comprehensive and helpful response.
+        Original Goal: {requirement_analysis.goal}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
 
-        IMPORTANT: When formulating your response, take into account both the original prompt AND the specific questions that were asked along with their answers. The questions reveal what additional information was deemed necessary, and the answers provide crucial context that should inform your response.
+        {tasks_summary}
 
-        Use any available tools to gather information or perform actions that would improve your response.
+        Determine:
+        1. Whether the main goal has been achieved (true/false)
+        2. Percentage of goal completion (0-100)
+        3. Which success criteria have been met
+        4. Which success criteria still need to be met
+        5. Detailed feedback on the goal achievement
+        6. Next steps if the goal is not fully achieved
 
-        Provide your best result for the given prompt, incorporating all the context from the question-answer pairs.
+        Provide a comprehensive evaluation of goal achievement.
         """
 
-        messages = [{"role": "user", "content": thinking_prompt}]
+        messages = [{"role": "user", "content": evaluation_prompt}]
 
         try:
-            response = self._call_ai_parse(messages, Result, conversation_history)
-            result_obj = self._extract_parsed_content(response, Result)
+            response = self._ai_client.parse(conversation_history + messages, GoalEvaluation)
+            goal_evaluation = self._extract_parsed_content(response, GoalEvaluation)
+            token_usage = self._extract_token_usage(response)
+
+            # Update session
+            session["goal_evaluation"] = goal_evaluation
+            session["token_summary"].goal_evaluation = token_usage
 
-            # Track token usage for thinking process
-            token_summary.thinking_process = self._extract_token_usage(response)
-            logger.info(f"🧠 Agent {agent_id}: Thinking completed (tokens: {token_summary.thinking_process.total_tokens})")
+            if goal_evaluation.goal_achieved:
+                # Goal achieved, generate final response
+                logger.info(f"🎯 Agent {agent_id}: Goal achieved! Generating final response")
+                return self._generate_final_response(agent_id, session)
+            else:
+                # Goal not achieved, complete with partial success for now
+                logger.info(f"🎯 Agent {agent_id}: Goal not fully achieved ({goal_evaluation.completion_percentage}%), completing with current progress")
+                return self._generate_final_response(agent_id, session)
 
-            return result_obj.result
         except Exception as e:
-            logger.error(f"Error in thinking process: {e}")
-            raise RuntimeError(f"Failed to process prompt with AI client: {e}") from e
-
-    def _quality_check_and_iterate(self, prompt: str, result: str, answers: Union[list, dict[str, str]], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> str:
-        """
-        Check the quality of the result and iterate if needed.
-
-        Args:
-            prompt: The original prompt
-            result: The current result
-            answers: Question-answer pairs or simple answers
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+            logger.error(f"Failed to evaluate goal: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to evaluate goal: {str(e)}",
+                token_summary=session["token_summary"]
+            )
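`GoalEvaluation` follows the same pattern: only `goal_achieved`, `completion_percentage`, and `feedback` are read here, while the six numbered points in the evaluation prompt hint at the remaining fields. A hedged reconstruction:

```python
class GoalEvaluation(BaseModel):  # reconstruction; the list fields are assumptions from the prompt text
    goal_achieved: bool
    completion_percentage: int  # 0-100
    criteria_met: list[str]
    criteria_not_met: list[str]
    feedback: str
    next_steps: list[str]
```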
604
735
 
605
- Returns:
606
- The final improved result
607
- """
608
-        if conversation_history is None:
-            conversation_history = []
-
-        current_result = result
+    def _generate_final_response(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Generate the final response in the requested format."""
+        logger.info(f"📝 Agent {agent_id}: Generating final structured response")
 
-        if self._max_iterations == 0:
-            logger.info(f"✅ Agent {agent_id}: Skipping quality check (max_iterations=0)")
-            return current_result
+        requirement_analysis = session["requirement_analysis"]
+        goal_evaluation = session.get("goal_evaluation")
+        todo_list = session["todo_list"]
+        final_response_structure = session["final_response_structure"]
+        conversation_history = session["conversation_history"]
 
-        logger.info(f"🔍 Agent {agent_id}: Starting quality check and improvement process (max iterations: {self._max_iterations})")
+        # Prepare summary of work done
+        completed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.COMPLETED]
+        work_summary = "Work completed:\n"
+        for task in completed_tasks:
+            work_summary += f"- {task.title}: {task.result or 'Completed'}\n"
 
-        # Format context information for quality checks
-        context_text = self._format_qa_context_for_quality_check(answers)
+        evaluation_text = ""
+        if goal_evaluation:
+            evaluation_text = f"\nGoal Achievement: {goal_evaluation.completion_percentage}% complete\nFeedback: {goal_evaluation.feedback}"
 
-        for iteration in range(self._max_iterations):
-            quality_prompt = f"""
-Given this original prompt:
-==========
-{prompt}
-==========
-{context_text}
-And this result:
-==========
-{current_result}
-==========
-
-Is this result good and comprehensive, or does it need to be improved? Consider if the response fully addresses the prompt, provides sufficient detail, and would be helpful to the user.
+        final_prompt = f"""
+Based on the work completed for this goal:
+Goal: {requirement_analysis.goal}
 
-IMPORTANT: Also evaluate whether the result properly incorporates and addresses the information provided through the question-answer pairs above. The response should demonstrate that it has taken this additional context into account.
+{work_summary}{evaluation_text}
 
-Evaluate the quality and provide feedback if improvements are needed.
+Generate the final response in the exact format requested. Summarize the results and provide a comprehensive answer that addresses the original requirement.
 """
+
+        messages = [{"role": "user", "content": final_prompt}]
+
+        try:
+            response = self._ai_client.parse(conversation_history + messages, final_response_structure)
+            final_response = self._extract_parsed_content(response, final_response_structure)
+            token_usage = self._extract_token_usage(response)
 
-            messages = [{"role": "user", "content": quality_prompt}]
+            # Update session
+            session["token_summary"].final_response = token_usage
+            session["final_response"] = final_response
+            session["state"] = AgentState.COMPLETED
+
+            logger.info(f"📝 Agent {agent_id}: Final response generated successfully")
+
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.COMPLETED,
+                final_response=final_response,
+                goal_evaluation=goal_evaluation,
+                token_summary=session["token_summary"]
+            )
 
+        except Exception as e:
+            logger.error(f"Failed to generate final response: {e}")
+            # Fallback response
             try:
-                response = self._call_ai_parse(messages, QualityCheck, conversation_history)
-                quality_check = self._extract_parsed_content(response, QualityCheck)
-
-                # Track token usage for quality check
-                quality_check_tokens = self._extract_token_usage(response)
-                token_summary.quality_checks.append(quality_check_tokens)
-
-                if quality_check.is_good:
-                    logger.info(f"✅ Agent {agent_id}: Quality check passed on iteration {iteration + 1} (tokens: {quality_check_tokens.total_tokens})")
-                    break
-
-                logger.info(f"🔄 Agent {agent_id}: Quality check iteration {iteration + 1} - Improvements needed: {quality_check.feedback[:100]}... (tokens: {quality_check_tokens.total_tokens})")
+                fallback_response = self._create_fallback_response(final_response_structure, str(e))
+                session["final_response"] = fallback_response
+                session["state"] = AgentState.COMPLETED
 
-                # Improve the result
-                improvement_prompt = f"""
-The original prompt was:
-==========
-{prompt}
-==========
-{context_text}
-The current result is:
-==========
-{current_result}
-==========
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.COMPLETED,
+                    final_response=fallback_response,
+                    token_summary=session["token_summary"]
+                )
+            except Exception as fallback_error:
+                logger.error(f"Fallback response creation failed: {fallback_error}")
+                session["state"] = AgentState.FAILED
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.FAILED,
+                    final_response=f"Failed to generate response: {str(e)}",
+                    token_summary=session["token_summary"]
+                )
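The new `_generate_final_response` reads everything from the session dict and parses the model output directly into the caller-supplied Pydantic structure. A minimal usage sketch with an illustrative response model; the `agent(...)` entry point and the field names below are assumptions, not part of the published API:

    from pydantic import BaseModel
    from mbxai.agent.models import AgentState

    class ReportResponse(BaseModel):
        # Illustrative final_response_structure; any Pydantic model should fit.
        summary: str
        recommendations: list[str]

    # Hypothetical call shape:
    # response = agent_client.agent("Audit our logging setup", final_response_structure=ReportResponse)
    # if response.state == AgentState.COMPLETED:
    #     print(response.final_response.summary)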
 
-Feedback for improvement:
-==========
-{quality_check.feedback}
-==========
+    # Essential helper methods
+    def _handle_human_responses(self, session: dict[str, Any], human_response: Union[HumanInLoopResponse, HumanInLoopResponseBatch, list[HumanInLoopResponse]]) -> dict[str, Any]:
+        """Handle human response(s) - supports single response, batch, or list."""
+        # Normalize input to a list of responses
+        responses = []
+
+        if isinstance(human_response, HumanInLoopResponse):
+            responses = [human_response]
+        elif isinstance(human_response, HumanInLoopResponseBatch):
+            responses = human_response.responses
+        elif isinstance(human_response, list):
+            responses = human_response
+        else:
+            logger.warning(f"Unknown human response type: {type(human_response)}")
+            return session
+
+        # Process each response
+        for response in responses:
+            session = self._handle_human_response(session, response)
+
+        return session
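All three accepted shapes funnel through the same per-response handler. A sketch of the normalization, with field names taken from this diff and values purely illustrative (whether other constructor fields are required is not shown here):

    from mbxai.agent.models import HumanInLoopResponse, HumanInLoopResponseBatch

    single = HumanInLoopResponse(interaction_id="q-1", answer="Use the staging database")
    batch = HumanInLoopResponseBatch(responses=[single])
    as_list = [single]
    # Each of these is normalized to a list and applied in order, e.g.:
    # session = client._handle_human_responses(session, single)
    # session = client._handle_human_responses(session, batch)
    # session = client._handle_human_responses(session, as_list)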
 
-Please provide an improved version that addresses the feedback while maintaining the strengths of the current result. Make sure to incorporate all the context from the question-answer pairs above.
-"""
-
-                messages = [{"role": "user", "content": improvement_prompt}]
-                improvement_response = self._call_ai_parse(messages, Result, conversation_history)
-                result_obj = self._extract_parsed_content(improvement_response, Result)
-                current_result = result_obj.result
-
-                # Track token usage for improvement
-                improvement_tokens = self._extract_token_usage(improvement_response)
-                token_summary.improvements.append(improvement_tokens)
-
-                logger.info(f"⚡ Agent {agent_id}: Improvement iteration {iteration + 1} completed (tokens: {improvement_tokens.total_tokens})")
-
-            except Exception as e:
-                logger.warning(f"Error in quality check iteration {iteration}: {e}")
-                break
+    def _handle_human_response(self, session: dict[str, Any], human_response: HumanInLoopResponse) -> dict[str, Any]:
+        """Handle human response and update session accordingly."""
+        pending_request = session.get("pending_human_request")
+        if not pending_request or pending_request.id != human_response.interaction_id:
+            logger.warning("Received human response for unknown or expired interaction")
+            return session
 
-        total_quality_tokens = sum(usage.total_tokens for usage in token_summary.quality_checks)
-        total_improvement_tokens = sum(usage.total_tokens for usage in token_summary.improvements)
-        logger.info(f"🏁 Agent {agent_id}: Quality check completed - {len(token_summary.quality_checks)} checks, {len(token_summary.improvements)} improvements (Quality tokens: {total_quality_tokens}, Improvement tokens: {total_improvement_tokens})")
+        # For now, just continue with execution
+        session["state"] = AgentState.EXECUTING_TASKS
+        session.pop("pending_human_request", None)
 
-        return current_result
+        # Store human input for context
+        if human_response.answer:
+            session["human_context"] = session.get("human_context", "") + f"\nUser: {human_response.answer}"
+        if human_response.additional_context:
+            session["human_context"] = session.get("human_context", "") + f"\nContext: {human_response.additional_context}"
+
+        return session
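Accepted answers accumulate in a rolling `human_context` string that later prompts interpolate. A minimal sketch of the accumulation, with a plain dict standing in for the session:

    session = {"human_context": ""}
    session["human_context"] += "\nUser: Deploy to eu-central-1"
    session["human_context"] += "\nContext: Budget is capped at 50 USD/month"
    # _execute_task embeds session["human_context"] into its execution prompt.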
 
-    def _generate_final_response(self, prompt: str, result: str, final_response_structure: Type[BaseModel], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> BaseModel:
-        """
-        Generate the final response in the requested format.
+    def _should_request_human_interaction(self, task: Task, session: dict[str, Any]) -> bool:
+        """Determine if human interaction is needed for a task."""
+        # Request human interaction for high complexity tasks
+        if task.estimated_complexity >= 4:
+            return True
+
+        # Check if task requires capabilities we don't have
+        tool_analysis = session.get("tool_analysis")
+        if tool_analysis and tool_analysis.missing_capabilities:
+            for capability in tool_analysis.missing_capabilities:
+                if capability.lower() in task.description.lower():
+                    return True
+
+        return False
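Escalation follows two rules: complexity 4 or higher always asks a human; otherwise a task escalates only when its description mentions a capability flagged as missing. A worked example (Task construction is sketched from the field names in this diff):

    task = Task(
        title="Notify stakeholders",
        description="Send email to the team once metrics are collected",
        estimated_complexity=2,
    )
    # missing_capabilities=["send email"]: "send email" appears in the
    # lowercased description, so the task escalates (True).
    # missing_capabilities=["payment processing"]: no match, so False.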
 
-        Args:
-            prompt: The original prompt
-            result: The processed result
-            final_response_structure: The expected response structure
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+    def _determine_interaction_type(self, task: Optional[Task], session: dict[str, Any]) -> HumanInteractionType:
+        """Determine the appropriate interaction type for a task."""
+        if not task:
+            return HumanInteractionType.QUESTION
+
+        # If we have dialog options available, prefer those
+        dialog_options = session.get("dialog_options", [])
+        if dialog_options:
+            return HumanInteractionType.DIALOG_OPTION
+
+        # For high complexity tasks, use decision
+        if task.estimated_complexity >= 4:
+            return HumanInteractionType.DECISION
+
+        # Default to question
+        return HumanInteractionType.QUESTION
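The precedence is fixed: predefined dialog options first, then complexity, then a plain question. Illustrative expectations, assuming a client instance and Task values as in the previous sketch:

    # client._determine_interaction_type(None, {}) -> QUESTION
    # client._determine_interaction_type(task, {"dialog_options": [opt]}) -> DIALOG_OPTION
    # client._determine_interaction_type(complex_task, {}) -> DECISION (complexity >= 4)
    # client._determine_interaction_type(simple_task, {}) -> QUESTION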
 
-        Returns:
-            The final response in the requested format
-        """
-        if conversation_history is None:
-            conversation_history = []
-        final_prompt = f"""
-Given this original prompt:
-==========
-{prompt}
-==========
+    def _execute_task(self, task: Task, session: dict[str, Any], conversation_history: list[dict[str, Any]]) -> tuple[str, TokenUsage]:
+        """Execute a task and return the result."""
+        # Prepare context
+        requirement = session["requirement_analysis"]
+        human_context = session.get("human_context", "")
+
+        execution_prompt = f"""
+Execute this task to help achieve the goal:
+Goal: {requirement.goal}
+Task: {task.title}
+Description: {task.description}
+Tools needed: {', '.join(task.tools_needed) if task.tools_needed else 'None'}
 
-And this processed result:
-==========
-{result}
-==========
+{human_context}
 
-Generate the final answer in the exact format requested. Make sure the response is well-structured and addresses all aspects of the original prompt.
+Use any available tools to complete this task. Provide a detailed result of what was accomplished.
+If the task cannot be completed with available tools, explain what was attempted and what is missing.
 """
 
-        messages = [{"role": "user", "content": final_prompt}]
+        messages = [{"role": "user", "content": execution_prompt}]
 
         try:
-            response = self._call_ai_parse(messages, final_response_structure, conversation_history)
-            final_response = self._extract_parsed_content(response, final_response_structure)
+            response = self._ai_client.parse(conversation_history + messages, Result)
+            result_obj = self._extract_parsed_content(response, Result)
+            token_usage = self._extract_token_usage(response)
 
-            # Track token usage for final response generation
-            token_summary.final_response = self._extract_token_usage(response)
-            logger.info(f"📝 Agent {agent_id}: Final structured response generated (tokens: {token_summary.final_response.total_tokens})")
+            return result_obj.result, token_usage
 
-            return final_response
         except Exception as e:
-            logger.error(f"Error generating final response: {e}")
-            # Fallback - try to create a basic response
-            try:
-                # If the structure has a 'result' field, use that
-                if hasattr(final_response_structure, 'model_fields') and 'result' in final_response_structure.model_fields:
-                    return final_response_structure(result=result)
-                else:
-                    # Try to create with the first field
-                    fields = final_response_structure.model_fields
-                    if fields:
-                        first_field = next(iter(fields.keys()))
-                        return final_response_structure(**{first_field: result})
-                    else:
-                        return final_response_structure()
-            except Exception as fallback_error:
-                logger.error(f"Fallback response creation failed: {fallback_error}")
-                # Last resort - return the structure with default values
-                return final_response_structure()
+            logger.error(f"Task execution failed: {e}")
+            return f"Task execution failed: {str(e)}", TokenUsage()
 
-    def get_session_info(self, agent_id: str) -> dict[str, Any]:
-        """
-        Get information about an agent session.
+    def _get_available_tools(self) -> dict[str, str]:
+        """Get available tools from the AI client."""
+        tools = {}
 
-        Args:
-            agent_id: The agent session identifier
-
-        Returns:
-            Session information dictionary
-
-        Raises:
-            ValueError: If the agent session is not found
-        """
-        if agent_id not in self._agent_sessions:
+        # Try to get tools from different client types
+        if hasattr(self._ai_client, 'tools') and self._ai_client.tools:
+            for tool in self._ai_client.tools:
+                tools[tool.name] = tool.description
+        elif hasattr(self._ai_client, '_tools') and self._ai_client._tools:
+            for tool in self._ai_client._tools:
+                tools[tool.name] = tool.description
+
+        return tools
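The lookup is duck-typed: any client exposing `.tools` or `._tools` whose items carry `.name` and `.description` works. A stand-in for a quick check (illustrative only):

    from types import SimpleNamespace

    fake_client = SimpleNamespace(
        tools=[SimpleNamespace(name="search", description="Web search")]
    )
    # With self._ai_client = fake_client, _get_available_tools()
    # returns {"search": "Web search"}.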
+
+    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
+        """Extract the parsed content from the AI response."""
+        if hasattr(response, 'choices') and len(response.choices) > 0:
+            choice = response.choices[0]
+            if hasattr(choice.message, 'parsed') and choice.message.parsed:
+                return choice.message.parsed
+            elif hasattr(choice.message, 'content'):
+                try:
+                    content_dict = json.loads(choice.message.content)
+                    return response_format(**content_dict)
+                except (json.JSONDecodeError, TypeError):
+                    # Create default response based on type
+                    return self._create_fallback_response(response_format)
+
+        return self._create_fallback_response(response_format)
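Extraction degrades gracefully: native `parsed` content first, then a JSON re-parse of the raw message, then a typed fallback. A sketch of the middle path with stand-in objects (an OpenAI-style response shape is assumed):

    import json
    from types import SimpleNamespace

    msg = SimpleNamespace(parsed=None, content=json.dumps({"result": "done"}))
    resp = SimpleNamespace(choices=[SimpleNamespace(message=msg)])
    # parsed is falsy -> content is valid JSON -> Result(result="done")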
+
+    def _create_fallback_response(self, response_format: Type[BaseModel], error_msg: str = "") -> BaseModel:
+        """Create a fallback response when parsing fails."""
+        try:
+            if response_format == RequirementAnalysis:
+                return RequirementAnalysis(
+                    goal="Goal analysis failed",
+                    success_criteria=["Unable to determine criteria"],
+                    complexity_estimate=5
+                )
+            elif response_format == ToolAnalysis:
+                return ToolAnalysis(
+                    relevant_tools=[],
+                    tool_mapping={},
+                    missing_capabilities=["Analysis failed"]
+                )
+            elif response_format == TodoList:
+                return TodoList(tasks=[])
+            elif response_format == GoalEvaluation:
+                return GoalEvaluation(
+                    goal_achieved=False,
+                    completion_percentage=0,
+                    completed_criteria=[],
+                    remaining_criteria=["Evaluation failed"],
+                    feedback=f"Goal evaluation failed: {error_msg}"
+                )
+            elif response_format == Result:
+                return Result(result=f"Result generation failed: {error_msg}")
+            else:
+                # Try to create with default values
+                return response_format()
+        except Exception:
+            # Last resort - return basic result
+            return Result(result=f"Failed to create response: {error_msg}")
+
+    def _extract_token_usage(self, response: Any) -> TokenUsage:
+        """Extract token usage information from an AI response."""
+        try:
+            if hasattr(response, 'usage') and response.usage:
+                usage = response.usage
+                return TokenUsage(
+                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
+                    completion_tokens=getattr(usage, 'completion_tokens', 0),
+                    total_tokens=getattr(usage, 'total_tokens', 0)
+                )
+        except (AttributeError, TypeError) as e:
+            logger.debug(f"Could not extract token usage: {e}")
+
+        return TokenUsage()
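The three counters map one-to-one from the provider's `usage` object, with zero defaults when a field is absent. Stand-in sketch:

    from types import SimpleNamespace

    resp = SimpleNamespace(
        usage=SimpleNamespace(prompt_tokens=120, completion_tokens=80, total_tokens=200)
    )
    # -> TokenUsage(prompt_tokens=120, completion_tokens=80, total_tokens=200)
    # A response without a usable .usage falls through to TokenUsage().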
+
+    # Session management methods (kept for compatibility)
+    def get_session_info(self, agent_id: str) -> dict[str, Any]:
+        """Get information about an agent session."""
+        session = self._session_handler.get_session(agent_id)
+        if not session:
             raise ValueError(f"Agent session {agent_id} not found")
 
-        session = self._agent_sessions[agent_id].copy()
-        # Remove sensitive information and add summary
+        session = session.copy()
         session["conversation_length"] = len(session.get("conversation_history", []))
         return session
 
     def delete_session(self, agent_id: str) -> bool:
-        """
-        Delete an agent session.
-
-        Args:
-            agent_id: The agent session identifier
-
-        Returns:
-            True if session was deleted, False if it didn't exist
-        """
-        if agent_id in self._agent_sessions:
-            del self._agent_sessions[agent_id]
+        """Delete an agent session."""
+        deleted = self._session_handler.delete_session(agent_id)
+        if deleted:
             logger.info(f"🗑️ Deleted agent session {agent_id}")
-            return True
-        return False
+        return deleted
 
     def list_sessions(self) -> list[str]:
-        """
-        List all active agent session IDs.
-
-        Returns:
-            List of agent session IDs
-        """
-        return list(self._agent_sessions.keys())
+        """List all active agent session IDs."""
+        return self._session_handler.list_sessions()
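Session CRUD now delegates to a pluggable `SessionHandler` instead of the old in-process `_agent_sessions` dict, so state can live outside the process. A minimal customization sketch, subclassing the shipped `InMemorySessionHandler` (the override point mirrors the calls above; treat the exact handler interface as an assumption):

    from mbxai.agent.models import InMemorySessionHandler

    class AuditingSessionHandler(InMemorySessionHandler):
        """Illustrative handler that logs deletions before delegating."""

        def delete_session(self, agent_id: str) -> bool:
            print(f"About to delete session {agent_id}")
            return super().delete_session(agent_id)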