mbxai 2.2.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mbxai/agent/client.py CHANGED
@@ -1,8 +1,8 @@
 """
-Agent client implementation for MBX AI.
+Enhanced Agent client implementation for MBX AI with human-in-the-loop capabilities.
 """
 
-from typing import Any, Union, Type, Callable
+from typing import Any, Union, Type, Callable, Optional
 import logging
 import json
 from pydantic import BaseModel
@@ -10,49 +10,241 @@ from pydantic import BaseModel
 from ..openrouter import OpenRouterClient
 from ..tools import ToolClient
 from ..mcp import MCPClient
-from .models import AgentResponse, Question, QuestionList, AnswerList, Result, QualityCheck, TokenUsage, TokenSummary
+from .models import (
+    AgentResponse, AgentState, RequirementAnalysis, ToolAnalysis, TodoList, Task, TaskStatus,
+    HumanInLoopRequest, HumanInLoopResponse, HumanInLoopResponseBatch, HumanInteractionType, DialogOption,
+    GoalEvaluation, TokenUsage, TokenSummary, Result, SessionHandler, InMemorySessionHandler
+)
 
 logger = logging.getLogger(__name__)
 
 
-class AgentClient:
-    """
-    Agent client that wraps other AI clients with a dialog-based thinking process.
+class TaskManager:
+    """Manages task generation and execution for the agent."""
+
+    def __init__(self, ai_client):
+        self.ai_client = ai_client
+
+    def generate_todo_list(
+        self,
+        requirement_analysis: RequirementAnalysis,
+        tool_analysis: ToolAnalysis,
+        conversation_history: list[dict[str, Any]] = None
+    ) -> tuple[TodoList, TokenUsage]:
+        """Generate a todo list based on requirement and tool analysis."""
+        if conversation_history is None:
+            conversation_history = []
+
+        # Format available tools
+        tools_text = ""
+        if tool_analysis.relevant_tools:
+            tools_text = "\n\nAvailable Tools:\n"
+            for tool_name in tool_analysis.relevant_tools:
+                purpose = tool_analysis.tool_mapping.get(tool_name, "No description available")
+                tools_text += f"- {tool_name}: {purpose}\n"
+
+        # Format missing capabilities
+        missing_text = ""
+        if tool_analysis.missing_capabilities:
+            missing_text = "\n\nMissing Capabilities (to be handled manually):\n"
+            for capability in tool_analysis.missing_capabilities:
+                missing_text += f"- {capability}\n"
+
+        prompt = f"""
+        Based on this requirement analysis:
+        Goal: {requirement_analysis.goal}
+        Sub-goals: {', '.join(requirement_analysis.sub_goals)}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
+        Constraints: {', '.join(requirement_analysis.constraints)}
+        Complexity: {requirement_analysis.complexity_estimate}/10
+        {tools_text}{missing_text}
+
+        Create a detailed todo list with specific, actionable tasks to achieve the goal.
+        Each task should be concrete and measurable. Consider dependencies between tasks.
+        Assign the appropriate tools to tasks that need them.
+        Estimate complexity for each task (1-5 scale).
+        Provide an estimated total time to complete all tasks.
+
+        Break down complex goals into smaller, manageable tasks that can be executed step by step.
+        """
+
+        messages = [{"role": "user", "content": prompt}]
+
+        try:
+            response = self.ai_client.parse(conversation_history + messages, TodoList)
+            todo_list = self._extract_parsed_content(response, TodoList)
+            token_usage = self._extract_token_usage(response)
+
+            # Validate and fix todo list
+            self._validate_todo_list(todo_list)
+
+            logger.info(f"Generated todo list with {len(todo_list.tasks)} tasks")
+            return todo_list, token_usage
+
+        except Exception as e:
+            logger.error(f"Failed to generate todo list: {e}")
+            # Return a basic todo list
+            basic_task = Task(
+                title="Complete the requirement",
+                description=requirement_analysis.goal,
+                estimated_complexity=requirement_analysis.complexity_estimate
+            )
+            return TodoList(tasks=[basic_task], estimated_total_time="Unknown"), TokenUsage()
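The method returns a `(TodoList, TokenUsage)` tuple and, on any parse failure, degrades to a single catch-all task instead of raising, so callers always get something executable. A hedged usage sketch (the `ai_client` here stands for any object exposing `.parse(messages, response_format)`, which is all `TaskManager` requires):

```python
# Usage sketch, assuming requirement_analysis and tool_analysis were produced
# by the step-1/step-2 calls shown further down in this diff.
manager = TaskManager(ai_client)
todo_list, usage = manager.generate_todo_list(requirement_analysis, tool_analysis)

for task in todo_list.tasks:
    print(task.id, task.title, task.dependencies)  # Task fields used by _validate_todo_list
print(f"todo generation cost: {usage.total_tokens} tokens")
```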
+
+    def _validate_todo_list(self, todo_list: TodoList):
+        """Validate and fix the todo list."""
+        # Ensure all tasks have valid IDs
+        task_ids = set()
+        for task in todo_list.tasks:
+            if not task.id or task.id in task_ids:
+                task.id = str(__import__("uuid").uuid4())
+            task_ids.add(task.id)
+
+        # Validate dependencies exist
+        for task in todo_list.tasks:
+            valid_deps = [dep for dep in task.dependencies if dep in task_ids]
+            task.dependencies = valid_deps
+
+    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
+        """Extract the parsed content from the AI response."""
+        if hasattr(response, 'choices') and len(response.choices) > 0:
+            choice = response.choices[0]
+            if hasattr(choice.message, 'parsed') and choice.message.parsed:
+                return choice.message.parsed
+            elif hasattr(choice.message, 'content'):
+                try:
+                    content_dict = json.loads(choice.message.content)
+                    return response_format(**content_dict)
+                except (json.JSONDecodeError, TypeError):
+                    if response_format == TodoList:
+                        return TodoList(tasks=[])
+                    else:
+                        return response_format()
+
+        # Fallback
+        if response_format == TodoList:
+            return TodoList(tasks=[])
+        else:
+            return response_format()
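`_extract_parsed_content` tries three sources in order: the SDK-style pre-parsed model on `choice.message.parsed`, a JSON decode of the raw `message.content`, and finally an empty default instance. A hedged illustration, reusing the `manager` from the sketch above and hand-rolled `SimpleNamespace` stand-ins invented for the example:

```python
from types import SimpleNamespace

# Mimics a response whose structured parse failed, leaving raw JSON in content.
message = SimpleNamespace(parsed=None, content='{"tasks": []}')
response = SimpleNamespace(choices=[SimpleNamespace(message=message)])

todo = manager._extract_parsed_content(response, TodoList)
# parsed is falsy, so the JSON branch runs and yields TodoList(tasks=[]).
```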
+
+    def _extract_token_usage(self, response: Any) -> TokenUsage:
+        """Extract token usage information from an AI response."""
+        try:
+            if hasattr(response, 'usage') and response.usage:
+                usage = response.usage
+                return TokenUsage(
+                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
+                    completion_tokens=getattr(usage, 'completion_tokens', 0),
+                    total_tokens=getattr(usage, 'total_tokens', 0)
+                )
+        except (AttributeError, TypeError) as e:
+            logger.debug(f"Could not extract token usage: {e}")
+
+        return TokenUsage()
+
+
+class DialogHandler:
+    """Handles human-in-the-loop interactions."""
+
+    def __init__(self, ai_client):
+        self.ai_client = ai_client
 
-    The agent follows a multi-step process:
-    1. Analyze the prompt and generate clarifying questions (if ask_questions=True)
-    2. Wait for user answers or auto-answer questions
-    3. Process the prompt with available information
-    4. Quality check the result and iterate if needed
-    5. Generate final response in the requested format
+    def create_human_interaction_request(
+        self,
+        interaction_type: HumanInteractionType,
+        context: str,
+        task: Optional[Task] = None,
+        available_dialog_options: list[DialogOption] = None
+    ) -> HumanInLoopRequest:
+        """Create a human interaction request based on the context."""
+        if available_dialog_options is None:
+            available_dialog_options = []
+
+        if interaction_type == HumanInteractionType.DECISION:
+            return self._create_decision_request(context, task)
+        elif interaction_type == HumanInteractionType.QUESTION:
+            return self._create_question_request(context, task)
+        elif interaction_type == HumanInteractionType.DIALOG_OPTION:
+            return self._create_dialog_option_request(context, task, available_dialog_options)
+        else:
+            # Default question request
+            return HumanInLoopRequest(
+                interaction_type=HumanInteractionType.QUESTION,
+                prompt=f"I need your input for: {context}",
+                context=context
+            )
 
-    Requirements:
-    - The wrapped AI client MUST have a 'parse' method for structured responses
-    - All AI interactions use structured Pydantic models for reliable parsing
-    - Supports OpenRouterClient, ToolClient, and MCPClient (all have parse methods)
+    def _create_decision_request(self, context: str, task: Optional[Task]) -> HumanInLoopRequest:
+        """Create a decision request."""
+        task_info = f" for task '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.DECISION,
+            prompt=f"I need you to make a decision{task_info}: {context}",
+            options=["proceed", "skip", "modify", "abort"],
+            context=context
+        )
 
-    Tool Registration:
-    - Provides proxy methods for tool registration when supported by the underlying client
-    - register_tool(): Available with ToolClient and MCPClient
-    - register_mcp_server(): Available with MCPClient only
-    - Throws AttributeError for unsupported clients (e.g., OpenRouterClient)
+    def _create_question_request(self, context: str, task: Optional[Task]) -> HumanInLoopRequest:
+        """Create a question request."""
+        task_info = f" while working on '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.QUESTION,
+            prompt=f"I have a question{task_info}: {context}",
+            context=context
+        )
 
-    Configuration:
-    - max_iterations: Controls how many times the agent will iterate to improve results (default: 2)
-    - Set to 0 to disable quality improvement iterations
+    def _create_dialog_option_request(
+        self,
+        context: str,
+        task: Optional[Task],
+        available_dialog_options: list[DialogOption]
+    ) -> HumanInLoopRequest:
+        """Create a dialog option request."""
+        task_info = f" for task '{task.title}'" if task else ""
+        return HumanInLoopRequest(
+            interaction_type=HumanInteractionType.DIALOG_OPTION,
+            prompt=f"Please select an action{task_info}: {context}",
+            dialog_options=available_dialog_options,
+            context=context
+        )
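Each private helper binds one `HumanInteractionType` to a pre-worded `HumanInLoopRequest`; only the DECISION variant carries a fixed option set, and unknown types fall back to a plain question. A short hedged sketch of the dispatch:

```python
handler = DialogHandler(ai_client)

request = handler.create_human_interaction_request(
    interaction_type=HumanInteractionType.DECISION,
    context="Overwrite the existing report file?",
)
assert request.options == ["proceed", "skip", "modify", "abort"]
# A DIALOG_OPTION request would instead carry the caller's DialogOption list.
```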
+
+
+class AgentClient:
+    """
+    Enhanced Agent client that follows a structured 6-step process:
+    1. Understand the requirement - what is the expected goal?
+    2. Which tools do I have to help me reach that goal?
+    3. Think about a todo list - what is required to reach the goal?
+    4. Work step by step on the todo list
+    5. When human-in-the-loop is active, hold a dialog with the user
+    6. Last step - is the goal reached? If not, create a new todo list; if yes, return the answer
+
+    The agent supports:
+    - A prompt/requirement/task
+    - A list of tools
+    - A list of dialog options
+    - Human in the loop - yes or no
+    - Possible question types for human in the loop (decision, question, dialog_option)
+    - A Pydantic model for the final response
     """
 
     def __init__(
-        self,
+        self,
         ai_client: Union[OpenRouterClient, ToolClient, MCPClient],
-        max_iterations: int = 2
+        human_in_loop: bool = False,
+        dialog_options: list[DialogOption] = None,
+        max_task_iterations: int = 10,
+        session_handler: SessionHandler = None
     ) -> None:
         """
         Initialize the AgentClient.
 
         Args:
             ai_client: The underlying AI client (OpenRouterClient, ToolClient, or MCPClient)
-            max_iterations: Maximum number of quality improvement iterations (default: 2)
+            human_in_loop: Whether to enable human-in-the-loop interactions
+            dialog_options: Available dialog options for human interactions
+            max_task_iterations: Maximum number of task execution iterations
+            session_handler: Custom session storage handler (defaults to InMemorySessionHandler)
 
         Raises:
             ValueError: If the client doesn't support structured responses (no parse method)
@@ -63,12 +255,20 @@ class AgentClient:
                 f"The provided client {type(ai_client).__name__} does not have a parse method."
             )
 
-        if max_iterations < 0:
-            raise ValueError("max_iterations must be non-negative")
+        if max_task_iterations < 1:
+            raise ValueError("max_task_iterations must be positive")
 
         self._ai_client = ai_client
-        self._max_iterations = max_iterations
-        self._agent_sessions: dict[str, dict[str, Any]] = {}
+        self._human_in_loop = human_in_loop
+        self._dialog_options = dialog_options or []
+        self._max_task_iterations = max_task_iterations
+
+        # Initialize session handler (default to in-memory if none provided)
+        self._session_handler = session_handler or InMemorySessionHandler()
+
+        # Create helper components
+        self._task_manager = TaskManager(ai_client)
+        self._dialog_handler = DialogHandler(ai_client)
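A hedged construction sketch tying the constructor arguments together (the `OpenRouterClient` arguments are elided because they are not part of this diff; any of the three supported clients works as long as it exposes `parse`):

```python
from mbxai.openrouter import OpenRouterClient
from mbxai.agent.client import AgentClient
from mbxai.agent.models import InMemorySessionHandler

ai_client = OpenRouterClient(...)  # constructor arguments omitted here

agent = AgentClient(
    ai_client,
    human_in_loop=True,    # pause for user input when a task needs it
    max_task_iterations=10,  # hard cap on step-4 execution passes
    session_handler=InMemorySessionHandler(),  # swap in a persistent SessionHandler to survive restarts
)
```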
 
     def register_tool(
         self,
@@ -80,17 +280,14 @@ class AgentClient:
         """
         Register a new tool with the underlying AI client.
 
-        This method proxies to the register_tool method of ToolClient or MCPClient.
-
         Args:
             name: The name of the tool
             description: A description of what the tool does
             function: The function to call when the tool is used
-            schema: The JSON schema for the tool's parameters. If None or empty,
-                will be automatically generated from the function signature.
+            schema: The JSON schema for the tool's parameters
 
         Raises:
-            AttributeError: If the underlying client doesn't support tool registration (e.g., OpenRouterClient)
+            AttributeError: If the underlying client doesn't support tool registration
         """
         if hasattr(self._ai_client, 'register_tool'):
             self._ai_client.register_tool(name, description, function, schema)
@@ -105,700 +302,714 @@ class AgentClient:
         """
         Register an MCP server and load its tools.
 
-        This method proxies to the register_mcp_server method of MCPClient.
-
         Args:
             name: The name of the MCP server
             base_url: The base URL of the MCP server
 
         Raises:
-            AttributeError: If the underlying client doesn't support MCP server registration (e.g., OpenRouterClient, ToolClient)
+            AttributeError: If the underlying client doesn't support MCP server registration
         """
         if hasattr(self._ai_client, 'register_mcp_server'):
             self._ai_client.register_mcp_server(name, base_url)
-            logger.debug(f"Registered MCP server '{name}' at {base_url} with {type(self._ai_client).__name__}")
+            logger.debug(f"Registered MCP server '{name}' at {base_url}")
         else:
             raise AttributeError(
                 f"MCP server registration is not supported by {type(self._ai_client).__name__}. "
                 f"Use MCPClient to register MCP servers."
             )
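Both proxies use the same capability check: `hasattr` on the wrapped client, then delegate or raise `AttributeError`. A hedged sketch of guarding a registration when the concrete client type is not known up front (the weather tool is invented for illustration):

```python
def get_weather(city: str) -> str:
    # Invented example tool.
    return f"Sunny in {city}"

try:
    agent.register_tool(
        name="get_weather",
        description="Return the current weather for a city",
        function=get_weather,
        schema={"type": "object", "properties": {"city": {"type": "string"}}},
    )
except AttributeError:
    pass  # wrapped client is a bare OpenRouterClient without tool support
```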
 
-    def _call_ai_parse(self, messages: list[dict[str, Any]], response_format: Type[BaseModel], conversation_history: list[dict[str, Any]] = None) -> Any:
-        """Call the parse method on the AI client with optional conversation history."""
-        # Combine conversation history with new messages
-        if conversation_history:
-            full_messages = conversation_history + messages
-            logger.debug(f"🔗 AI call with {len(conversation_history)} history messages + {len(messages)} new messages = {len(full_messages)} total")
-        else:
-            full_messages = messages
-            logger.debug(f"🔗 AI call with {len(messages)} messages (no history)")
-        return self._ai_client.parse(full_messages, response_format)
-
-    def _validate_answers(self, answers: Any) -> bool:
-        """
-        Validate that answers parameter is a proper AnswerList with content.
-
-        Args:
-            answers: The answers parameter to validate
-
-        Returns:
-            True if answers is valid and has content, False otherwise
-        """
-        # Check if answers is the correct type
-        if not isinstance(answers, AnswerList):
-            logger.warning(f"Invalid answers type: {type(answers)}. Expected AnswerList, treating as no answers.")
-            return False
-
-        # Check if answers has content
-        if not hasattr(answers, 'answers') or not answers.answers:
-            logger.info(f"Empty answers list provided, proceeding without answers processing.")
-            return False
-
-        # Check if answers list contains valid Answer objects
-        for answer in answers.answers:
-            if not hasattr(answer, 'key') or not hasattr(answer, 'answer'):
-                logger.warning(f"Invalid answer object in list: {answer}. Treating as no answers.")
-                return False
-
-        logger.debug(f"Validated {len(answers.answers)} answers")
-        return True
-
-    def _extract_token_usage(self, response: Any) -> TokenUsage:
-        """Extract token usage information from an AI response."""
-        try:
-            if hasattr(response, 'usage') and response.usage:
-                usage = response.usage
-                return TokenUsage(
-                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
-                    completion_tokens=getattr(usage, 'completion_tokens', 0),
-                    total_tokens=getattr(usage, 'total_tokens', 0)
-                )
-        except (AttributeError, TypeError) as e:
-            logger.debug(f"Could not extract token usage: {e}")
-
-        return TokenUsage()  # Return empty usage if extraction fails
-
-    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
-        """Extract the parsed content from the AI response."""
-        if hasattr(response, 'choices') and len(response.choices) > 0:
-            choice = response.choices[0]
-            if hasattr(choice.message, 'parsed') and choice.message.parsed:
-                return choice.message.parsed
-            elif hasattr(choice.message, 'content'):
-                # Try to parse the content as JSON
-                try:
-                    content_dict = json.loads(choice.message.content)
-                    return response_format(**content_dict)
-                except (json.JSONDecodeError, TypeError):
-                    # If parsing fails, create a default response
-                    if response_format == QuestionList:
-                        return QuestionList(questions=[])
-                    elif response_format == Result:
-                        return Result(result=choice.message.content)
-                    elif response_format == QualityCheck:
-                        return QualityCheck(is_good=True, feedback="")
-                    else:
-                        # For other formats, try to create with content
-                        return response_format(result=choice.message.content)
-
-        # Fallback - create empty/default response
-        if response_format == QuestionList:
-            return QuestionList(questions=[])
-        elif response_format == Result:
-            return Result(result="No response generated")
-        elif response_format == QualityCheck:
-            return QualityCheck(is_good=True, feedback="")
-        else:
-            return response_format()
-
     def agent(
         self,
-        prompt: str = None,
-        final_response_structure: Type[BaseModel] = None,
-        ask_questions: bool = True,
+        prompt: str,
+        final_response_structure: Type[BaseModel],
+        tools: list[str] = None,
+        dialog_options: list[DialogOption] = None,
+        human_in_loop: bool = None,
         agent_id: str = None,
-        answers: AnswerList | None = None
+        human_response: Union[HumanInLoopResponse, HumanInLoopResponseBatch, list[HumanInLoopResponse]] = None
     ) -> AgentResponse:
         """
-        Process a prompt through the agent's thinking process.
+        Process a prompt through the enhanced 6-step agent process.
 
         Args:
-            prompt: The prompt from the user (optional if agent_id exists with history)
-            final_response_structure: Pydantic model defining the expected final response format (required for new sessions)
-            ask_questions: Whether to ask clarifying questions (default: True)
+            prompt: The user's prompt/requirement/task
+            final_response_structure: Pydantic model defining the expected final response format
+            tools: List of tool names to use (if None, uses all available tools)
+            dialog_options: Available dialog options for this session
+            human_in_loop: Whether to enable human-in-the-loop (overrides the constructor default)
            agent_id: Optional agent session ID to continue an existing conversation
-            answers: Optional answers to questions (when continuing a conversation with questions)
+            human_response: Response(s) from the human when continuing an interaction - can be a single response, a batch, or a list
 
         Returns:
-            AgentResponse containing either questions to ask or the final response
-
-        Raises:
-            ValueError: If neither prompt nor agent_id with history is provided, or if final_response_structure is missing for new sessions
+            AgentResponse containing the current state and any required interactions
         """
-        # Validate inputs and determine session type
-        is_existing_session = agent_id is not None and agent_id in self._agent_sessions
-        existing_session = self._agent_sessions.get(agent_id, {}) if agent_id else {}
-        conversation_history = existing_session.get("conversation_history", []).copy()
-
-        # Validation logic
+        # Setup session
+        is_existing_session = agent_id is not None and self._session_handler.session_exists(agent_id)
         if not is_existing_session:
-            # New session - both prompt and final_response_structure are required
-            if not prompt:
-                raise ValueError("Prompt is required when starting a new agent session")
-            if not final_response_structure:
-                raise ValueError("final_response_structure is required when starting a new agent session")
-
-            # Create new agent_id if not provided
             if agent_id is None:
                 agent_id = str(__import__("uuid").uuid4())
-            logger.info(f"🚀 Starting new agent process (ID: {agent_id}) with prompt: {prompt[:100]}...")
+            logger.info(f"🚀 Starting new agent process (ID: {agent_id})")
        else:
-            # Existing session - use previous final_response_structure if not provided
-            if not final_response_structure:
-                final_response_structure = existing_session.get("final_response_structure")
-                if not final_response_structure:
-                    raise ValueError("final_response_structure not found in existing session and not provided")
-
-            # Handle optional prompt for existing sessions
-            if not prompt:
-                # Use conversation history to continue without explicit prompt
-                prompt = "[Continue conversation based on history]"
-                logger.info(f"🔄 Continuing agent process (ID: {agent_id}) without explicit prompt (using history)")
-            else:
-                logger.info(f"🔄 Continuing agent process (ID: {agent_id}) with prompt: {prompt[:100]}...")
-
-        # Initialize token summary
-        token_summary = TokenSummary()
-
-        if conversation_history:
-            logger.info(f"📜 Agent {agent_id}: Loaded conversation history with {len(conversation_history)} messages")
-
-        # Store conversation history for AI calls (don't include current prompt yet)
-        history_for_ai = conversation_history.copy()
+            logger.info(f"🔄 Continuing agent process (ID: {agent_id})")
+
+        # Initialize or get session data
+        session = self._session_handler.get_session(agent_id) or {
+            "original_prompt": prompt,
+            "final_response_structure": final_response_structure,
+            "human_in_loop": human_in_loop if human_in_loop is not None else self._human_in_loop,
+            "dialog_options": dialog_options or self._dialog_options,
+            "conversation_history": [],
+            "token_summary": TokenSummary(),
+            "state": AgentState.ANALYZING_REQUIREMENT,
+            "requirement_analysis": None,
+            "tool_analysis": None,
+            "todo_list": None,
+            "current_task_index": 0,
+            "iteration_count": 0
+        }
+
+        # Handle human response(s) if provided
+        if human_response:
+            session = self._handle_human_responses(session, human_response)
+
+        # Store session
+        self._session_handler.set_session(agent_id, session)
+
+        # Process based on current state
+        return self._process_agent_state(agent_id, session)
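Because `agent()` is re-entrant - each call loads the session by `agent_id` and resumes from the stored state - a human-in-the-loop caller drives it as a loop. A hedged sketch continuing the construction example above (`ask_user` is a hypothetical UI hook that turns a `HumanInLoopRequest` into a `HumanInLoopResponse`; that model's fields live in `.models` and are not shown in this diff):

```python
from pydantic import BaseModel

class ReportSummary(BaseModel):  # caller-defined final response structure
    text: str

response = agent.agent(
    prompt="Summarize last week's sales numbers",
    final_response_structure=ReportSummary,
    human_in_loop=True,
)

while response.state == AgentState.WAITING_FOR_HUMAN:
    answer = ask_user(response.human_interaction_request)  # hypothetical UI hook
    response = agent.agent(
        prompt="Summarize last week's sales numbers",
        final_response_structure=ReportSummary,
        agent_id=response.agent_id,
        human_response=answer,
    )

if response.state == AgentState.COMPLETED:
    print(response.final_response)
```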
+
+    def _process_agent_state(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Process the agent based on its current state."""
+        state = session["state"]
+        token_summary = session["token_summary"]
 
-        # Add current prompt to full conversation history for session storage
-        conversation_history.append({"role": "user", "content": prompt})
-
-        # Handle answers provided (skip question generation and process directly)
-        if answers is not None:
-            if self._validate_answers(answers):
-                logger.info(f"📝 Agent {agent_id}: Processing with provided answers, skipping question generation")
-                return self._process_answers_directly(agent_id, prompt, final_response_structure, answers, token_summary, history_for_ai)
+        try:
+            if state == AgentState.ANALYZING_REQUIREMENT:
+                return self._step1_analyze_requirement(agent_id, session)
+            elif state == AgentState.ANALYZING_TOOLS:
+                return self._step2_analyze_tools(agent_id, session)
+            elif state == AgentState.GENERATING_TODO:
+                return self._step3_generate_todo(agent_id, session)
+            elif state == AgentState.EXECUTING_TASKS:
+                return self._step4_execute_tasks(agent_id, session)
+            elif state == AgentState.WAITING_FOR_HUMAN:
+                return self._step5_handle_human_interaction(agent_id, session)
+            elif state == AgentState.EVALUATING_GOAL:
+                return self._step6_evaluate_goal(agent_id, session)
+            elif state == AgentState.COMPLETED:
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.COMPLETED,
+                    final_response=session.get("final_response"),
+                    token_summary=token_summary
+                )
             else:
-                logger.info(f"📝 Agent {agent_id}: Invalid or empty answers provided, proceeding with normal flow")
+                # Unknown state, reset to beginning
+                session["state"] = AgentState.ANALYZING_REQUIREMENT
+                return self._step1_analyze_requirement(agent_id, session)
+
+        except Exception as e:
+            logger.error(f"Error in agent state {state}: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Agent failed with error: {str(e)}",
+                token_summary=token_summary
+            )
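The ladder re-enters the step methods, so any step can redirect the flow simply by mutating `session["state"]` and returning. A behavior-equivalent dispatch-table sketch (not the package's code) makes the state-to-handler mapping explicit:

```python
# Sketch only: the same dispatch expressed as a table inside _process_agent_state.
# (The COMPLETED branch would still need its own early return.)
handlers = {
    AgentState.ANALYZING_REQUIREMENT: self._step1_analyze_requirement,
    AgentState.ANALYZING_TOOLS: self._step2_analyze_tools,
    AgentState.GENERATING_TODO: self._step3_generate_todo,
    AgentState.EXECUTING_TASKS: self._step4_execute_tasks,
    AgentState.WAITING_FOR_HUMAN: self._step5_handle_human_interaction,
    AgentState.EVALUATING_GOAL: self._step6_evaluate_goal,
}
# Unknown states fall back to step 1, mirroring the else branch above.
return handlers.get(state, self._step1_analyze_requirement)(agent_id, session)
```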
+
+    def _step1_analyze_requirement(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 1: Understand the requirement - What is the expected goal."""
+        logger.info(f"📋 Agent {agent_id}: Step 1 - Analyzing requirement")
+
+        prompt = session["original_prompt"]
+        conversation_history = session["conversation_history"]
 
-        # Step 1: Generate questions (if ask_questions is True)
-        if ask_questions:
-            logger.info(f"❓ Agent {agent_id}: Analyzing prompt and generating clarifying questions")
-            questions_prompt = f"""
-            Understand this prompt and what the user wants to achieve by it:
+        analysis_prompt = f"""
+        Analyze this user requirement and understand what they want to achieve:
         ==========
         {prompt}
         ==========
 
-            Think about useful steps and which information are required for it. First ask for required information and details to improve that process, when that is useful for the given case. When it's not useful, return an empty list of questions.
-            Use available tools to gather information or perform actions that would improve your response.
-            Analyze the prompt carefully and determine if additional information would significantly improve the quality of the response. Only ask questions that are truly necessary and would materially impact the outcome.
+        Break down the requirement into:
+        1. The main goal the user wants to achieve
+        2. Sub-goals that contribute to the main goal
+        3. Success criteria to determine if the goal is achieved
+        4. Any constraints or limitations to consider
+        5. Complexity estimate (1-10 scale, where 1 is trivial and 10 is extremely complex)
 
-            IMPORTANT: For each question, provide a technical key identifier that:
-            - Uses only alphanumeric characters and underscores
-            - Starts with a letter
-            - Is descriptive but concise (e.g., "user_name", "email_address", "preferred_genre", "budget_range")
-            - Contains no spaces, hyphens, or special characters like ?, !, @, etc.
+        Provide a comprehensive analysis of what the user wants to accomplish.
         """
+
+        messages = [{"role": "user", "content": analysis_prompt}]
+
+        try:
+            response = self._ai_client.parse(conversation_history + messages, RequirementAnalysis)
+            requirement_analysis = self._extract_parsed_content(response, RequirementAnalysis)
+            token_usage = self._extract_token_usage(response)
 
-            messages = [{"role": "user", "content": questions_prompt}]
+            # Update session
+            session["requirement_analysis"] = requirement_analysis
+            session["token_summary"].requirement_analysis = token_usage
+            session["state"] = AgentState.ANALYZING_TOOLS
 
-            try:
-                response = self._call_ai_parse(messages, QuestionList, history_for_ai)
-                question_list = self._extract_parsed_content(response, QuestionList)
-
-                # Extract token usage for question generation
-                token_summary.question_generation = self._extract_token_usage(response)
-
-                logger.info(f"❓ Agent {agent_id}: Generated {len(question_list.questions)} questions (tokens: {token_summary.question_generation.total_tokens})")
-
-                # If we have questions, return them to the user
-                if question_list.questions:
-                    agent_response = AgentResponse(agent_id=agent_id, questions=question_list.questions, token_summary=token_summary)
-                    # Store the session for continuation
-                    self._agent_sessions[agent_response.agent_id] = {
-                        "original_prompt": prompt,
-                        "final_response_structure": final_response_structure,
-                        "questions": question_list.questions,
-                        "step": "waiting_for_answers",
-                        "token_summary": token_summary,
-                        "conversation_history": history_for_ai  # Include history without current prompt
-                    }
-                    logger.info(f"📋 Agent {agent_id}: Waiting for user answers to {len(question_list.questions)} questions")
-                    return agent_response
-
-            except Exception as e:
-                logger.warning(f"Failed to generate questions: {e}. Proceeding without questions.")
-
-        # Step 2 & 3: No questions or ask_questions=False - proceed directly
-        logger.info(f"⚡ Agent {agent_id}: No questions needed, proceeding directly to processing")
-        return self._process_with_answers(prompt, final_response_structure, [], agent_id, token_summary, history_for_ai)
-
-    def _process_answers_directly(
-        self,
-        agent_id: str,
-        prompt: str,
-        final_response_structure: Type[BaseModel],
-        answers: AnswerList,
-        token_summary: TokenSummary,
-        conversation_history: list[dict[str, Any]]
-    ) -> AgentResponse:
-        """
-        Process answers directly without going through question generation.
-
-        Args:
-            agent_id: The agent session identifier
-            prompt: The current prompt
-            final_response_structure: Expected response structure
-            answers: Provided answers
-            token_summary: Current token usage summary
-            conversation_history: Conversation history
+            logger.info(f"📋 Agent {agent_id}: Requirement analysis completed - Goal: {requirement_analysis.goal}")
 
-        Returns:
-            AgentResponse with the final result
-        """
-        # Check if we have a session with questions to match against
-        session = self._agent_sessions.get(agent_id, {})
-        questions = session.get("questions", [])
-
-        if not questions:
-            # No previous questions - treat as simple additional context
-            logger.info(f"📝 Agent {agent_id}: No previous questions found, treating answers as additional context")
-            answer_dict = {answer.key: answer.answer for answer in answers.answers}
-            qa_pairs = []
-            for answer in answers.answers:
-                qa_pairs.append({
-                    "question": f"Information about {answer.key}",
-                    "key": answer.key,
-                    "answer": answer.answer,
-                    "required": True
-                })
+            # Continue to next step
+            return self._step2_analyze_tools(agent_id, session)
+
+        except Exception as e:
+            logger.error(f"Failed to analyze requirement: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to analyze requirement: {str(e)}",
+                token_summary=session["token_summary"]
+            )
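`RequirementAnalysis` itself is defined in `.models` and not shown in this diff; judging from the fields read in this file (`goal`, `sub_goals`, `success_criteria`, `constraints`, `complexity_estimate`), its shape is presumably close to this hedged reconstruction:

```python
from pydantic import BaseModel

class RequirementAnalysis(BaseModel):  # reconstruction from usage, not the shipped definition
    goal: str
    sub_goals: list[str]
    success_criteria: list[str]
    constraints: list[str]
    complexity_estimate: int  # 1-10, per the analysis prompt above
```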
+
+    def _step2_analyze_tools(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 2: Which tools do I have to help me reach that goal."""
+        logger.info(f"🔧 Agent {agent_id}: Step 2 - Analyzing available tools")
+
+        requirement_analysis = session["requirement_analysis"]
+        conversation_history = session["conversation_history"]
+
+        # Get available tools
+        available_tools = self._get_available_tools()
+
+        tools_text = "Available Tools:\n"
+        if available_tools:
+            for tool_name, tool_desc in available_tools.items():
+                tools_text += f"- {tool_name}: {tool_desc}\n"
         else:
-            # Match answers with previous questions
-            logger.info(f"📝 Agent {agent_id}: Matching {len(answers.answers)} answers with previous questions")
-            answer_dict = {answer.key: answer.answer for answer in answers.answers}
-
-            # Create question-answer pairs for better context
-            qa_pairs = []
-            for question in questions:
-                answer_text = answer_dict.get(question.key, "No answer provided")
-                qa_pairs.append({
-                    "question": question.question,
-                    "key": question.key,
-                    "answer": answer_text,
-                    "required": question.required
-                })
-
-            # Process with the provided answers and question context
-            result = self._process_with_answers(
-                prompt,
-                final_response_structure,
-                qa_pairs,
-                agent_id,
-                token_summary,
-                conversation_history
-            )
+            tools_text += "No tools are currently available.\n"
 
-        # Note: History management is now handled in _process_with_answers
-        # No need to duplicate history management here
-        return result
+        analysis_prompt = f"""
+        Given this goal analysis:
+        Goal: {requirement_analysis.goal}
+        Sub-goals: {', '.join(requirement_analysis.sub_goals)}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
 
-    def _format_qa_context_for_quality_check(self, answers: Union[list, dict[str, str]]) -> str:
-        """
-        Format question-answer context for quality check and improvement prompts.
-
-        Args:
-            answers: Question-answer pairs or simple answers
-
-        Returns:
-            Formatted context text
-        """
-        if not answers:
-            return ""
-
-        if isinstance(answers, list) and answers:
-            # Check if it's a list of question-answer pairs (enhanced format)
-            if isinstance(answers[0], dict) and "question" in answers[0]:
-                context_text = "\nContext Information (Questions & Answers):\n"
-                context_text += "The response was generated with the following additional context:\n\n"
-                for i, qa_pair in enumerate(answers, 1):
-                    question = qa_pair.get("question", "Unknown question")
-                    answer = qa_pair.get("answer", "No answer provided")
-                    required = qa_pair.get("required", True)
-
-                    status_marker = "🔴 REQUIRED" if required else "🟡 OPTIONAL"
-                    context_text += f"{i}. {status_marker} Q: {question}\n"
-                    context_text += f"   A: {answer}\n\n"
-                return context_text
-            else:
-                # Legacy format - simple list
-                return f"\nAdditional context: {', '.join(str(a) for a in answers)}\n\n"
-        elif isinstance(answers, dict) and answers:
-            # Legacy format - simple dict
-            context_text = "\nAdditional context provided:\n"
-            for key, answer in answers.items():
-                context_text += f"- {key}: {answer}\n"
-            return context_text + "\n"
-
-        return ""
-
-    def _process_with_answers(
-        self,
-        prompt: str,
-        final_response_structure: Type[BaseModel],
-        answers: Union[list, dict[str, str]],
-        agent_id: str,
-        token_summary: TokenSummary,
-        conversation_history: list[dict[str, Any]] = None
-    ) -> AgentResponse:
-        """
-        Process the prompt with answers through the thinking pipeline.
+        And these available tools:
+        {tools_text}
 
-        Args:
-            prompt: The original prompt
-            final_response_structure: Expected final response structure
-            answers: Question-answer pairs or simple answers (empty if no questions were asked)
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+        Analyze which tools are relevant for achieving this goal:
+        1. List the relevant tools and explain how each helps achieve the goal
+        2. Map each tool to its specific purpose for this goal
+        3. Identify any missing capabilities that aren't covered by available tools
 
-        Returns:
-            AgentResponse with the final result
-        """
-        if conversation_history is None:
-            conversation_history = []
-
-        # Step 3: Process the prompt with thinking
-        logger.info(f"🧠 Agent {agent_id}: Processing prompt and generating initial response")
-        result = self._think_and_process(prompt, answers, agent_id, token_summary, conversation_history)
+        Provide a comprehensive tool analysis.
+        """
 
-        # Step 4: Quality check and iteration
-        final_result = self._quality_check_and_iterate(prompt, result, answers, agent_id, token_summary, conversation_history)
+        messages = [{"role": "user", "content": analysis_prompt}]
 
-        # Step 5: Generate final answer in requested format
-        logger.info(f"📝 Agent {agent_id}: Generating final structured response")
-        final_response = self._generate_final_response(prompt, final_result, final_response_structure, agent_id, token_summary, conversation_history)
-
-        # Update session with the final response in conversation history
-        if agent_id in self._agent_sessions:
-            # Update conversation history with assistant response
-            updated_history = conversation_history.copy()
-            updated_history.append({"role": "assistant", "content": str(final_response)})
-
-            self._agent_sessions[agent_id]["conversation_history"] = updated_history
-            self._agent_sessions[agent_id]["step"] = "completed"
-            self._agent_sessions[agent_id]["token_summary"] = token_summary
-            self._agent_sessions[agent_id]["final_response_structure"] = final_response_structure
-            logger.info(f"💾 Agent {agent_id}: Updated session with conversation history ({len(updated_history)} messages)")
-        else:
-            # Create new session if it doesn't exist
-            updated_history = conversation_history.copy()
-            updated_history.append({"role": "assistant", "content": str(final_response)})
+        try:
+            response = self._ai_client.parse(conversation_history + messages, ToolAnalysis)
+            tool_analysis = self._extract_parsed_content(response, ToolAnalysis)
+            token_usage = self._extract_token_usage(response)
+
+            # Update session
+            session["tool_analysis"] = tool_analysis
+            session["token_summary"].tool_analysis = token_usage
+            session["state"] = AgentState.GENERATING_TODO
+
+            logger.info(f"🔧 Agent {agent_id}: Tool analysis completed - {len(tool_analysis.relevant_tools)} relevant tools")
+
+            # Continue to next step
+            return self._step3_generate_todo(agent_id, session)
 
-            self._agent_sessions[agent_id] = {
-                "step": "completed",
-                "conversation_history": updated_history,
-                "token_summary": token_summary,
-                "final_response_structure": final_response_structure
-            }
-            logger.info(f"💾 Agent {agent_id}: Created new session with conversation history ({len(updated_history)} messages)")
+        except Exception as e:
+            logger.error(f"Failed to analyze tools: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to analyze tools: {str(e)}",
+                token_summary=session["token_summary"]
+            )
+ )
531
+
532
+ def _step3_generate_todo(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
533
+ """Step 3: Think about a todo list - What is required to reach the goal."""
534
+ logger.info(f"📝 Agent {agent_id}: Step 3 - Generating todo list")
511
535
 
512
- # Log final token summary
513
- logger.info(f"📊 Agent {agent_id}: Token usage summary - Total: {token_summary.total_tokens} "
514
- f"(Prompt: {token_summary.total_prompt_tokens}, Completion: {token_summary.total_completion_tokens})")
536
+ requirement_analysis = session["requirement_analysis"]
537
+ tool_analysis = session["tool_analysis"]
538
+ conversation_history = session["conversation_history"]
515
539
 
516
- return AgentResponse(agent_id=agent_id, final_response=final_response, token_summary=token_summary)
540
+ try:
541
+ todo_list, token_usage = self._task_manager.generate_todo_list(
542
+ requirement_analysis,
543
+ tool_analysis,
544
+ conversation_history
545
+ )
546
+
547
+ # Update session
548
+ session["todo_list"] = todo_list
549
+ session["token_summary"].todo_generation = token_usage
550
+ session["current_task_index"] = 0
551
+ session["state"] = AgentState.EXECUTING_TASKS
552
+
553
+ logger.info(f"📝 Agent {agent_id}: Todo list generated with {len(todo_list.tasks)} tasks")
554
+
555
+ # Return current state for visibility
556
+ return AgentResponse(
557
+ agent_id=agent_id,
558
+ state=AgentState.EXECUTING_TASKS,
559
+ requirement_analysis=requirement_analysis,
560
+ tool_analysis=tool_analysis,
561
+ todo_list=todo_list,
562
+ token_summary=session["token_summary"]
563
+ )
564
+
565
+ except Exception as e:
566
+ logger.error(f"Failed to generate todo list: {e}")
567
+ session["state"] = AgentState.FAILED
568
+ return AgentResponse(
569
+ agent_id=agent_id,
570
+ state=AgentState.FAILED,
571
+ final_response=f"Failed to generate todo list: {str(e)}",
572
+ token_summary=session["token_summary"]
573
+ )
517
574
 
518
- def _think_and_process(self, prompt: str, answers: Union[list, dict[str, str]], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> str:
519
- """
520
- Process the prompt with thinking.
575
+ def _step4_execute_tasks(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
576
+ """Step 4: Work step by step on the todo list."""
577
+ logger.info(f"⚡ Agent {agent_id}: Step 4 - Executing tasks")
578
+
579
+ todo_list = session["todo_list"]
580
+ conversation_history = session["conversation_history"]
581
+
582
+ # Check if we've exceeded iteration limit
583
+ session["iteration_count"] = session.get("iteration_count", 0) + 1
584
+ if session["iteration_count"] > self._max_task_iterations:
585
+ logger.warning(f"Agent {agent_id}: Reached max task iterations, moving to goal evaluation")
586
+ session["state"] = AgentState.EVALUATING_GOAL
587
+ return self._step6_evaluate_goal(agent_id, session)
588
+
589
+ # Get next task to execute
590
+ next_task = todo_list.get_next_task()
591
+
592
+ if not next_task:
593
+ # No more tasks, move to goal evaluation
594
+ logger.info(f"⚡ Agent {agent_id}: All tasks completed, moving to goal evaluation")
595
+ session["state"] = AgentState.EVALUATING_GOAL
596
+ return self._step6_evaluate_goal(agent_id, session)
597
+
598
+ # Mark task as in progress
599
+ next_task.status = TaskStatus.IN_PROGRESS
600
+ session["current_task"] = next_task
601
+
602
+ logger.info(f"⚡ Agent {agent_id}: Executing task '{next_task.title}'")
603
+
604
+ # Check if human interaction is needed for this task
605
+ if session["human_in_loop"] and self._should_request_human_interaction(next_task, session):
606
+ session["state"] = AgentState.WAITING_FOR_HUMAN
607
+ return self._step5_handle_human_interaction(agent_id, session)
608
+
609
+ # Execute the task
610
+ try:
611
+ result, token_usage = self._execute_task(next_task, session, conversation_history)
612
+
613
+ # Update task status
614
+ next_task.status = TaskStatus.COMPLETED
615
+ next_task.result = result
616
+
617
+ # Track token usage
618
+ session["token_summary"].task_execution.append(token_usage)
619
+
620
+ logger.info(f"⚡ Agent {agent_id}: Task '{next_task.title}' completed")
621
+
622
+ # Continue with next task
623
+ return self._step4_execute_tasks(agent_id, session)
624
+
625
+ except Exception as e:
626
+ logger.error(f"Task execution failed: {e}")
627
+ next_task.status = TaskStatus.FAILED
628
+ next_task.error_message = str(e)
629
+
630
+ # Continue with next task (skip failed one)
631
+ return self._step4_execute_tasks(agent_id, session)
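Note that step 4 advances by calling itself after every task, deepening the call stack by one frame per task; `max_task_iterations` is what bounds it. An equivalent iterative formulation (a sketch, not the package's code) would be:

```python
# Iterative sketch of the same loop, avoiding the recursion.
while session["iteration_count"] < self._max_task_iterations:
    session["iteration_count"] += 1
    next_task = todo_list.get_next_task()
    if next_task is None:
        break  # all tasks done
    execute_or_hand_off(next_task)  # placeholder for the try/except body above
session["state"] = AgentState.EVALUATING_GOAL
return self._step6_evaluate_goal(agent_id, session)
```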
 
-        Args:
-            prompt: The original prompt
-            answers: Question-answer pairs or simple answers
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+    def _step5_handle_human_interaction(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 5: When human in the loop is active, have a dialog with the user."""
+        logger.info(f"👤 Agent {agent_id}: Step 5 - Handling human interaction")
+
+        current_task = session.get("current_task")
+        dialog_options = session.get("dialog_options", [])
+
+        # Create human interaction request
+        context = f"I need your input for the task: {current_task.title if current_task else 'General question'}"
+        if current_task:
+            context += f"\nTask description: {current_task.description}"
+
+        # Determine interaction type based on task and context
+        interaction_type = self._determine_interaction_type(current_task, session)
+
+        human_request = self._dialog_handler.create_human_interaction_request(
+            interaction_type=interaction_type,
+            context=context,
+            task=current_task,
+            available_dialog_options=dialog_options
+        )
+
+        session["pending_human_request"] = human_request
+
+        return AgentResponse(
+            agent_id=agent_id,
+            state=AgentState.WAITING_FOR_HUMAN,
+            current_task=current_task,
+            human_interaction_request=human_request,
+            todo_list=session["todo_list"],
+            token_summary=session["token_summary"]
+        )
 
-        Returns:
-            The AI's result
-        """
-        if conversation_history is None:
-            conversation_history = []
-        # Format answers for the prompt with enhanced context
-        answers_text = ""
-        if isinstance(answers, list) and answers:
-            # Check if it's a list of question-answer pairs (enhanced format)
-            if answers and isinstance(answers[0], dict) and "question" in answers[0]:
-                answers_text = "\n\nQuestion-Answer Context:\n"
-                answers_text += "The following questions were asked to gather more information, along with the answers provided:\n\n"
-                for i, qa_pair in enumerate(answers, 1):
-                    question = qa_pair.get("question", "Unknown question")
-                    answer = qa_pair.get("answer", "No answer provided")
-                    key = qa_pair.get("key", "")
-                    required = qa_pair.get("required", True)
-
-                    status_marker = "🔴 REQUIRED" if required else "🟡 OPTIONAL"
-                    answers_text += f"{i}. {status_marker} Question: {question}\n"
-                    answers_text += f"   Answer: {answer}\n"
-                    if key:
-                        answers_text += f"   (Key: {key})\n"
-                    answers_text += "\n"
-            else:
-                # Legacy format - simple list
-                answers_text = f"\n\nAdditional information: {', '.join(str(a) for a in answers)}\n"
-        elif isinstance(answers, dict) and answers:
-            # Legacy format - simple dict
-            answers_text = "\n\nAdditional information provided:\n"
-            for key, answer in answers.items():
-                answers_text += f"- {key}: {answer}\n"
-
-        thinking_prompt = f"""
-        Think about this prompt, the goal and the steps required to fulfill it:
-        ==========
-        {prompt}
-        ==========
-        {answers_text}
+    def _step6_evaluate_goal(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Step 6: Is the goal reached? If not, create a new todo list; if yes, return the answer."""
+        logger.info(f"🎯 Agent {agent_id}: Step 6 - Evaluating goal achievement")
+
+        requirement_analysis = session["requirement_analysis"]
+        todo_list = session["todo_list"]
+        conversation_history = session["conversation_history"]
+
+        # Prepare task results summary
+        completed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.COMPLETED]
+        failed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.FAILED]
+
+        tasks_summary = "Completed Tasks:\n"
+        for task in completed_tasks:
+            tasks_summary += f"- {task.title}: {task.result or 'Completed'}\n"
+
+        if failed_tasks:
+            tasks_summary += "\nFailed Tasks:\n"
+            for task in failed_tasks:
+                tasks_summary += f"- {task.title}: {task.error_message or 'Failed'}\n"
+
+        evaluation_prompt = f"""
+        Evaluate whether the original goal has been achieved based on the work completed:
 
-        Consider the prompt carefully, analyze what the user wants to achieve, and think through the best approach to provide a comprehensive and helpful response.
+        Original Goal: {requirement_analysis.goal}
+        Success Criteria: {', '.join(requirement_analysis.success_criteria)}
 
-        IMPORTANT: When formulating your response, take into account both the original prompt AND the specific questions that were asked along with their answers. The questions reveal what additional information was deemed necessary, and the answers provide crucial context that should inform your response.
+        {tasks_summary}
 
-        Use any available tools to gather information or perform actions that would improve your response.
+        Determine:
+        1. Whether the main goal has been achieved (true/false)
+        2. Percentage of goal completion (0-100)
+        3. Which success criteria have been met
+        4. Which success criteria still need to be met
+        5. Detailed feedback on the goal achievement
+        6. Next steps if the goal is not fully achieved
 
-        Provide your best result for the given prompt, incorporating all the context from the question-answer pairs.
+        Provide a comprehensive evaluation of goal achievement.
         """
 
-        messages = [{"role": "user", "content": thinking_prompt}]
+        messages = [{"role": "user", "content": evaluation_prompt}]
 
         try:
-            response = self._call_ai_parse(messages, Result, conversation_history)
-            result_obj = self._extract_parsed_content(response, Result)
+            response = self._ai_client.parse(conversation_history + messages, GoalEvaluation)
+            goal_evaluation = self._extract_parsed_content(response, GoalEvaluation)
+            token_usage = self._extract_token_usage(response)
+
+            # Update session
+            session["goal_evaluation"] = goal_evaluation
+            session["token_summary"].goal_evaluation = token_usage
 
-            # Track token usage for thinking process
-            token_summary.thinking_process = self._extract_token_usage(response)
-            logger.info(f"🧠 Agent {agent_id}: Thinking completed (tokens: {token_summary.thinking_process.total_tokens})")
+            if goal_evaluation.goal_achieved:
+                # Goal achieved, generate final response
+                logger.info(f"🎯 Agent {agent_id}: Goal achieved! Generating final response")
+                return self._generate_final_response(agent_id, session)
+            else:
+                # Goal not achieved, complete with partial success for now
+                logger.info(f"🎯 Agent {agent_id}: Goal not fully achieved ({goal_evaluation.completion_percentage}%), completing with current progress")
+                return self._generate_final_response(agent_id, session)
 
-            return result_obj.result
         except Exception as e:
-            logger.error(f"Error in thinking process: {e}")
-            raise RuntimeError(f"Failed to process prompt with AI client: {e}") from e
-
-    def _quality_check_and_iterate(self, prompt: str, result: str, answers: Union[list, dict[str, str]], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> str:
-        """
-        Check the quality of the result and iterate if needed.
-
-        Args:
-            prompt: The original prompt
-            result: The current result
-            answers: Question-answer pairs or simple answers
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+            logger.error(f"Failed to evaluate goal: {e}")
+            session["state"] = AgentState.FAILED
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.FAILED,
+                final_response=f"Failed to evaluate goal: {str(e)}",
+                token_summary=session["token_summary"]
+            )
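`GoalEvaluation` follows the same pattern: only `goal_achieved`, `completion_percentage`, and `feedback` are read here, while the six numbered points in the evaluation prompt hint at the remaining fields. A hedged reconstruction:

```python
class GoalEvaluation(BaseModel):  # reconstruction; the list fields are assumptions from the prompt text
    goal_achieved: bool
    completion_percentage: int  # 0-100
    criteria_met: list[str]
    criteria_not_met: list[str]
    feedback: str
    next_steps: list[str]
```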
604
735
 
605
- Returns:
606
- The final improved result
607
- """
608
-        if conversation_history is None:
-            conversation_history = []
-
-        current_result = result
+    def _generate_final_response(self, agent_id: str, session: dict[str, Any]) -> AgentResponse:
+        """Generate the final response in the requested format."""
+        logger.info(f"📝 Agent {agent_id}: Generating final structured response")
 
-        if self._max_iterations == 0:
-            logger.info(f"✅ Agent {agent_id}: Skipping quality check (max_iterations=0)")
-            return current_result
+        requirement_analysis = session["requirement_analysis"]
+        goal_evaluation = session.get("goal_evaluation")
+        todo_list = session["todo_list"]
+        final_response_structure = session["final_response_structure"]
+        conversation_history = session["conversation_history"]
 
-        logger.info(f"🔍 Agent {agent_id}: Starting quality check and improvement process (max iterations: {self._max_iterations})")
+        # Prepare summary of work done
+        completed_tasks = [task for task in todo_list.tasks if task.status == TaskStatus.COMPLETED]
+        work_summary = "Work completed:\n"
+        for task in completed_tasks:
+            work_summary += f"- {task.title}: {task.result or 'Completed'}\n"
 
-        # Format context information for quality checks
-        context_text = self._format_qa_context_for_quality_check(answers)
+        evaluation_text = ""
+        if goal_evaluation:
+            evaluation_text = f"\nGoal Achievement: {goal_evaluation.completion_percentage}% complete\nFeedback: {goal_evaluation.feedback}"
 
-        for iteration in range(self._max_iterations):
-            quality_prompt = f"""
-Given this original prompt:
-==========
-{prompt}
-==========
-{context_text}
-And this result:
-==========
-{current_result}
-==========
-
-Is this result good and comprehensive, or does it need to be improved? Consider if the response fully addresses the prompt, provides sufficient detail, and would be helpful to the user.
+        final_prompt = f"""
+Based on the work completed for this goal:
+Goal: {requirement_analysis.goal}
 
-IMPORTANT: Also evaluate whether the result properly incorporates and addresses the information provided through the question-answer pairs above. The response should demonstrate that it has taken this additional context into account.
+{work_summary}{evaluation_text}
 
-Evaluate the quality and provide feedback if improvements are needed.
+Generate the final response in the exact format requested. Summarize the results and provide a comprehensive answer that addresses the original requirement.
 """
+
+        messages = [{"role": "user", "content": final_prompt}]
+
+        try:
+            response = self._ai_client.parse(conversation_history + messages, final_response_structure)
+            final_response = self._extract_parsed_content(response, final_response_structure)
+            token_usage = self._extract_token_usage(response)
 
-            messages = [{"role": "user", "content": quality_prompt}]
+            # Update session
+            session["token_summary"].final_response = token_usage
+            session["final_response"] = final_response
+            session["state"] = AgentState.COMPLETED
+
+            logger.info(f"📝 Agent {agent_id}: Final response generated successfully")
+
+            return AgentResponse(
+                agent_id=agent_id,
+                state=AgentState.COMPLETED,
+                final_response=final_response,
+                goal_evaluation=goal_evaluation,
+                token_summary=session["token_summary"]
+            )
 
+        except Exception as e:
+            logger.error(f"Failed to generate final response: {e}")
+            # Fallback response
             try:
-                response = self._call_ai_parse(messages, QualityCheck, conversation_history)
-                quality_check = self._extract_parsed_content(response, QualityCheck)
-
-                # Track token usage for quality check
-                quality_check_tokens = self._extract_token_usage(response)
-                token_summary.quality_checks.append(quality_check_tokens)
-
-                if quality_check.is_good:
-                    logger.info(f"✅ Agent {agent_id}: Quality check passed on iteration {iteration + 1} (tokens: {quality_check_tokens.total_tokens})")
-                    break
-
-                logger.info(f"🔄 Agent {agent_id}: Quality check iteration {iteration + 1} - Improvements needed: {quality_check.feedback[:100]}... (tokens: {quality_check_tokens.total_tokens})")
+                fallback_response = self._create_fallback_response(final_response_structure, str(e))
+                session["final_response"] = fallback_response
+                session["state"] = AgentState.COMPLETED
 
-                # Improve the result
-                improvement_prompt = f"""
-The original prompt was:
-==========
-{prompt}
-==========
-{context_text}
-The current result is:
-==========
-{current_result}
-==========
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.COMPLETED,
+                    final_response=fallback_response,
+                    token_summary=session["token_summary"]
+                )
+            except Exception as fallback_error:
+                logger.error(f"Fallback response creation failed: {fallback_error}")
+                session["state"] = AgentState.FAILED
+                return AgentResponse(
+                    agent_id=agent_id,
+                    state=AgentState.FAILED,
+                    final_response=f"Failed to generate response: {str(e)}",
+                    token_summary=session["token_summary"]
+                )
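The new `_generate_final_response` reads everything from the session dict and parses the model output directly into the caller-supplied Pydantic structure. A minimal usage sketch with an illustrative response model; the `agent(...)` entry point and the field names below are assumptions, not part of the published API:

    from pydantic import BaseModel
    from mbxai.agent.models import AgentState

    class ReportResponse(BaseModel):
        # Illustrative final_response_structure; any Pydantic model should fit.
        summary: str
        recommendations: list[str]

    # Hypothetical call shape:
    # response = agent_client.agent("Audit our logging setup", final_response_structure=ReportResponse)
    # if response.state == AgentState.COMPLETED:
    #     print(response.final_response.summary)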
 
-Feedback for improvement:
-==========
-{quality_check.feedback}
-==========
+    # Essential helper methods
+    def _handle_human_responses(self, session: dict[str, Any], human_response: Union[HumanInLoopResponse, HumanInLoopResponseBatch, list[HumanInLoopResponse]]) -> dict[str, Any]:
+        """Handle human response(s) - supports single response, batch, or list."""
+        # Normalize input to a list of responses
+        responses = []
+
+        if isinstance(human_response, HumanInLoopResponse):
+            responses = [human_response]
+        elif isinstance(human_response, HumanInLoopResponseBatch):
+            responses = human_response.responses
+        elif isinstance(human_response, list):
+            responses = human_response
+        else:
+            logger.warning(f"Unknown human response type: {type(human_response)}")
+            return session
+
+        # Process each response
+        for response in responses:
+            session = self._handle_human_response(session, response)
+
+        return session
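All three accepted shapes funnel through the same per-response handler. A sketch of the normalization, with field names taken from this diff and values purely illustrative (whether other constructor fields are required is not shown here):

    from mbxai.agent.models import HumanInLoopResponse, HumanInLoopResponseBatch

    single = HumanInLoopResponse(interaction_id="q-1", answer="Use the staging database")
    batch = HumanInLoopResponseBatch(responses=[single])
    as_list = [single]
    # Each of these is normalized to a list and applied in order, e.g.:
    # session = client._handle_human_responses(session, single)
    # session = client._handle_human_responses(session, batch)
    # session = client._handle_human_responses(session, as_list)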
 
-Please provide an improved version that addresses the feedback while maintaining the strengths of the current result. Make sure to incorporate all the context from the question-answer pairs above.
-"""
-
-                messages = [{"role": "user", "content": improvement_prompt}]
-                improvement_response = self._call_ai_parse(messages, Result, conversation_history)
-                result_obj = self._extract_parsed_content(improvement_response, Result)
-                current_result = result_obj.result
-
-                # Track token usage for improvement
-                improvement_tokens = self._extract_token_usage(improvement_response)
-                token_summary.improvements.append(improvement_tokens)
-
-                logger.info(f"⚡ Agent {agent_id}: Improvement iteration {iteration + 1} completed (tokens: {improvement_tokens.total_tokens})")
-
-            except Exception as e:
-                logger.warning(f"Error in quality check iteration {iteration}: {e}")
-                break
+    def _handle_human_response(self, session: dict[str, Any], human_response: HumanInLoopResponse) -> dict[str, Any]:
+        """Handle human response and update session accordingly."""
+        pending_request = session.get("pending_human_request")
+        if not pending_request or pending_request.id != human_response.interaction_id:
+            logger.warning("Received human response for unknown or expired interaction")
+            return session
 
-        total_quality_tokens = sum(usage.total_tokens for usage in token_summary.quality_checks)
-        total_improvement_tokens = sum(usage.total_tokens for usage in token_summary.improvements)
-        logger.info(f"🏁 Agent {agent_id}: Quality check completed - {len(token_summary.quality_checks)} checks, {len(token_summary.improvements)} improvements (Quality tokens: {total_quality_tokens}, Improvement tokens: {total_improvement_tokens})")
+        # For now, just continue with execution
+        session["state"] = AgentState.EXECUTING_TASKS
+        session.pop("pending_human_request", None)
 
-        return current_result
+        # Store human input for context
+        if human_response.answer:
+            session["human_context"] = session.get("human_context", "") + f"\nUser: {human_response.answer}"
+        if human_response.additional_context:
+            session["human_context"] = session.get("human_context", "") + f"\nContext: {human_response.additional_context}"
+
+        return session
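Accepted answers accumulate in a rolling `human_context` string that later prompts interpolate. A minimal sketch of the accumulation, with a plain dict standing in for the session:

    session = {"human_context": ""}
    session["human_context"] += "\nUser: Deploy to eu-central-1"
    session["human_context"] += "\nContext: Budget is capped at 50 USD/month"
    # _execute_task embeds session["human_context"] into its execution prompt.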
 
-    def _generate_final_response(self, prompt: str, result: str, final_response_structure: Type[BaseModel], agent_id: str, token_summary: TokenSummary, conversation_history: list[dict[str, Any]] = None) -> BaseModel:
-        """
-        Generate the final response in the requested format.
+    def _should_request_human_interaction(self, task: Task, session: dict[str, Any]) -> bool:
+        """Determine if human interaction is needed for a task."""
+        # Request human interaction for high complexity tasks
+        if task.estimated_complexity >= 4:
+            return True
+
+        # Check if task requires capabilities we don't have
+        tool_analysis = session.get("tool_analysis")
+        if tool_analysis and tool_analysis.missing_capabilities:
+            for capability in tool_analysis.missing_capabilities:
+                if capability.lower() in task.description.lower():
+                    return True
+
+        return False
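Escalation follows two rules: complexity 4 or higher always asks a human; otherwise a task escalates only when its description mentions a capability flagged as missing. A worked example (Task construction is sketched from the field names in this diff):

    task = Task(
        title="Notify stakeholders",
        description="Send email to the team once metrics are collected",
        estimated_complexity=2,
    )
    # missing_capabilities=["send email"]: "send email" appears in the
    # lowercased description, so the task escalates (True).
    # missing_capabilities=["payment processing"]: no match, so False.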
 
-        Args:
-            prompt: The original prompt
-            result: The processed result
-            final_response_structure: The expected response structure
-            agent_id: The agent session identifier
-            token_summary: Current token usage summary
-            conversation_history: Optional conversation history for dialog context
+    def _determine_interaction_type(self, task: Optional[Task], session: dict[str, Any]) -> HumanInteractionType:
+        """Determine the appropriate interaction type for a task."""
+        if not task:
+            return HumanInteractionType.QUESTION
+
+        # If we have dialog options available, prefer those
+        dialog_options = session.get("dialog_options", [])
+        if dialog_options:
+            return HumanInteractionType.DIALOG_OPTION
+
+        # For high complexity tasks, use decision
+        if task.estimated_complexity >= 4:
+            return HumanInteractionType.DECISION
+
+        # Default to question
+        return HumanInteractionType.QUESTION
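The precedence is fixed: predefined dialog options first, then complexity, then a plain question. Illustrative expectations, assuming a client instance and Task values as in the previous sketch:

    # client._determine_interaction_type(None, {}) -> QUESTION
    # client._determine_interaction_type(task, {"dialog_options": [opt]}) -> DIALOG_OPTION
    # client._determine_interaction_type(complex_task, {}) -> DECISION (complexity >= 4)
    # client._determine_interaction_type(simple_task, {}) -> QUESTION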
 
-        Returns:
-            The final response in the requested format
-        """
-        if conversation_history is None:
-            conversation_history = []
-        final_prompt = f"""
-Given this original prompt:
-==========
-{prompt}
-==========
+    def _execute_task(self, task: Task, session: dict[str, Any], conversation_history: list[dict[str, Any]]) -> tuple[str, TokenUsage]:
+        """Execute a task and return the result."""
+        # Prepare context
+        requirement = session["requirement_analysis"]
+        human_context = session.get("human_context", "")
+
+        execution_prompt = f"""
+Execute this task to help achieve the goal:
+Goal: {requirement.goal}
+Task: {task.title}
+Description: {task.description}
+Tools needed: {', '.join(task.tools_needed) if task.tools_needed else 'None'}
 
-And this processed result:
-==========
-{result}
-==========
+{human_context}
 
-Generate the final answer in the exact format requested. Make sure the response is well-structured and addresses all aspects of the original prompt.
+Use any available tools to complete this task. Provide a detailed result of what was accomplished.
+If the task cannot be completed with available tools, explain what was attempted and what is missing.
 """
 
-        messages = [{"role": "user", "content": final_prompt}]
+        messages = [{"role": "user", "content": execution_prompt}]
 
         try:
-            response = self._call_ai_parse(messages, final_response_structure, conversation_history)
-            final_response = self._extract_parsed_content(response, final_response_structure)
+            response = self._ai_client.parse(conversation_history + messages, Result)
+            result_obj = self._extract_parsed_content(response, Result)
+            token_usage = self._extract_token_usage(response)
 
-            # Track token usage for final response generation
-            token_summary.final_response = self._extract_token_usage(response)
-            logger.info(f"📝 Agent {agent_id}: Final structured response generated (tokens: {token_summary.final_response.total_tokens})")
+            return result_obj.result, token_usage
 
-            return final_response
         except Exception as e:
-            logger.error(f"Error generating final response: {e}")
-            # Fallback - try to create a basic response
-            try:
-                # If the structure has a 'result' field, use that
-                if hasattr(final_response_structure, 'model_fields') and 'result' in final_response_structure.model_fields:
-                    return final_response_structure(result=result)
-                else:
-                    # Try to create with the first field
-                    fields = final_response_structure.model_fields
-                    if fields:
-                        first_field = next(iter(fields.keys()))
-                        return final_response_structure(**{first_field: result})
-                    else:
-                        return final_response_structure()
-            except Exception as fallback_error:
-                logger.error(f"Fallback response creation failed: {fallback_error}")
-                # Last resort - return the structure with default values
-                return final_response_structure()
+            logger.error(f"Task execution failed: {e}")
+            return f"Task execution failed: {str(e)}", TokenUsage()
 
-    def get_session_info(self, agent_id: str) -> dict[str, Any]:
-        """
-        Get information about an agent session.
+    def _get_available_tools(self) -> dict[str, str]:
+        """Get available tools from the AI client."""
+        tools = {}
 
-        Args:
-            agent_id: The agent session identifier
-
-        Returns:
-            Session information dictionary
-
-        Raises:
-            ValueError: If the agent session is not found
-        """
-        if agent_id not in self._agent_sessions:
+        # Try to get tools from different client types
+        if hasattr(self._ai_client, 'tools') and self._ai_client.tools:
+            for tool in self._ai_client.tools:
+                tools[tool.name] = tool.description
+        elif hasattr(self._ai_client, '_tools') and self._ai_client._tools:
+            for tool in self._ai_client._tools:
+                tools[tool.name] = tool.description
+
+        return tools
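The lookup is duck-typed: any client exposing `.tools` or `._tools` whose items carry `.name` and `.description` works. A stand-in for a quick check (illustrative only):

    from types import SimpleNamespace

    fake_client = SimpleNamespace(
        tools=[SimpleNamespace(name="search", description="Web search")]
    )
    # With self._ai_client = fake_client, _get_available_tools()
    # returns {"search": "Web search"}.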
+
+    def _extract_parsed_content(self, response: Any, response_format: Type[BaseModel]) -> BaseModel:
+        """Extract the parsed content from the AI response."""
+        if hasattr(response, 'choices') and len(response.choices) > 0:
+            choice = response.choices[0]
+            if hasattr(choice.message, 'parsed') and choice.message.parsed:
+                return choice.message.parsed
+            elif hasattr(choice.message, 'content'):
+                try:
+                    content_dict = json.loads(choice.message.content)
+                    return response_format(**content_dict)
+                except (json.JSONDecodeError, TypeError):
+                    # Create default response based on type
+                    return self._create_fallback_response(response_format)
+
+        return self._create_fallback_response(response_format)
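Extraction degrades gracefully: native `parsed` content first, then a JSON re-parse of the raw message, then a typed fallback. A sketch of the middle path with stand-in objects (an OpenAI-style response shape is assumed):

    import json
    from types import SimpleNamespace

    msg = SimpleNamespace(parsed=None, content=json.dumps({"result": "done"}))
    resp = SimpleNamespace(choices=[SimpleNamespace(message=msg)])
    # parsed is falsy -> content is valid JSON -> Result(result="done")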
+
+    def _create_fallback_response(self, response_format: Type[BaseModel], error_msg: str = "") -> BaseModel:
+        """Create a fallback response when parsing fails."""
+        try:
+            if response_format == RequirementAnalysis:
+                return RequirementAnalysis(
+                    goal="Goal analysis failed",
+                    success_criteria=["Unable to determine criteria"],
+                    complexity_estimate=5
+                )
+            elif response_format == ToolAnalysis:
+                return ToolAnalysis(
+                    relevant_tools=[],
+                    tool_mapping={},
+                    missing_capabilities=["Analysis failed"]
+                )
+            elif response_format == TodoList:
+                return TodoList(tasks=[])
+            elif response_format == GoalEvaluation:
+                return GoalEvaluation(
+                    goal_achieved=False,
+                    completion_percentage=0,
+                    completed_criteria=[],
+                    remaining_criteria=["Evaluation failed"],
+                    feedback=f"Goal evaluation failed: {error_msg}"
+                )
+            elif response_format == Result:
+                return Result(result=f"Result generation failed: {error_msg}")
+            else:
+                # Try to create with default values
+                return response_format()
+        except Exception:
+            # Last resort - return basic result
+            return Result(result=f"Failed to create response: {error_msg}")
+
+    def _extract_token_usage(self, response: Any) -> TokenUsage:
+        """Extract token usage information from an AI response."""
+        try:
+            if hasattr(response, 'usage') and response.usage:
+                usage = response.usage
+                return TokenUsage(
+                    prompt_tokens=getattr(usage, 'prompt_tokens', 0),
+                    completion_tokens=getattr(usage, 'completion_tokens', 0),
+                    total_tokens=getattr(usage, 'total_tokens', 0)
+                )
+        except (AttributeError, TypeError) as e:
+            logger.debug(f"Could not extract token usage: {e}")
+
+        return TokenUsage()
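The three counters map one-to-one from the provider's `usage` object, with zero defaults when a field is absent. Stand-in sketch:

    from types import SimpleNamespace

    resp = SimpleNamespace(
        usage=SimpleNamespace(prompt_tokens=120, completion_tokens=80, total_tokens=200)
    )
    # -> TokenUsage(prompt_tokens=120, completion_tokens=80, total_tokens=200)
    # A response without a usable .usage falls through to TokenUsage().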
+
+    # Session management methods (kept for compatibility)
+    def get_session_info(self, agent_id: str) -> dict[str, Any]:
+        """Get information about an agent session."""
+        session = self._session_handler.get_session(agent_id)
+        if not session:
             raise ValueError(f"Agent session {agent_id} not found")
 
-        session = self._agent_sessions[agent_id].copy()
-        # Remove sensitive information and add summary
+        session = session.copy()
         session["conversation_length"] = len(session.get("conversation_history", []))
         return session
 
     def delete_session(self, agent_id: str) -> bool:
-        """
-        Delete an agent session.
-
-        Args:
-            agent_id: The agent session identifier
-
-        Returns:
-            True if session was deleted, False if it didn't exist
-        """
-        if agent_id in self._agent_sessions:
-            del self._agent_sessions[agent_id]
+        """Delete an agent session."""
+        deleted = self._session_handler.delete_session(agent_id)
+        if deleted:
             logger.info(f"🗑️ Deleted agent session {agent_id}")
-            return True
-        return False
+        return deleted
 
     def list_sessions(self) -> list[str]:
-        """
-        List all active agent session IDs.
-
-        Returns:
-            List of agent session IDs
-        """
-        return list(self._agent_sessions.keys())
+        """List all active agent session IDs."""
+        return self._session_handler.list_sessions()
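Session CRUD now delegates to a pluggable `SessionHandler` instead of the old in-process `_agent_sessions` dict, so state can live outside the process. A minimal customization sketch, subclassing the shipped `InMemorySessionHandler` (the override point mirrors the calls above; treat the exact handler interface as an assumption):

    from mbxai.agent.models import InMemorySessionHandler

    class AuditingSessionHandler(InMemorySessionHandler):
        """Illustrative handler that logs deletions before delegating."""

        def delete_session(self, agent_id: str) -> bool:
            print(f"About to delete session {agent_id}")
            return super().delete_session(agent_id)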