agnt5-0.3.2a1-cp310-abi3-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of agnt5 might be problematic.

agnt5/agent/core.py ADDED
@@ -0,0 +1,1782 @@
1
+ """Agent class - core LLM-driven agent with tool orchestration."""
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING, Any, AsyncGenerator, Callable, Dict, List, Optional, Tuple, Union
7
+
8
+ from ..context import Context, get_current_context, set_current_context
9
+ from .. import lm
10
+ from ..lm import GenerateRequest, GenerateResponse, LanguageModel, Message, ModelConfig, ToolDefinition
11
+ from ..tool import Tool, ToolRegistry
12
+ from .._telemetry import setup_module_logger
13
+ from ..exceptions import WaitingForUserInputException
14
+ from ..events import Event, EventType
15
+
16
+ from .context import AgentContext
17
+ from .result import AgentResult
18
+ from .handoff import Handoff
19
+ from .registry import AgentRegistry
20
+
21
+ logger = setup_module_logger(__name__)
22
+
23
+
24
+ def _serialize_tool_result(result: Any) -> str:
25
+ """Serialize a tool result to JSON string, handling Pydantic models and other complex types.
26
+
27
+ Args:
28
+ result: The tool execution result (may be Pydantic model, dataclass, dict, etc.)
29
+
30
+ Returns:
31
+ JSON string representation of the result
32
+ """
33
+ if result is None:
34
+ return "null"
35
+
36
+ # Handle Pydantic models (v2 API)
37
+ if hasattr(result, 'model_dump'):
38
+ return json.dumps(result.model_dump())
39
+
40
+ # Handle Pydantic models (v1 API)
41
+ if hasattr(result, 'dict') and hasattr(result, '__fields__'):
42
+ return json.dumps(result.dict())
43
+
44
+ # Handle dataclasses
45
+ import dataclasses as dc
46
+ if dc.is_dataclass(result) and not isinstance(result, type):
47
+ return json.dumps(dc.asdict(result))
48
+
49
+ # Default JSON serialization
50
+ return json.dumps(result)
51
+
52
+
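Reviewer note: the serialization helper above tries Pydantic v2, then Pydantic v1, then dataclasses, then plain `json.dumps`. A minimal sketch of that fallback chain, assuming `_serialize_tool_result` is importable from this module (`WeatherReport` is hypothetical):

```python
from dataclasses import dataclass

@dataclass
class WeatherReport:
    city: str
    temp_c: float

print(_serialize_tool_result(None))                        # "null"
print(_serialize_tool_result({"ok": True}))                # '{"ok": true}'
print(_serialize_tool_result(WeatherReport("Oslo", 4.5)))  # '{"city": "Oslo", "temp_c": 4.5}'
```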
53
+ @dataclass
54
+ class _StreamedLMResponse:
55
+ """Result from streaming LLM call - contains collected text and any tool calls."""
56
+ text: str
57
+ tool_calls: List[Dict[str, Any]]
58
+ usage: Optional[Dict[str, int]] = None
59
+
60
+
61
+ class Agent:
62
+ """Autonomous LLM-driven agent with tool orchestration.
63
+
64
+ Current features:
65
+ - LLM integration (OpenAI, Anthropic, etc.)
66
+ - Tool selection and execution
67
+ - Multi-turn reasoning
68
+ - Context and state management
69
+
70
+ Future enhancements:
71
+ - Durable execution with checkpointing
72
+ - Multi-agent coordination
73
+ - Platform-backed tool execution
74
+
75
+ Example:
76
+ ```python
77
+ from agnt5 import Agent, tool
78
+
79
+ @tool
80
+ async def search_web(query: str) -> str:
81
+ '''Search the web for information.'''
82
+ return f"Results for: {query}"
83
+
84
+ agent = Agent(
85
+ name="researcher",
86
+ model="openai/gpt-4o-mini",
87
+ instructions="You are a research assistant.",
88
+ tools=[search_web],
89
+ )
90
+
91
+ result = await agent.run_sync("Find recent AI developments")
92
+ print(result.output)
93
+ ```
94
+ """
95
+
96
+ def __init__(
97
+ self,
98
+ name: str,
99
+ model: Union[str, LanguageModel],
100
+ instructions: str,
101
+ tools: Optional[List[Any]] = None,
102
+ model_config: Optional[ModelConfig] = None,
103
+ handoffs: Optional[List[Union["Agent", Handoff]]] = None,
104
+ # Legacy parameters (kept for backward compatibility)
105
+ model_name: Optional[str] = None,
106
+ temperature: float = 0.7,
107
+ max_tokens: Optional[int] = None,
108
+ top_p: Optional[float] = None,
109
+ max_iterations: int = 10,
110
+ ):
111
+ """Initialize agent.
112
+
113
+ Args:
114
+ name: Agent identifier
115
+ model: Model specification. Either:
116
+ - String like "openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022"
117
+ - LanguageModel instance (legacy, for backward compatibility)
118
+ instructions: System prompt for the agent
119
+ tools: List of tools, Tool instances, or Agents (used as tools)
120
+ model_config: Model configuration (temperature, max_tokens, etc.)
121
+ handoffs: List of agents to hand off to (creates transfer_to_* tools)
122
+ model_name: Deprecated - use `model` parameter instead
123
+ temperature: LLM temperature (0-1). Legacy parameter - prefer model_config.
124
+ max_tokens: Maximum tokens in response. Legacy parameter - prefer model_config.
125
+ top_p: Top-p sampling. Legacy parameter - prefer model_config.
126
+ max_iterations: Maximum reasoning iterations
127
+ """
128
+ self.name = name
129
+ self.instructions = instructions
130
+ self.max_iterations = max_iterations
131
+ self.logger = logging.getLogger(f"agnt5.agent.{name}")
132
+
133
+ # Handle model parameter: string or LanguageModel
134
+ if isinstance(model, str):
135
+ # New API: model is a string like "openai/gpt-4o-mini"
136
+ self.model = model
137
+ self.model_name = model # For compatibility
138
+ self._language_model = None
139
+ elif isinstance(model, LanguageModel):
140
+ # Legacy API: model is a LanguageModel instance
141
+ self._language_model = model
142
+ self.model = model_name or "mock-model"
143
+ self.model_name = model_name or "mock-model"
144
+ else:
145
+ raise ValueError(f"model must be a string (e.g., 'openai/gpt-4o-mini') or LanguageModel instance")
146
+
147
+ # Model configuration (legacy params take precedence for backward compat)
148
+ self.model_config = model_config
149
+ self.temperature = temperature
150
+ self.max_tokens = max_tokens
151
+ self.top_p = top_p
152
+
153
+ # Cost tracking
154
+ self._cumulative_cost_usd: float = 0.0
155
+
156
+ # Initialize tools registry
157
+ self.tools: Dict[str, Tool] = {}
158
+
159
+ if tools:
160
+ for item in tools:
161
+ if isinstance(item, Tool):
162
+ self.tools[item.name] = item
163
+ elif isinstance(item, Agent):
164
+ # Agent as tool - wrap it
165
+ agent_tool = item.to_tool()
166
+ self.tools[agent_tool.name] = agent_tool
167
+ self.logger.debug(f"Wrapped agent '{item.name}' as tool")
168
+ elif callable(item):
169
+ # Function decorated with @tool
170
+ tool_instance = ToolRegistry.get(item.__name__)
171
+ if tool_instance:
172
+ self.tools[tool_instance.name] = tool_instance
173
+ else:
174
+ self.logger.warning(f"Tool '{item.__name__}' not found in registry")
175
+ else:
176
+ self.logger.warning(f"Skipping unknown tool type: {type(item)}")
177
+
178
+ # Store handoffs for introspection
179
+ self.handoffs: List[Handoff] = []
180
+
181
+ # Process handoffs: create transfer_to_* tools for each target agent
182
+ if handoffs:
183
+ for item in handoffs:
184
+ if isinstance(item, Agent):
185
+ # Auto-wrap Agent in Handoff with defaults
186
+ handoff_config = Handoff(agent=item)
187
+ elif isinstance(item, Handoff):
188
+ handoff_config = item
189
+ else:
190
+ self.logger.warning(f"Skipping unknown handoff type: {type(item)}")
191
+ continue
192
+
193
+ # Store the handoff configuration
194
+ self.handoffs.append(handoff_config)
195
+
196
+ # Create handoff tool
197
+ handoff_tool = self._create_handoff_tool(handoff_config)
198
+ self.tools[handoff_tool.name] = handoff_tool
199
+ self.logger.debug(f"Added handoff tool '{handoff_tool.name}'")
200
+
201
+ # Auto-register agent in registry (similar to Entity auto-registration)
202
+ AgentRegistry.register(self)
203
+ self.logger.debug(f"Auto-registered agent '{self.name}'")
204
+
205
+ @property
206
+ def cumulative_cost_usd(self) -> float:
207
+ """Get cumulative cost of all LLM calls for this agent.
208
+
209
+ Returns:
210
+ Total cost in USD
211
+ """
212
+ return self._cumulative_cost_usd
213
+
214
+ def _track_llm_cost(self, response: GenerateResponse, workflow_ctx: Optional[Any] = None) -> None:
215
+ """Track LLM call cost.
216
+
217
+ Args:
218
+ response: LLM response containing usage/cost info
219
+ workflow_ctx: Optional workflow context for emitting cost events
220
+ """
221
+ cost_usd = getattr(response, 'cost_usd', None)
222
+ if cost_usd:
223
+ self._cumulative_cost_usd += cost_usd
224
+ self.logger.debug(
225
+ f"LLM call cost: ${cost_usd:.6f}, "
226
+ f"cumulative: ${self._cumulative_cost_usd:.6f}"
227
+ )
228
+
229
+ # Emit cost event for observability
230
+ if workflow_ctx:
231
+ usage = getattr(response, 'usage', None)
232
+ workflow_ctx._send_checkpoint("agent.llm_cost", {
233
+ "agent.name": self.name,
234
+ "call_cost_usd": cost_usd,
235
+ "cumulative_cost_usd": self._cumulative_cost_usd,
236
+ "input_tokens": usage.get("input_tokens") if usage else None,
237
+ "output_tokens": usage.get("output_tokens") if usage else None,
238
+ })
239
+
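Reviewer note: cost accrues on the `Agent` instance across calls via `_track_llm_cost`. A usage sketch, assuming provider credentials are configured and the provider reports `cost_usd` on responses:

```python
import asyncio

agent = Agent(
    name="researcher",
    model="openai/gpt-4o-mini",
    instructions="You are a research assistant.",
)

async def main() -> None:
    await agent.run_sync("Summarize today's AI news")
    await agent.run_sync("Now list three sources")
    # Each generate() call adds its reported cost_usd to the running total.
    print(f"Total spend so far: ${agent.cumulative_cost_usd:.6f}")

asyncio.run(main())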
240
+ def to_tool(self) -> Tool:
241
+ """Convert this agent to a tool that can be used by other agents.
242
+
243
+ The tool will run this agent and return its output.
244
+
245
+ Returns:
246
+ Tool instance that wraps this agent
247
+
248
+ Example:
249
+ ```python
250
+ # Create specialist agents
251
+ researcher = Agent(name="researcher", ...)
252
+ analyst = Agent(name="analyst", ...)
253
+
254
+ # Use them as tools
255
+ coordinator = Agent(
256
+ name="coordinator",
257
+ tools=[researcher.to_tool(), analyst.to_tool()]
258
+ )
259
+ ```
260
+ """
261
+ from ..tool import tool as tool_decorator
262
+
263
+ # Capture agent reference
264
+ agent = self
265
+
266
+ @tool_decorator(
267
+ name=f"ask_{agent.name}",
268
+ description=agent.instructions or f"Ask the {agent.name} agent for help"
269
+ )
270
+ async def agent_as_tool(ctx: Context, message: str) -> str:
271
+ """Invoke the agent with a message and return its response."""
272
+ result = await agent.run_sync(message, context=ctx)
273
+ return result.output
274
+
275
+ # Get the tool from registry
276
+ return ToolRegistry.get(f"ask_{agent.name}")
277
+
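Reviewer note: the wrapper tool is registered under `ask_<agent name>`. A small sketch, assuming the registry hands back the same instance it stored:

```python
researcher = Agent(
    name="researcher",
    model="openai/gpt-4o-mini",
    instructions="Research topics on request.",
)
tool = researcher.to_tool()
print(tool.name)                                   # "ask_researcher"
print(ToolRegistry.get("ask_researcher") is tool)  # True
```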
278
+ def _create_handoff_tool(self, handoff: Handoff) -> Tool:
279
+ """Create a handoff tool for transferring control to another agent.
280
+
281
+ Args:
282
+ handoff: Handoff configuration
283
+
284
+ Returns:
285
+ Tool that performs the handoff
286
+ """
287
+ from ..tool import tool as tool_decorator
288
+
289
+ target_agent = handoff.agent
290
+ pass_history = handoff.pass_full_history
291
+
292
+ @tool_decorator(
293
+ name=handoff.tool_name,
294
+ description=handoff.description
295
+ )
296
+ async def transfer_tool(ctx: Context, message: str) -> Dict[str, Any]:
297
+ """Transfer control to another agent.
298
+
299
+ Args:
300
+ ctx: Execution context (auto-injected)
301
+ message: Message to pass to the target agent
302
+
303
+ Returns:
304
+ Dict with handoff marker and target agent's result
305
+ """
306
+ # Get conversation history if available and requested
307
+ history = None
308
+ if pass_history and ctx:
309
+ if hasattr(ctx, '_agent_data') and "_current_conversation" in ctx._agent_data:
310
+ history = ctx._agent_data["_current_conversation"]
311
+
312
+ # Run target agent (using run_sync for non-streaming invocation)
313
+ result = await target_agent.run_sync(
314
+ message,
315
+ context=ctx,
316
+ history=history
317
+ )
318
+
319
+ # Return with handoff marker
320
+ return {
321
+ "_handoff": True,
322
+ "to_agent": target_agent.name,
323
+ "output": result.output,
324
+ "tool_calls": result.tool_calls,
325
+ }
326
+
327
+ return ToolRegistry.get(handoff.tool_name)
328
+
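Reviewer note: a handoff wiring sketch. Passing an `Agent` in `handoffs` auto-wraps it in `Handoff`, which adds a `transfer_to_*` tool; the exact tool name and description defaults live in `.handoff`, not shown here:

```python
billing = Agent(
    name="billing",
    model="openai/gpt-4o-mini",
    instructions="Resolve billing questions.",
)
support = Agent(
    name="support",
    model="openai/gpt-4o-mini",
    instructions="Triage requests; transfer billing issues to the billing agent.",
    handoffs=[billing],
)

async def triage() -> None:
    result = await support.run_sync("I was double-charged last month")
    if result.handoff_to:                 # set when a transfer_to_* tool fired
        print(f"Handled by: {result.handoff_to}")
```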
329
+ def _render_prompt(
330
+ self,
331
+ template: str,
332
+ context_vars: Optional[Dict[str, Any]] = None
333
+ ) -> str:
334
+ """Render system prompt template with context variables.
335
+
336
+ Args:
337
+ template: System prompt with {{variable_name}} placeholders
338
+ context_vars: Variables to substitute
339
+
340
+ Returns:
341
+ Rendered prompt string
342
+ """
343
+ if not context_vars:
344
+ return template
345
+
346
+ rendered = template
347
+ for key, value in context_vars.items():
348
+ placeholder = "{{" + key + "}}"
349
+ if placeholder in rendered:
350
+ rendered = rendered.replace(placeholder, str(value))
351
+
352
+ return rendered
353
+
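Reviewer note: substitution is plain string replacement of `{{name}}` placeholders, not a template engine; unknown placeholders are left untouched. Demonstrated here against the private helper directly (in practice `prompt_context` is passed to `run()`/`run_sync()`):

```python
agent = Agent(
    name="support",
    model="openai/gpt-4o-mini",
    instructions="You support {{plan}} customers in {{region}}.",
)
print(agent._render_prompt(agent.instructions, {"plan": "Pro", "region": "EU"}))
# -> "You support Pro customers in EU."
print(agent._render_prompt(agent.instructions, {"plan": "Pro"}))
# -> "You support Pro customers in {{region}}."
```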
354
+ def _detect_memory_scope(
355
+ self,
356
+ context: Optional[Context] = None
357
+ ) -> tuple[str, str]:
358
+ """Detect memory scope from context.
359
+
360
+ Priority: user_id > session_id > run_id
361
+
362
+ Returns:
363
+ Tuple of (entity_key, scope) where:
364
+ - entity_key: e.g., "user:user-456", "session:abc-123", "run:xyz-789"
365
+ - scope: "user", "session", or "run"
366
+
367
+ Example:
368
+ entity_key, scope = agent._detect_memory_scope(ctx)
369
+ # If ctx.user_id="user-123": ("user:user-123", "user")
370
+ # If ctx.session_id="sess-456": ("session:sess-456", "session")
371
+ # Otherwise: ("run:run-789", "run")
372
+ """
373
+ # Extract identifiers from context
374
+ user_id = getattr(context, 'user_id', None) if context else None
375
+ session_id = getattr(context, 'session_id', None) if context else None
376
+ run_id = getattr(context, 'run_id', None) if context else None
377
+
378
+ # Priority: user_id > session_id > run_id
379
+ if user_id:
380
+ return (f"user:{user_id}", "user")
381
+ elif session_id and session_id != run_id: # Explicit session (not defaulting to run_id)
382
+ return (f"session:{session_id}", "session")
383
+ elif run_id:
384
+ return (f"run:{run_id}", "run")
385
+ else:
386
+ # Fallback: create ephemeral key
387
+ import uuid
388
+ fallback_run_id = f"agent-{self.name}-{uuid.uuid4().hex[:8]}"
389
+ return (f"run:{fallback_run_id}", "run")
390
+
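Reviewer note: the scope priority (`user_id` > explicit `session_id` > `run_id`) in miniature. `SimpleNamespace` is a stand-in purely for illustration; real callers pass `Context` subclasses:

```python
from types import SimpleNamespace

print(agent._detect_memory_scope(
    SimpleNamespace(user_id="user-123", session_id="sess-456", run_id="run-789")))
# -> ("user:user-123", "user")
print(agent._detect_memory_scope(
    SimpleNamespace(user_id=None, session_id="sess-456", run_id="run-789")))
# -> ("session:sess-456", "session")
print(agent._detect_memory_scope(
    SimpleNamespace(user_id=None, session_id=None, run_id="run-789")))
# -> ("run:run-789", "run")
```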
391
+ async def _run_core(
392
+ self,
393
+ user_message: str,
394
+ context: Optional[Context] = None,
395
+ history: Optional[List[Message]] = None,
396
+ prompt_context: Optional[Dict[str, Any]] = None,
397
+ sequence_start: int = 0,
398
+ ) -> AsyncGenerator[Union[Event, AgentResult], None]:
399
+ """Core streaming execution loop.
400
+
401
+ This async generator yields events during execution and yields
402
+ the final AgentResult as its last item.
403
+
404
+ Yields:
405
+ Event objects (LM events, tool events) during execution
406
+ AgentResult as the final item
407
+
408
+ Used by:
409
+ - run(): Wraps with agent.started/completed events
410
+ - run_sync(): Consumes events and extracts final result
411
+ """
412
+ sequence = sequence_start
413
+
414
+ # Create or adapt context
415
+ if context is None:
416
+ context = get_current_context()
417
+
418
+ # Capture workflow context for checkpoints
419
+ from ..workflow import WorkflowContext
420
+ workflow_ctx = context if isinstance(context, WorkflowContext) else None
421
+
422
+ if context is None:
423
+ import uuid
424
+ run_id = f"agent-{self.name}-{uuid.uuid4().hex[:8]}"
425
+ context = AgentContext(
426
+ run_id=run_id,
427
+ agent_name=self.name,
428
+ )
429
+ elif isinstance(context, AgentContext):
430
+ pass
431
+ elif hasattr(context, '_workflow_entity'):
432
+ entity_key, scope = self._detect_memory_scope(context)
433
+ import uuid
434
+ run_id = f"{context.run_id}:agent:{self.name}"
435
+ detected_session_id = entity_key.split(":", 1)[1] if ":" in entity_key else context.run_id
436
+ context = AgentContext(
437
+ run_id=run_id,
438
+ agent_name=self.name,
439
+ session_id=detected_session_id,
440
+ parent_context=context,
441
+ runtime_context=getattr(context, '_runtime_context', None),
442
+ )
443
+ else:
445
+ run_id = f"{context.run_id}:agent:{self.name}"
446
+ context = AgentContext(
447
+ run_id=run_id,
448
+ agent_name=self.name,
449
+ parent_context=context,
450
+ runtime_context=getattr(context, '_runtime_context', None),
451
+ )
452
+
453
+ # NOTE: agent.started checkpoint is NOT sent here - it's sent by run() which yields Event.agent_started
454
+ # This avoids duplicate agent.started events in the journal
455
+
456
+ # Check for HITL resume
457
+ if workflow_ctx and hasattr(workflow_ctx, "_agent_resume_info"):
458
+ resume_info = workflow_ctx._agent_resume_info
459
+ if resume_info["agent_name"] == self.name:
460
+ self.logger.info("Detected HITL resume, calling resume_from_hitl()")
461
+ delattr(workflow_ctx, "_agent_resume_info")
462
+ result = await self.resume_from_hitl(
463
+ context=workflow_ctx,
464
+ agent_context=resume_info["agent_context"],
465
+ user_response=resume_info["user_response"],
466
+ )
467
+ yield result
468
+ return
469
+
470
+ # Set context in task-local storage
471
+ token = set_current_context(context)
472
+ try:
473
+ # Build conversation messages
474
+ messages: List[Message] = []
475
+
476
+ if history:
477
+ # Convert dicts to Message objects if needed (for JSON history from platform)
478
+ for msg in history:
479
+ if isinstance(msg, Message):
480
+ messages.append(msg)
481
+ elif isinstance(msg, dict):
482
+ role_str = msg.get("role", "user")
483
+ content = msg.get("content", "")
484
+ if role_str == "user":
485
+ messages.append(Message.user(content))
486
+ elif role_str == "assistant":
487
+ messages.append(Message.assistant(content))
488
+ elif role_str == "system":
489
+ messages.append(Message.system(content))
490
+ else:
491
+ messages.append(Message.user(content))
492
+ else:
493
+ # Try to use it as a Message anyway
494
+ messages.append(msg)
495
+ self.logger.debug(f"Prepended {len(history)} messages from explicit history")
496
+
497
+ if isinstance(context, AgentContext):
498
+ stored_messages = await context.get_conversation_history()
499
+ messages.extend(stored_messages)
500
+
501
+ messages.append(Message.user(user_message))
502
+
503
+ if isinstance(context, AgentContext):
504
+ messages_to_save = stored_messages + [Message.user(user_message)] if history else messages
505
+ await context.save_conversation_history(messages_to_save)
506
+
507
+ # Create span for tracing (uses contextvar for async-safe parent-child linking)
508
+ from ..tracing import create_span
509
+
510
+ with create_span(
511
+ self.name,
512
+ "agent",
513
+ context._runtime_context if hasattr(context, "_runtime_context") else None,
514
+ {
515
+ "agent.name": self.name,
516
+ "agent.model": self.model_name,
517
+ "agent.max_iterations": str(self.max_iterations),
518
+ "input.data": _serialize_tool_result({"message": user_message}),
519
+ },
520
+ ) as span:
521
+ all_tool_calls: List[Dict[str, Any]] = []
522
+ import time as _time
523
+
524
+ # Render system prompt
525
+ rendered_instructions = self._render_prompt(self.instructions, prompt_context)
526
+
527
+ # Reasoning loop
528
+ for iteration in range(self.max_iterations):
529
+ iteration_start_time = _time.time()
530
+
531
+ if workflow_ctx:
532
+ workflow_ctx._send_checkpoint("agent.iteration.started", {
533
+ "agent.name": self.name,
534
+ "iteration": iteration + 1,
535
+ "max_iterations": self.max_iterations,
536
+ })
537
+
538
+ # Build tool definitions
539
+ tool_defs = [
540
+ ToolDefinition(
541
+ name=tool.name,
542
+ description=tool.description,
543
+ parameters=tool.input_schema,
544
+ )
545
+ for tool in self.tools.values()
546
+ ]
547
+
548
+ # Build request
549
+ request = GenerateRequest(
550
+ model=self.model if not self._language_model else "mock-model",
551
+ system_prompt=rendered_instructions,
552
+ messages=messages,
553
+ tools=tool_defs if tool_defs else [],
554
+ )
555
+ request.config.temperature = self.temperature
556
+ if self.max_tokens:
557
+ request.config.max_tokens = self.max_tokens
558
+ if self.top_p:
559
+ request.config.top_p = self.top_p
560
+
561
+ # Stream LLM call and yield events
562
+ response_text = ""
563
+ response_tool_calls = []
564
+
565
+ async for item, seq in self._stream_lm_call(request, sequence):
566
+ if isinstance(item, _StreamedLMResponse):
567
+ response_text = item.text
568
+ response_tool_calls = item.tool_calls
569
+ sequence = seq
570
+ else:
571
+ # Yield LM event
572
+ yield item
573
+ sequence = seq
574
+
575
+ # Add assistant response to messages
576
+ messages.append(Message.assistant(response_text))
577
+
578
+ # Check if LLM wants to use tools
579
+ if response_tool_calls:
580
+ self.logger.debug(f"Agent calling {len(response_tool_calls)} tool(s)")
581
+
582
+ if not hasattr(context, '_agent_data'):
583
+ context._agent_data = {}
584
+ context._agent_data["_current_conversation"] = messages
585
+
586
+ # Execute tool calls
587
+ tool_results = []
588
+ for tool_idx, tool_call in enumerate(response_tool_calls):
589
+ tool_name = tool_call["name"]
590
+ tool_args_str = tool_call["arguments"]
591
+ tool_call_id = tool_call.get("id") # From LLM response
592
+
593
+ all_tool_calls.append({
594
+ "name": tool_name,
595
+ "arguments": tool_args_str,
596
+ "iteration": iteration + 1,
597
+ "id": tool_call_id,
598
+ })
599
+
600
+ # Yield tool call started event with unique content_index
601
+ yield Event.agent_tool_call_started(
602
+ tool_name=tool_name,
603
+ arguments=tool_args_str,
604
+ tool_call_id=tool_call_id,
605
+ content_index=tool_idx,
606
+ sequence=sequence,
607
+ )
608
+ sequence += 1
609
+
610
+ try:
611
+ tool_args = json.loads(tool_args_str)
612
+ tool = self.tools.get(tool_name)
613
+
614
+ if not tool:
615
+ result_text = f"Error: Tool '{tool_name}' not found"
616
+ else:
617
+ result = await tool.invoke(context, **tool_args)
618
+
619
+ if isinstance(result, dict) and result.get("_handoff"):
620
+ self.logger.info(f"Handoff to '{result['to_agent']}'")
621
+ if isinstance(context, AgentContext):
622
+ await context.save_conversation_history(messages)
623
+
624
+ # Yield tool completed and final result
625
+ yield Event.agent_tool_call_completed(
626
+ tool_name=tool_name,
627
+ result=_serialize_tool_result(result["output"]),
628
+ tool_call_id=tool_call_id,
629
+ content_index=tool_idx,
630
+ sequence=sequence,
631
+ )
632
+ sequence += 1
633
+
634
+ # Add output data to span for trace visibility
635
+ span.set_attribute("output.data", _serialize_tool_result(result["output"]))
636
+
637
+ yield AgentResult(
638
+ output=result["output"],
639
+ tool_calls=all_tool_calls + result.get("tool_calls", []),
640
+ context=context,
641
+ handoff_to=result["to_agent"],
642
+ handoff_metadata=result,
643
+ )
644
+ return
645
+
646
+ result_text = _serialize_tool_result(result)
647
+
648
+ tool_results.append({
649
+ "tool": tool_name,
650
+ "result": result_text,
651
+ "error": None,
652
+ })
653
+
654
+ # Yield tool completed event
655
+ yield Event.agent_tool_call_completed(
656
+ tool_name=tool_name,
657
+ result=result_text,
658
+ tool_call_id=tool_call_id,
659
+ content_index=tool_idx,
660
+ sequence=sequence,
661
+ )
662
+ sequence += 1
663
+
664
+ except WaitingForUserInputException as e:
665
+ self.logger.info(f"Agent pausing for user input at iteration {iteration}")
666
+ messages_dict = [
667
+ {"role": msg.role.value, "content": msg.content}
668
+ for msg in messages
669
+ ]
670
+ raise WaitingForUserInputException(
671
+ question=e.question,
672
+ input_type=e.input_type,
673
+ options=e.options,
674
+ checkpoint_state=e.checkpoint_state,
675
+ agent_context={
676
+ "agent_name": self.name,
677
+ "iteration": iteration,
678
+ "messages": messages_dict,
679
+ "tool_results": tool_results,
680
+ "pending_tool_call": {
681
+ "name": tool_call["name"],
682
+ "arguments": tool_call["arguments"],
683
+ "tool_call_index": response_tool_calls.index(tool_call),
684
+ },
685
+ "all_tool_calls": all_tool_calls,
686
+ "model_config": {
687
+ "model": self.model,
688
+ "temperature": self.temperature,
689
+ "max_tokens": self.max_tokens,
690
+ "top_p": self.top_p,
691
+ },
692
+ },
693
+ ) from e
694
+
695
+ except Exception as e:
696
+ self.logger.error(f"Tool execution error: {e}")
697
+ tool_results.append({
698
+ "tool": tool_name,
699
+ "result": None,
700
+ "error": str(e),
701
+ })
702
+ yield Event.agent_tool_call_completed(
703
+ tool_name=tool_name,
704
+ result=None,
705
+ error=str(e),
706
+ tool_call_id=tool_call_id,
707
+ content_index=tool_idx,
708
+ sequence=sequence,
709
+ )
710
+ sequence += 1
711
+
712
+ # Add tool results to conversation
713
+ results_text = "\n".join([
714
+ f"Tool: {tr['tool']}\nResult: {tr['result']}"
715
+ if tr["error"] is None
716
+ else f"Tool: {tr['tool']}\nError: {tr['error']}"
717
+ for tr in tool_results
718
+ ])
719
+ messages.append(Message.user(
720
+ f"Tool results:\n{results_text}\n\nPlease provide your final answer based on these results."
721
+ ))
722
+
723
+ iteration_duration_ms = int((_time.time() - iteration_start_time) * 1000)
724
+ if workflow_ctx:
725
+ workflow_ctx._send_checkpoint("agent.iteration.completed", {
726
+ "agent.name": self.name,
727
+ "iteration": iteration + 1,
728
+ "duration_ms": iteration_duration_ms,
729
+ "has_tool_calls": True,
730
+ "tool_calls_count": len(tool_results),
731
+ })
732
+
733
+ else:
734
+ # No tool calls - agent is done
735
+ self.logger.debug(f"Agent completed after {iteration + 1} iterations")
736
+
737
+ iteration_duration_ms = int((_time.time() - iteration_start_time) * 1000)
738
+ if workflow_ctx:
739
+ workflow_ctx._send_checkpoint("agent.iteration.completed", {
740
+ "agent.name": self.name,
741
+ "iteration": iteration + 1,
742
+ "duration_ms": iteration_duration_ms,
743
+ "has_tool_calls": False,
744
+ })
745
+
746
+ if isinstance(context, AgentContext):
747
+ await context.save_conversation_history(messages)
748
+
749
+ # NOTE: agent.completed is NOT sent here - it's sent by run() which yields Event.agent_completed()
750
+ # This avoids duplicate agent.completed events in the journal
751
+
752
+ # Add output data to span for trace visibility
753
+ span.set_attribute("output.data", _serialize_tool_result(response_text))
754
+
755
+ yield AgentResult(
756
+ output=response_text,
757
+ tool_calls=all_tool_calls,
758
+ context=context,
759
+ )
760
+ return
761
+
762
+ # Max iterations reached
763
+ self.logger.warning(f"Agent reached max iterations ({self.max_iterations})")
764
+ final_output = messages[-1].content if messages else "No output generated"
765
+
766
+ if workflow_ctx:
767
+ workflow_ctx._send_checkpoint("agent.max_iterations.reached", {
768
+ "agent.name": self.name,
769
+ "max_iterations": self.max_iterations,
770
+ "tool_calls_count": len(all_tool_calls),
771
+ })
772
+
773
+ if isinstance(context, AgentContext):
774
+ await context.save_conversation_history(messages)
775
+
776
+ # NOTE: agent.completed is NOT sent here - it's sent by run() which yields Event.agent_completed()
777
+ # This avoids duplicate agent.completed events in the journal
778
+
779
+ # Add output data to span for trace visibility
780
+ span.set_attribute("output.data", _serialize_tool_result(final_output))
781
+
782
+ yield AgentResult(
783
+ output=final_output,
784
+ tool_calls=all_tool_calls,
785
+ context=context,
786
+ )
787
+
788
+ except Exception as e:
789
+ if workflow_ctx:
790
+ workflow_ctx._send_checkpoint("agent.failed", {
791
+ "agent.name": self.name,
792
+ "error": str(e),
793
+ "error_type": type(e).__name__,
794
+ })
795
+ raise
796
+ finally:
797
+ from ..context import _current_context
798
+ _current_context.reset(token)
799
+
800
+ async def _stream_lm_call(
801
+ self,
802
+ request: GenerateRequest,
803
+ sequence_start: int = 0,
804
+ ) -> AsyncGenerator[Tuple[Event, int], None]:
805
+ """Stream an LLM call and yield events.
806
+
807
+ This method calls the LLM and yields LM events (start, delta, stop).
808
+ The final response (including tool_calls) is yielded last as a
809
+ _StreamedLMResponse sentinel rather than an Event.
810
+
811
+ When tools are present, uses generate() with synthetic events since
812
+ streaming doesn't yet support tool calls. When no tools, uses real
813
+ streaming which properly exposes thinking blocks for extended thinking.
814
+
815
+ Args:
816
+ request: The generate request with model, messages, tools, etc.
817
+ sequence_start: Starting sequence number for events
818
+
819
+ Yields:
820
+ Tuple of (Event, next_sequence) or (_StreamedLMResponse, next_sequence)
821
+ """
822
+ from ..lm import _LanguageModel
823
+
824
+ sequence = sequence_start
825
+ collected_text = ""
826
+ usage_dict = None
827
+ tool_calls = []
828
+
829
+ # When tools are present, use generate() since streaming doesn't support tool calls
830
+ # When no tools, use real streaming for proper thinking block support
831
+ has_tools = bool(request.tools)
832
+
833
+ if has_tools:
834
+ # Use generate() - streaming doesn't support tool calls yet
835
+ if self._language_model is not None:
836
+ response = await self._language_model.generate(request)
837
+ else:
838
+ provider, model_name = self.model.split('/', 1)
839
+ internal_lm = _LanguageModel(provider=provider.lower(), default_model=None)
840
+ response = await internal_lm.generate(request)
841
+
842
+ # Emit synthetic LM events for compatibility
843
+ yield (Event.message_start(index=0, sequence=sequence), sequence + 1)
844
+ sequence += 1
845
+ if response.text:
846
+ yield (Event.message_delta(content=response.text, index=0, sequence=sequence), sequence + 1)
847
+ sequence += 1
848
+ yield (Event.message_stop(index=0, sequence=sequence), sequence + 1)
849
+ sequence += 1
850
+
851
+ collected_text = response.text
852
+ tool_calls = response.tool_calls or []
853
+ if response.usage:
854
+ usage_dict = {
855
+ "input_tokens": getattr(response.usage, 'input_tokens', getattr(response.usage, 'prompt_tokens', 0)),
856
+ "output_tokens": getattr(response.usage, 'output_tokens', getattr(response.usage, 'completion_tokens', 0)),
857
+ }
858
+ else:
859
+ # Use real streaming - properly exposes thinking blocks
860
+ if self._language_model is not None:
861
+ # Legacy LanguageModel - use stream() method
862
+ async for event in self._language_model.stream(request):
863
+ if event.event_type == EventType.LM_STREAM_COMPLETED:
864
+ # Extract final text and usage from completion event
865
+ collected_text = event.data.get("text", "")
866
+ if "usage" in event.data:
867
+ usage_dict = event.data["usage"]
868
+ else:
869
+ # Forward LM events (thinking/message start/delta/stop)
870
+ event.sequence = sequence
871
+ yield (event, sequence + 1)
872
+ sequence += 1
873
+ # Collect text from message deltas (not thinking)
874
+ if event.event_type == EventType.LM_MESSAGE_DELTA:
875
+ # data is raw content string for delta events
876
+ if event.data:
877
+ collected_text += event.data
878
+ else:
879
+ # New API: model is a string, create internal LM instance
880
+ provider, model_name = self.model.split('/', 1)
881
+ internal_lm = _LanguageModel(provider=provider.lower(), default_model=None)
882
+ async for event in internal_lm.stream(request):
883
+ if event.event_type == EventType.LM_STREAM_COMPLETED:
884
+ # Extract final text and usage from completion event
885
+ collected_text = event.data.get("text", "")
886
+ if "usage" in event.data:
887
+ usage_dict = event.data["usage"]
888
+ else:
889
+ # Forward LM events (thinking/message start/delta/stop)
890
+ event.sequence = sequence
891
+ yield (event, sequence + 1)
892
+ sequence += 1
893
+ # Collect text from message deltas (not thinking)
894
+ if event.event_type == EventType.LM_MESSAGE_DELTA:
895
+ # data is raw content string for delta events
896
+ if event.data:
897
+ collected_text += event.data
898
+
899
+ # Yield the final response
900
+ yield (_StreamedLMResponse(
901
+ text=collected_text,
902
+ tool_calls=tool_calls,
903
+ usage=usage_dict,
904
+ ), sequence)
905
+
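Reviewer note: the generator's contract in miniature. A condensed consumer mirroring what `_run_core` does above, assuming it runs in this module so `_StreamedLMResponse` is in scope (sketch, not new behavior):

```python
async def drain_lm_stream(agent: Agent, request: GenerateRequest) -> tuple[str, list]:
    """Collect one LLM turn; sketch of the _stream_lm_call contract."""
    text, calls = "", []
    async for item, _seq in agent._stream_lm_call(request, sequence_start=0):
        if isinstance(item, _StreamedLMResponse):   # sentinel: final collected state
            text, calls = item.text, item.tool_calls
        else:                                       # ordinary Event: stream it onward
            print(item.event_type)
    return text, calls
```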
906
+ async def run(
907
+ self,
908
+ user_message: str,
909
+ context: Optional[Context] = None,
910
+ history: Optional[List[Message]] = None,
911
+ prompt_context: Optional[Dict[str, Any]] = None,
912
+ ) -> AsyncGenerator[Event, None]:
913
+ """Run agent with streaming events.
914
+
915
+ This is an async generator that yields Event objects during execution.
916
+ Use `async for event in agent.run(...)` to process events in real-time.
917
+
918
+ Args:
919
+ user_message: User's input message
920
+ context: Optional execution context (auto-created if not provided)
921
+ history: Optional conversation history to include
922
+ prompt_context: Optional context variables for system prompt template
923
+
924
+ Yields:
925
+ Event objects during execution:
926
+ - agent.started: When agent begins execution
927
+ - lm.message.start/delta/stop: During LLM generation
928
+ - agent.tool_call.started/completed: During tool execution
929
+ - agent.completed: When agent finishes (contains final output)
930
+
931
+ Example:
932
+ ```python
933
+ # Streaming execution
934
+ async for event in agent.run("Analyze recent tech news"):
935
+ if event.event_type == EventType.LM_MESSAGE_DELTA:
936
+ print(event.data, end="", flush=True) # data is raw content for deltas
937
+ elif event.event_type == EventType.AGENT_COMPLETED:
938
+ print(f"\\nFinal: {event.data['output']}")
939
+
940
+ # Non-streaming (use run_sync instead)
941
+ result = await agent.run_sync("Analyze recent tech news")
942
+ print(result.output)
943
+ ```
944
+ """
945
+ # Track sequence number for events
946
+ sequence = 0
947
+
948
+ # Yield agent.started event
949
+ yield Event.agent_started(
950
+ agent_name=self.name,
951
+ model=self.model_name,
952
+ tools=list(self.tools.keys()),
953
+ max_iterations=self.max_iterations,
954
+ sequence=sequence,
955
+ )
956
+ sequence += 1
957
+
958
+ try:
959
+ # Run the streaming core loop - yields LM events, tool events, and final result
960
+ result = None
961
+ async for item in self._run_core(
962
+ user_message=user_message,
963
+ context=context,
964
+ history=history,
965
+ prompt_context=prompt_context,
966
+ sequence_start=sequence,
967
+ ):
968
+ if isinstance(item, AgentResult):
969
+ # Final result - convert to agent.completed event
970
+ result = item
971
+ sequence = getattr(item, '_last_sequence', sequence)
972
+ elif isinstance(item, Event):
973
+ # Forward LM and tool events
974
+ yield item
975
+ sequence = item.sequence + 1 if hasattr(item, 'sequence') else sequence
976
+
977
+ # Yield agent.completed event with the result
978
+ if result:
979
+ yield Event.agent_completed(
980
+ output=result.output,
981
+ iterations=len(result.tool_calls) // 2 + 1 if result.tool_calls else 1,  # rough estimate; exact count not tracked here
982
+ tool_calls=result.tool_calls,
983
+ handoff_to=result.handoff_to,
984
+ max_iterations_reached=False,
985
+ sequence=sequence,
986
+ )
987
+
988
+ except Exception as e:
989
+ # Yield agent.failed event
990
+ yield Event.agent_failed(
991
+ error=str(e),
992
+ error_type=type(e).__name__,
993
+ agent_name=self.name,
994
+ sequence=sequence,
995
+ )
996
+ raise
997
+
998
+ async def run_sync(
999
+ self,
1000
+ user_message: str,
1001
+ context: Optional[Context] = None,
1002
+ history: Optional[List[Message]] = None,
1003
+ prompt_context: Optional[Dict[str, Any]] = None,
1004
+ ) -> AgentResult:
1005
+ """Run agent to completion (non-streaming).
1006
+
1007
+ This is the synchronous version that returns an AgentResult directly.
1008
+ Use this when you don't need streaming events.
1009
+
1010
+ Args:
1011
+ user_message: User's input message
1012
+ context: Optional execution context
1013
+ history: Optional conversation history
1014
+ prompt_context: Optional context variables
1015
+
1016
+ Returns:
1017
+ AgentResult with output and execution details
1018
+
1019
+ Example:
1020
+ ```python
1021
+ result = await agent.run_sync("Analyze recent tech news")
1022
+ print(result.output)
1023
+ ```
1024
+ """
1025
+ result = None
1026
+ async for event in self.run(user_message, context, history, prompt_context):
1027
+ if event.event_type == EventType.AGENT_COMPLETED:
1028
+ # Extract result from the completed event
1029
+ result = AgentResult(
1030
+ output=event.data["output"],
1031
+ tool_calls=event.data.get("tool_calls", []),
1032
+ context=context,
1033
+ handoff_to=event.data.get("handoff_to"),
1034
+ )
1035
+ elif event.event_type == EventType.AGENT_FAILED:
1036
+ # Nothing to do: run() raises the underlying exception after yielding this event
1037
+ pass
1038
+
1039
+ if result is None:
1040
+ # No agent.completed event was seen; surface an explicit error
1041
+ raise RuntimeError("Agent completed without producing a result")
1042
+
1043
+ return result
1044
+
1045
+ async def _run_impl(
1046
+ self,
1047
+ user_message: str,
1048
+ context: Optional[Context] = None,
1049
+ history: Optional[List[Message]] = None,
1050
+ prompt_context: Optional[Dict[str, Any]] = None,
1051
+ ) -> AgentResult:
1052
+ """Internal implementation of agent execution.
1053
+
1054
+ Legacy non-streaming agent loop. run() and run_sync() now go through
+ the streaming _run_core() path; this is retained for backward compatibility.
1055
+ """
1056
+ # Create or adapt context
1057
+ if context is None:
1058
+ # Try to get context from task-local storage (set by workflow/function decorator)
1059
+ context = get_current_context()
1060
+
1061
+ # IMPORTANT: Capture workflow context NOW before we replace it with AgentContext
1062
+ # This allows LM calls inside the agent to emit workflow checkpoints
1063
+ from ..workflow import WorkflowContext
1064
+ workflow_ctx = context if isinstance(context, WorkflowContext) else None
1065
+
1066
+ if context is None:
1067
+ # Standalone execution - create AgentContext
1068
+ import uuid
1069
+ run_id = f"agent-{self.name}-{uuid.uuid4().hex[:8]}"
1070
+ context = AgentContext(
1071
+ run_id=run_id,
1072
+ agent_name=self.name,
1073
+ )
1074
+ elif isinstance(context, AgentContext):
1075
+ # Already AgentContext - use as-is
1076
+ pass
1077
+ elif hasattr(context, '_workflow_entity'):
1078
+ # WorkflowContext - create AgentContext that inherits state
1079
+ # Auto-detect memory scope based on user_id/session_id/run_id priority
1080
+ entity_key, scope = self._detect_memory_scope(context)
1081
+
1083
+ run_id = f"{context.run_id}:agent:{self.name}"
1084
+ # Extract the ID from entity_key (e.g., "session:abc-123" → "abc-123")
1085
+ detected_session_id = entity_key.split(":", 1)[1] if ":" in entity_key else context.run_id
1086
+
1087
+ context = AgentContext(
1088
+ run_id=run_id,
1089
+ agent_name=self.name,
1090
+ session_id=detected_session_id, # Use auto-detected scope
1091
+ parent_context=context,
1092
+ runtime_context=getattr(context, '_runtime_context', None), # Inherit trace context
1093
+ )
1094
+ else:
1095
+ # FunctionContext or other - create new AgentContext
1097
+ run_id = f"{context.run_id}:agent:{self.name}"
1098
+ context = AgentContext(
1099
+ run_id=run_id,
1100
+ agent_name=self.name,
1101
+ parent_context=context, # Inherit streaming context
1102
+ runtime_context=getattr(context, '_runtime_context', None), # Inherit trace context
1103
+ )
1104
+
1105
+ # NOTE: agent.started checkpoint is NOT sent here
1106
+ # run_sync() calls run() which yields Event.agent_started
1107
+ # The worker processes this and sends the checkpoint
1108
+
1109
+ # NEW: Check if this is a resume from HITL
1110
+ if workflow_ctx and hasattr(workflow_ctx, "_agent_resume_info"):
1111
+ resume_info = workflow_ctx._agent_resume_info
1112
+ if resume_info["agent_name"] == self.name:
1113
+ self.logger.info("Detected HITL resume, calling resume_from_hitl()")
1114
+
1115
+ # Clear resume info to avoid re-entry
1116
+ delattr(workflow_ctx, "_agent_resume_info")
1117
+
1118
+ # Resume from checkpoint (context setup happens inside resume_from_hitl)
1119
+ return await self.resume_from_hitl(
1120
+ context=workflow_ctx,
1121
+ agent_context=resume_info["agent_context"],
1122
+ user_response=resume_info["user_response"],
1123
+ )
1124
+
1125
+ # Set context in task-local storage for automatic propagation to tools and LM calls
1126
+ token = set_current_context(context)
1127
+ try:
1128
+ try:
1129
+ # Build conversation messages
1130
+ messages: List[Message] = []
1131
+
1132
+ # 1. Start with explicitly provided history (if any)
1133
+ if history:
1134
+ messages.extend(history)
1135
+ self.logger.debug(f"Prepended {len(history)} messages from explicit history")
1136
+
1137
+ # 2. Load conversation history from state (if AgentContext)
1138
+ if isinstance(context, AgentContext):
1139
+ stored_messages = await context.get_conversation_history()
1140
+ messages.extend(stored_messages)
1141
+
1142
+ # 3. Add new user message
1143
+ messages.append(Message.user(user_message))
1144
+
1145
+ # 4. Save updated conversation to context storage
1146
+ if isinstance(context, AgentContext):
1147
+ # Only save the stored + new message (not the explicit history)
1148
+ messages_to_save = stored_messages + [Message.user(user_message)] if history else messages
1149
+ await context.save_conversation_history(messages_to_save)
1150
+
1151
+ # Create span for agent execution (uses contextvar for async-safe parent-child linking)
1152
+ from ..tracing import create_span
1153
+
1154
+ with create_span(
1155
+ self.name,
1156
+ "agent",
1157
+ context._runtime_context if hasattr(context, "_runtime_context") else None,
1158
+ {
1159
+ "agent.name": self.name,
1160
+ "agent.model": self.model_name, # Use model_name (always a string)
1161
+ "agent.max_iterations": str(self.max_iterations),
1162
+ "input.data": _serialize_tool_result({"message": user_message}),
1163
+ },
1164
+ ) as span:
1165
+ all_tool_calls: List[Dict[str, Any]] = []
1166
+ import time as _time
1167
+
1168
+ # NOTE: agent.started checkpoint is NOT sent here
1169
+ # The caller (run()) yields Event.agent_started which the worker processes
1170
+
1171
+ # Render system prompt with context variables
1172
+ rendered_instructions = self._render_prompt(self.instructions, prompt_context)
1173
+ if prompt_context:
1174
+ self.logger.debug(f"Rendered system prompt with {len(prompt_context)} context variables")
1175
+
1176
+ # Reasoning loop
1177
+ for iteration in range(self.max_iterations):
1178
+ iteration_start_time = _time.time()
1179
+
1180
+ # Emit iteration started checkpoint
1181
+ if workflow_ctx:
1182
+ workflow_ctx._send_checkpoint("agent.iteration.started", {
1183
+ "agent.name": self.name,
1184
+ "iteration": iteration + 1,
1185
+ "max_iterations": self.max_iterations,
1186
+ })
1187
+
1188
+ # Build tool definitions for LLM
1189
+ tool_defs = [
1190
+ ToolDefinition(
1191
+ name=tool.name,
1192
+ description=tool.description,
1193
+ parameters=tool.input_schema,
1194
+ )
1195
+ for tool in self.tools.values()
1196
+ ]
1197
+
1206
+ # Call LLM
1207
+ # Check if we have a legacy LanguageModel instance or need to create one
1208
+ if self._language_model is not None:
1209
+ # Legacy API: use provided LanguageModel instance
1210
+ request = GenerateRequest(
1211
+ model="mock-model", # Not used by MockLanguageModel
1212
+ system_prompt=rendered_instructions,
1213
+ messages=messages,
1214
+ tools=tool_defs if tool_defs else [],
1215
+ )
1216
+ request.config.temperature = self.temperature
1217
+ if self.max_tokens:
1218
+ request.config.max_tokens = self.max_tokens
1219
+ if self.top_p:
1220
+ request.config.top_p = self.top_p
1221
+ response = await self._language_model.generate(request)
1222
+
1223
+ # Track cost for this LLM call
1224
+ self._track_llm_cost(response, workflow_ctx)
1225
+ else:
1226
+ # New API: model is a string, create internal LM instance
1227
+ request = GenerateRequest(
1228
+ model=self.model,
1229
+ system_prompt=rendered_instructions,
1230
+ messages=messages,
1231
+ tools=tool_defs if tool_defs else [],
1232
+ )
1233
+ request.config.temperature = self.temperature
1234
+ if self.max_tokens:
1235
+ request.config.max_tokens = self.max_tokens
1236
+ if self.top_p:
1237
+ request.config.top_p = self.top_p
1238
+
1239
+ # Create internal LM instance for generation
1240
+ # TODO: Use model_config when provided
1241
+ from ..lm import _LanguageModel
1242
+ provider, model_name = self.model.split('/', 1)
1243
+ internal_lm = _LanguageModel(provider=provider.lower(), default_model=None)
1244
+ response = await internal_lm.generate(request)
1245
+
1246
+ # Track cost for this LLM call
1247
+ self._track_llm_cost(response, workflow_ctx)
1248
+
1249
+ # Add assistant response to messages
1250
+ messages.append(Message.assistant(response.text))
1251
+
1252
+ # Check if LLM wants to use tools
1253
+ if response.tool_calls:
1254
+ self.logger.debug(f"Agent calling {len(response.tool_calls)} tool(s)")
1255
+
1256
+ # Store current conversation in context for potential handoffs
1257
+ # Use a simple dict attribute since we don't need full state persistence for this
1258
+ if not hasattr(context, '_agent_data'):
1259
+ context._agent_data = {}
1260
+ context._agent_data["_current_conversation"] = messages
1261
+
1262
+ # Execute tool calls
1263
+ tool_results = []
1264
+ for tool_call in response.tool_calls:
1265
+ tool_name = tool_call["name"]
1266
+ tool_args_str = tool_call["arguments"]
1267
+
1268
+ # Track tool call
1269
+ all_tool_calls.append(
1270
+ {
1271
+ "name": tool_name,
1272
+ "arguments": tool_args_str,
1273
+ "iteration": iteration + 1,
1274
+ }
1275
+ )
1276
+
1277
+ # Execute tool
1278
+ try:
1279
+ # Parse arguments
1280
+ tool_args = json.loads(tool_args_str)
1281
+
1282
+ # Get tool
1283
+ tool = self.tools.get(tool_name)
1284
+ if not tool:
1285
+ result_text = f"Error: Tool '{tool_name}' not found"
1286
+ else:
1287
+ # Execute tool
1288
+ result = await tool.invoke(context, **tool_args)
1289
+
1290
+ # Check if this was a handoff
1291
+ if isinstance(result, dict) and result.get("_handoff"):
1292
+ self.logger.info(
1293
+ f"Handoff detected to '{result['to_agent']}', "
1294
+ f"terminating current agent"
1295
+ )
1296
+ # Save conversation before returning
1297
+ if isinstance(context, AgentContext):
1298
+ await context.save_conversation_history(messages)
1299
+ # Add output data to span for trace visibility
1300
+ span.set_attribute("output.data", _serialize_tool_result(result["output"]))
1301
+ # Return immediately with handoff result
1302
+ return AgentResult(
1303
+ output=result["output"],
1304
+ tool_calls=all_tool_calls + result.get("tool_calls", []),
1305
+ context=context,
1306
+ handoff_to=result["to_agent"],
1307
+ handoff_metadata=result,
1308
+ )
1309
+
1310
+ result_text = _serialize_tool_result(result)
1311
+
1312
+ tool_results.append(
1313
+ {"tool": tool_name, "result": result_text, "error": None}
1314
+ )
1315
+
1316
+ except WaitingForUserInputException as e:
1317
+ # HITL PAUSE: Capture agent state and propagate exception
1318
+ self.logger.info(f"Agent pausing for user input at iteration {iteration}")
1319
+
1320
+ # Serialize messages to dict format
1321
+ messages_dict = [
1322
+ {"role": msg.role.value, "content": msg.content}
1323
+ for msg in messages
1324
+ ]
1325
+
1326
+ # Enhance exception with agent execution context
1327
+ raise WaitingForUserInputException(
1328
+ question=e.question,
1329
+ input_type=e.input_type,
1330
+ options=e.options,
1331
+ checkpoint_state=e.checkpoint_state,
1332
+ agent_context={
1333
+ "agent_name": self.name,
1334
+ "iteration": iteration,
1335
+ "messages": messages_dict,
1336
+ "tool_results": tool_results,
1337
+ "pending_tool_call": {
1338
+ "name": tool_call["name"],
1339
+ "arguments": tool_call["arguments"],
1340
+ "tool_call_index": response.tool_calls.index(tool_call),
1341
+ },
1342
+ "all_tool_calls": all_tool_calls,
1343
+ "model_config": {
1344
+ "model": self.model,
1345
+ "temperature": self.temperature,
1346
+ "max_tokens": self.max_tokens,
1347
+ "top_p": self.top_p,
1348
+ },
1349
+ },
1350
+ ) from e
1351
+
1352
+ except Exception as e:
1353
+ # Regular tool errors - log and continue
1354
+ self.logger.error(f"Tool execution error: {e}")
1355
+ tool_results.append(
1356
+ {"tool": tool_name, "result": None, "error": str(e)}
1357
+ )
1358
+
1359
+ # Add tool results to conversation
1360
+ results_text = "\n".join(
1361
+ [
1362
+ f"Tool: {tr['tool']}\nResult: {tr['result']}"
1363
+ if tr["error"] is None
1364
+ else f"Tool: {tr['tool']}\nError: {tr['error']}"
1365
+ for tr in tool_results
1366
+ ]
1367
+ )
1368
+ messages.append(Message.user(f"Tool results:\n{results_text}\n\nPlease provide your final answer based on these results."))
1369
+
1370
+ # Emit iteration completed checkpoint (with tool calls)
1371
+ iteration_duration_ms = int((_time.time() - iteration_start_time) * 1000)
1372
+ if workflow_ctx:
1373
+ workflow_ctx._send_checkpoint("agent.iteration.completed", {
1374
+ "agent.name": self.name,
1375
+ "iteration": iteration + 1,
1376
+ "duration_ms": iteration_duration_ms,
1377
+ "has_tool_calls": True,
1378
+ "tool_calls_count": len(tool_results),
1379
+ })
1380
+
1381
+ # Continue loop for agent to process results
1382
+
1383
+ else:
1384
+ # No tool calls - agent is done
1385
+ self.logger.debug(f"Agent completed after {iteration + 1} iterations")
1386
+
1387
+ # Emit iteration completed checkpoint
1388
+ iteration_duration_ms = int((_time.time() - iteration_start_time) * 1000)
1389
+ if workflow_ctx:
1390
+ workflow_ctx._send_checkpoint("agent.iteration.completed", {
1391
+ "agent.name": self.name,
1392
+ "iteration": iteration + 1,
1393
+ "duration_ms": iteration_duration_ms,
1394
+ "has_tool_calls": False,
1395
+ })
1396
+
1397
+ # Save conversation before returning
1398
+ if isinstance(context, AgentContext):
1399
+ await context.save_conversation_history(messages)
1400
+
1401
+ # Emit completion checkpoint
1402
+ if workflow_ctx:
1403
+ workflow_ctx._send_checkpoint("agent.completed", {
1404
+ "agent.name": self.name,
1405
+ "agent.iterations": iteration + 1,
1406
+ "agent.tool_calls_count": len(all_tool_calls),
1407
+ "output_length": len(response.text),
1408
+ })
1409
+
1410
+ # Add output data to span for trace visibility
1411
+ span.set_attribute("output.data", _serialize_tool_result(response.text))
1412
+
1413
+ return AgentResult(
1414
+ output=response.text,
1415
+ tool_calls=all_tool_calls,
1416
+ context=context,
1417
+ )
1418
+
1419
+ # Max iterations reached
1420
+ self.logger.warning(f"Agent reached max iterations ({self.max_iterations})")
1421
+ final_output = messages[-1].content if messages else "No output generated"
1422
+
1423
+ # Emit max iterations reached checkpoint (separate event for metrics)
1424
+ if workflow_ctx:
1425
+ workflow_ctx._send_checkpoint("agent.max_iterations.reached", {
1426
+ "agent.name": self.name,
1427
+ "max_iterations": self.max_iterations,
1428
+ "tool_calls_count": len(all_tool_calls),
1429
+ })
1430
+
1431
+ # Save conversation before returning
1432
+ if isinstance(context, AgentContext):
1433
+ await context.save_conversation_history(messages)
1434
+
1435
+ # Emit completion checkpoint with max iterations flag
1436
+ if workflow_ctx:
1437
+ workflow_ctx._send_checkpoint("agent.completed", {
1438
+ "agent.name": self.name,
1439
+ "agent.iterations": self.max_iterations,
1440
+ "agent.tool_calls_count": len(all_tool_calls),
1441
+ "agent.max_iterations_reached": True,
1442
+ "output_length": len(final_output),
1443
+ })
1444
+
1445
+ # Add output data to span for trace visibility
1446
+ span.set_attribute("output.data", _serialize_tool_result(final_output))
1447
+
1448
+ return AgentResult(
1449
+ output=final_output,
1450
+ tool_calls=all_tool_calls,
1451
+ context=context,
1452
+ )
1453
+ except Exception as e:
1454
+ # Emit error checkpoint for observability
1455
+ if workflow_ctx:
1456
+ workflow_ctx._send_checkpoint("agent.failed", {
1457
+ "agent.name": self.name,
1458
+ "error": str(e),
1459
+ "error_type": type(e).__name__,
1460
+ })
1461
+ raise
1462
+ finally:
1463
+ # Always reset context to prevent leakage between agent executions
1464
+ from ..context import _current_context
1465
+ _current_context.reset(token)
1466
+
1467
+ async def resume_from_hitl(
1468
+ self,
1469
+ context: Context,
1470
+ agent_context: Dict,
1471
+ user_response: str,
1472
+ ) -> AgentResult:
1473
+ """
1474
+ Resume agent execution after HITL pause.
1475
+
1476
+ This method reconstructs agent state from the checkpoint and injects
1477
+ the user's response as the successful tool result, then continues
1478
+ the conversation loop.
1479
+
1480
+ Args:
1481
+ context: Current execution context (workflow or agent)
1482
+ agent_context: Agent state from WaitingForUserInputException.agent_context
1483
+ user_response: User's answer to the HITL question
1484
+
1485
+ Returns:
1486
+ AgentResult with final output and tool calls
1487
+ """
1488
+ self.logger.info(f"Resuming agent '{self.name}' from HITL pause")
1489
+
1490
+ # 1. Restore conversation state
1491
+ messages = [
1492
+ Message(role=lm.MessageRole(msg["role"]), content=msg["content"])
1493
+ for msg in agent_context["messages"]
1494
+ ]
1495
+ iteration = agent_context["iteration"]
1496
+ all_tool_calls = agent_context["all_tool_calls"]
1497
+
1498
+ # 2. Restore partial tool results for current iteration
1499
+ tool_results = agent_context["tool_results"]
1500
+
1501
+ # 3. Inject user response as successful tool result
1502
+ pending_tool = agent_context["pending_tool_call"]
1503
+ tool_results.append({
1504
+ "tool": pending_tool["name"],
1505
+ "result": json.dumps(user_response),
1506
+ "error": None,
1507
+ })
1508
+
1509
+ self.logger.debug(
1510
+ f"Injected user response for tool '{pending_tool['name']}': {user_response}"
1511
+ )
1512
+
1513
+ # 4. Add tool results to conversation
1514
+ results_text = "\n".join([
1515
+ f"Tool: {tr['tool']}\nResult: {tr['result']}"
1516
+ if tr["error"] is None
1517
+ else f"Tool: {tr['tool']}\nError: {tr['error']}"
1518
+ for tr in tool_results
1519
+ ])
1520
+ messages.append(Message.user(
1521
+ f"Tool results:\n{results_text}\n\n"
1522
+ f"Please provide your final answer based on these results."
1523
+ ))
1524
+
1525
+ # 5. Continue agent execution loop from next iteration
1526
+ return await self._continue_execution_from_iteration(
1527
+ context=context,
1528
+ messages=messages,
1529
+ iteration=iteration + 1, # Next iteration
1530
+ all_tool_calls=all_tool_calls,
1531
+ )
1532
+
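Reviewer note: a HITL round-trip sketch. This is hypothetical driver code; the platform normally resumes via workflow checkpoints (`_agent_resume_info`) rather than manual exception handling, and `ctx` stands in for the paused context:

```python
try:
    result = await agent.run_sync("Book a flight to Oslo")
except WaitingForUserInputException as pause:
    answer = input(pause.question)            # collect the user's reply somehow
    result = await agent.resume_from_hitl(
        context=ctx,                          # the paused workflow context
        agent_context=pause.agent_context,    # state captured at pause time
        user_response=answer,
    )
print(result.output)
```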
1533
+ async def _continue_execution_from_iteration(
1534
+ self,
1535
+ context: Context,
1536
+ messages: List[Message],
1537
+ iteration: int,
1538
+ all_tool_calls: List[Dict],
1539
+ ) -> AgentResult:
1540
+ """
1541
+ Continue agent execution from a specific iteration.
1542
+
1543
+ This is the core execution loop extracted to support both:
1544
+ 1. Normal execution (starting from iteration 0)
1545
+ 2. Resume after HITL (starting from iteration N)
1546
+
1547
+ Args:
1548
+ context: Execution context
1549
+ messages: Conversation history
1550
+ iteration: Starting iteration number
1551
+ all_tool_calls: Accumulated tool calls
1552
+
1553
+ Returns:
1554
+ AgentResult with output and tool calls
1555
+ """
1556
+ # Extract workflow context for checkpointing
1557
+ workflow_ctx = None
1558
+ if hasattr(context, "_workflow_entity"):
1559
+ workflow_ctx = context
1560
+ elif hasattr(context, "_agent_data") and "_workflow_ctx" in context._agent_data:
1561
+ workflow_ctx = context._agent_data["_workflow_ctx"]
1562
+
1563
+ # Prepare tool definitions
1564
+ tool_defs = [
1565
+ ToolDefinition(
1566
+ name=name,
1567
+ description=tool.description or f"Tool: {name}",
1568
+ parameters=tool.input_schema if hasattr(tool, "input_schema") else {},
1569
+ )
1570
+ for name, tool in self.tools.items()
1571
+ ]
1572
+
1573
+ # Main iteration loop (continue from specified iteration)
1574
+ while iteration < self.max_iterations:
1575
+ self.logger.debug(f"Agent iteration {iteration + 1}/{self.max_iterations}")
1576
+
1577
+ # Call LLM for next response
1578
+ if self._language_model:
1579
+ # Legacy API: model is a LanguageModel instance
1580
+ request = GenerateRequest(
1581
+ model="mock-model",  # unused by legacy LanguageModel instances; matches _run_impl
+ system_prompt=self.instructions,
1582
+ messages=messages,
1583
+ tools=tool_defs if tool_defs else [],
1584
+ )
1585
+ request.config.temperature = self.temperature
1586
+ if self.max_tokens:
1587
+ request.config.max_tokens = self.max_tokens
1588
+ if self.top_p:
1589
+ request.config.top_p = self.top_p
1590
+ response = await self._language_model.generate(request)
1591
+
1592
+ # Track cost for this LLM call
1593
+ self._track_llm_cost(response, workflow_ctx)
1594
+ else:
1595
+ # New API: model is a string, create internal LM instance
1596
+ request = GenerateRequest(
1597
+ model=self.model,
1598
+ system_prompt=self.instructions,
1599
+ messages=messages,
1600
+ tools=tool_defs if tool_defs else [],
1601
+ )
1602
+ request.config.temperature = self.temperature
1603
+ if self.max_tokens:
1604
+ request.config.max_tokens = self.max_tokens
1605
+ if self.top_p:
1606
+ request.config.top_p = self.top_p
1607
+
1608
+ # Create internal LM instance for generation
1609
+ from ..lm import _LanguageModel
1610
+ provider, model_name = self.model.split('/', 1)
1611
+ internal_lm = _LanguageModel(provider=provider.lower(), default_model=None)
1612
+ response = await internal_lm.generate(request)
1613
+
1614
+ # Track cost for this LLM call
1615
+ self._track_llm_cost(response, workflow_ctx)
1616
+
1617
+ # Add assistant response to messages
1618
+ messages.append(Message.assistant(response.text))
1619
+
1620
+ # Check if LLM wants to use tools
1621
+ if response.tool_calls:
1622
+ self.logger.debug(f"Agent calling {len(response.tool_calls)} tool(s)")
1623
+
1624
+ # Store current conversation in context for potential handoffs
1625
+ if not hasattr(context, '_agent_data'):
1626
+ context._agent_data = {}
1627
+ context._agent_data["_current_conversation"] = messages
1628
+
1629
+ # Execute tool calls
1630
+ tool_results = []
1631
+ for tool_call in response.tool_calls:
1632
+ tool_name = tool_call["name"]
1633
+ tool_args_str = tool_call["arguments"]
1634
+
1635
+ # Track tool call
1636
+ all_tool_calls.append({
1637
+ "name": tool_name,
1638
+ "arguments": tool_args_str,
1639
+ "iteration": iteration + 1,
1640
+ })
1641
+
1642
+ # Execute tool
1643
+ try:
1644
+ # Parse arguments
1645
+ tool_args = json.loads(tool_args_str)
1646
+
1647
+ # Get tool
1648
+ tool = self.tools.get(tool_name)
1649
+ if not tool:
1650
+ result_text = f"Error: Tool '{tool_name}' not found"
1651
+ else:
1652
+ # Execute tool
1653
+ result = await tool.invoke(context, **tool_args)
1654
+
1655
+ # Check if this was a handoff
1656
+ if isinstance(result, dict) and result.get("_handoff"):
1657
+ self.logger.info(
1658
+ f"Handoff detected to '{result['to_agent']}', "
1659
+ f"terminating current agent"
1660
+ )
1661
+ # Save conversation before returning
1662
+ if isinstance(context, AgentContext):
1663
+ await context.save_conversation_history(messages)
1664
+ # Return immediately with handoff result
1665
+ return AgentResult(
1666
+ output=result["output"],
1667
+ tool_calls=all_tool_calls + result.get("tool_calls", []),
1668
+ context=context,
1669
+ handoff_to=result["to_agent"],
1670
+ handoff_metadata=result,
1671
+ )
1672
+
1673
+ result_text = _serialize_tool_result(result)
1674
+
1675
+ tool_results.append(
1676
+ {"tool": tool_name, "result": result_text, "error": None}
1677
+ )
1678
+
1679
+ except WaitingForUserInputException as e:
1680
+ # HITL PAUSE: Capture agent state and propagate exception
1681
+ self.logger.info(f"Agent pausing for user input at iteration {iteration}")
1682
+
1683
+ # Serialize messages to dict format
1684
+ messages_dict = [
1685
+ {"role": msg.role.value, "content": msg.content}
1686
+ for msg in messages
1687
+ ]
1688
+
1689
+ # Enhance exception with agent execution context
1691
+ raise WaitingForUserInputException(
1692
+ question=e.question,
1693
+ input_type=e.input_type,
1694
+ options=e.options,
1695
+ checkpoint_state=e.checkpoint_state,
1696
+ agent_context={
1697
+ "agent_name": self.name,
1698
+ "iteration": iteration,
1699
+ "messages": messages_dict,
1700
+ "tool_results": tool_results,
1701
+ "pending_tool_call": {
1702
+ "name": tool_call["name"],
1703
+ "arguments": tool_call["arguments"],
1704
+ "tool_call_index": response.tool_calls.index(tool_call),
1705
+ },
1706
+ "all_tool_calls": all_tool_calls,
1707
+ "model_config": {
1708
+ "model": self.model,
1709
+ "temperature": self.temperature,
1710
+ "max_tokens": self.max_tokens,
1711
+ "top_p": self.top_p,
1712
+ },
1713
+ },
1714
+ ) from e
1715
+
1716
+ except Exception as e:
1717
+ # Regular tool errors - log and continue
1718
+ self.logger.error(f"Tool execution error: {e}")
1719
+ tool_results.append(
1720
+ {"tool": tool_name, "result": None, "error": str(e)}
1721
+ )
1722
+
1723
+ # Add tool results to conversation
1724
+ results_text = "\n".join([
1725
+ f"Tool: {tr['tool']}\nResult: {tr['result']}"
1726
+ if tr["error"] is None
1727
+ else f"Tool: {tr['tool']}\nError: {tr['error']}"
1728
+ for tr in tool_results
1729
+ ])
1730
+ messages.append(Message.user(
1731
+ f"Tool results:\n{results_text}\n\n"
1732
+ f"Please provide your final answer based on these results."
1733
+ ))
1734
+
1735
+ # Continue loop for agent to process results
1736
+
1737
+ else:
1738
+ # No tool calls - agent is done
1739
+ self.logger.debug(f"Agent completed after {iteration + 1} iterations")
1740
+ # Save conversation before returning
1741
+ if isinstance(context, AgentContext):
1742
+ await context.save_conversation_history(messages)
1743
+
1744
+ # Emit completion checkpoint
1745
+ if workflow_ctx:
1746
+ workflow_ctx._send_checkpoint("agent.completed", {
1747
+ "agent.name": self.name,
1748
+ "agent.iterations": iteration + 1,
1749
+ "agent.tool_calls_count": len(all_tool_calls),
1750
+ "output_length": len(response.text),
1751
+ })
1752
+
1753
+ return AgentResult(
1754
+ output=response.text,
1755
+ tool_calls=all_tool_calls,
1756
+ context=context,
1757
+ )
1758
+
1759
+ iteration += 1
1760
+
1761
+ # Max iterations reached
1762
+ self.logger.warning(f"Agent reached max iterations ({self.max_iterations})")
1763
+ final_output = messages[-1].content if messages else "No output generated"
1764
+ # Save conversation before returning
1765
+ if isinstance(context, AgentContext):
1766
+ await context.save_conversation_history(messages)
1767
+
1768
+ # Emit completion checkpoint with max iterations flag
1769
+ if workflow_ctx:
1770
+ workflow_ctx._send_checkpoint("agent.completed", {
1771
+ "agent.name": self.name,
1772
+ "agent.iterations": self.max_iterations,
1773
+ "agent.tool_calls_count": len(all_tool_calls),
1774
+ "agent.max_iterations_reached": True,
1775
+ "output_length": len(final_output),
1776
+ })
1777
+
1778
+ return AgentResult(
1779
+ output=final_output,
1780
+ tool_calls=all_tool_calls,
1781
+ context=context,
1782
+ )