daita_agents-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. daita/__init__.py +216 -0
  2. daita/agents/__init__.py +33 -0
  3. daita/agents/base.py +743 -0
  4. daita/agents/substrate.py +1141 -0
  5. daita/cli/__init__.py +145 -0
  6. daita/cli/__main__.py +7 -0
  7. daita/cli/ascii_art.py +44 -0
  8. daita/cli/core/__init__.py +0 -0
  9. daita/cli/core/create.py +254 -0
  10. daita/cli/core/deploy.py +473 -0
  11. daita/cli/core/deployments.py +309 -0
  12. daita/cli/core/import_detector.py +219 -0
  13. daita/cli/core/init.py +481 -0
  14. daita/cli/core/logs.py +239 -0
  15. daita/cli/core/managed_deploy.py +709 -0
  16. daita/cli/core/run.py +648 -0
  17. daita/cli/core/status.py +421 -0
  18. daita/cli/core/test.py +239 -0
  19. daita/cli/core/webhooks.py +172 -0
  20. daita/cli/main.py +588 -0
  21. daita/cli/utils.py +541 -0
  22. daita/config/__init__.py +62 -0
  23. daita/config/base.py +159 -0
  24. daita/config/settings.py +184 -0
  25. daita/core/__init__.py +262 -0
  26. daita/core/decision_tracing.py +701 -0
  27. daita/core/exceptions.py +480 -0
  28. daita/core/focus.py +251 -0
  29. daita/core/interfaces.py +76 -0
  30. daita/core/plugin_tracing.py +550 -0
  31. daita/core/relay.py +779 -0
  32. daita/core/reliability.py +381 -0
  33. daita/core/scaling.py +459 -0
  34. daita/core/tools.py +554 -0
  35. daita/core/tracing.py +770 -0
  36. daita/core/workflow.py +1144 -0
  37. daita/display/__init__.py +1 -0
  38. daita/display/console.py +160 -0
  39. daita/execution/__init__.py +58 -0
  40. daita/execution/client.py +856 -0
  41. daita/execution/exceptions.py +92 -0
  42. daita/execution/models.py +317 -0
  43. daita/llm/__init__.py +60 -0
  44. daita/llm/anthropic.py +291 -0
  45. daita/llm/base.py +530 -0
  46. daita/llm/factory.py +101 -0
  47. daita/llm/gemini.py +355 -0
  48. daita/llm/grok.py +219 -0
  49. daita/llm/mock.py +172 -0
  50. daita/llm/openai.py +220 -0
  51. daita/plugins/__init__.py +141 -0
  52. daita/plugins/base.py +37 -0
  53. daita/plugins/base_db.py +167 -0
  54. daita/plugins/elasticsearch.py +849 -0
  55. daita/plugins/mcp.py +481 -0
  56. daita/plugins/mongodb.py +520 -0
  57. daita/plugins/mysql.py +362 -0
  58. daita/plugins/postgresql.py +342 -0
  59. daita/plugins/redis_messaging.py +500 -0
  60. daita/plugins/rest.py +537 -0
  61. daita/plugins/s3.py +770 -0
  62. daita/plugins/slack.py +729 -0
  63. daita/utils/__init__.py +18 -0
  64. daita_agents-0.2.0.dist-info/METADATA +409 -0
  65. daita_agents-0.2.0.dist-info/RECORD +69 -0
  66. daita_agents-0.2.0.dist-info/WHEEL +5 -0
  67. daita_agents-0.2.0.dist-info/entry_points.txt +2 -0
  68. daita_agents-0.2.0.dist-info/licenses/LICENSE +56 -0
  69. daita_agents-0.2.0.dist-info/top_level.txt +1 -0
daita/agents/base.py ADDED
@@ -0,0 +1,743 @@
+ """
+ Updated BaseAgent with Unified Tracing Integration
+
+ This replaces the old BaseAgent to use the new unified tracing system.
+ All operations are automatically traced without user configuration.
+
+ Key Changes:
+ - Removed old metrics system completely
+ - Integrated automatic tracing for all operations
+ - Added decision tracing for retry logic
+ - Automatic agent lifecycle tracing
+ - Zero configuration required
+ """
+
+ import asyncio
+ import logging
+ import uuid
+ import random
+ import time
+ from datetime import datetime, timezone
+ from typing import Any, Dict, List, Optional, Union
+
+ from ..config.base import AgentConfig, AgentType, RetryStrategy, RetryPolicy
+ from ..core.interfaces import Agent, LLMProvider
+ from ..core.exceptions import DaitaError, AgentError, LLMError, BackpressureError, TaskTimeoutError
+ from ..core.tracing import get_trace_manager, TraceType, TraceStatus
+ from ..core.decision_tracing import record_decision_point, DecisionType
+ from ..core.reliability import (
+     TaskManager, get_global_task_manager, TaskStatus,
+     BackpressureController
+ )
+
+ logger = logging.getLogger(__name__)
+
+ class BaseAgent(Agent):
+     """
+     Base implementation for all Daita agents with automatic tracing.
+
+     Every operation is automatically traced and sent to the dashboard.
+     Users don't need to configure anything - tracing just works.
+
+     Features:
+     - Automatic operation tracing
+     - Retry decision tracing with confidence scores
+     - Agent lifecycle tracing
+     - LLM integration with automatic token tracking
+     - Performance monitoring
+     - Error tracking and correlation
+     """
+
+     def __init__(
+         self,
+         config: AgentConfig,
+         llm_provider: Optional[LLMProvider] = None,
+         agent_id: Optional[str] = None,
+         name: Optional[str] = None,
+         enable_reliability: bool = False,
+         max_concurrent_tasks: int = 10,
+         max_queue_size: int = 100,
+     ):
+         self.config = config
+         self.llm = llm_provider
+         self.name = name or config.name
+         self.agent_type = config.type
+         self.enable_reliability = enable_reliability
+
+         # Generate unique ID
+         if agent_id:
+             self.agent_id = agent_id
+         elif self.name:
+             slug = self.name.lower().replace(' ', '_').replace('-', '_')
+             self.agent_id = f"{slug}_{uuid.uuid4().hex[:8]}"
+         else:
+             self.agent_id = f"{self.__class__.__name__}_{uuid.uuid4().hex[:8]}"
+
+         # Runtime state
+         self._running = False
+         self._tasks = []
+
+         # Get trace manager for automatic tracing
+         self.trace_manager = get_trace_manager()
+
+         # Reliability features (enabled when reliability is configured)
+         self.task_manager = get_global_task_manager() if enable_reliability else None
+         self.backpressure_controller = None
+         if enable_reliability:
+             self.backpressure_controller = BackpressureController(
+                 max_concurrent_tasks=max_concurrent_tasks,
+                 max_queue_size=max_queue_size,
+                 agent_id=self.agent_id
+             )
+
+         # Set agent ID in LLM provider for automatic LLM tracing
+         if self.llm:
+             self.llm.set_agent_id(self.agent_id)
+
+         logger.debug(f"Agent {self.name} ({self.agent_id}) initialized with automatic tracing")
+
+     async def start(self) -> None:
+         """Start the agent with automatic lifecycle tracing."""
+         if self._running:
+             return
+
+         # Start decision display if enabled
+         if hasattr(self, '_decision_display') and self._decision_display:
+             self._decision_display.start()
+
+         # Automatically trace agent lifecycle
+         async with self.trace_manager.span(
+             operation_name="agent_start",
+             trace_type=TraceType.AGENT_LIFECYCLE,
+             agent_id=self.agent_id,
+             agent_name=self.name,
+             agent_type=self.agent_type.value,
+             retry_enabled=str(self.config.retry_enabled)
+         ):
+             self._running = True
+             logger.info(f"Agent {self.name} started")
+
+     async def stop(self) -> None:
+         """Stop the agent with automatic lifecycle tracing."""
+         if not self._running:
+             return
+
+         # Stop decision display if enabled
+         if hasattr(self, '_decision_display') and self._decision_display:
+             self._decision_display.stop()
+             # Cleanup decision streaming registration
+             try:
+                 from ..core.decision_tracing import unregister_agent_decision_stream
+                 unregister_agent_decision_stream(
+                     agent_id=self.agent_id,
+                     callback=self._decision_display.handle_event
+                 )
+             except Exception as e:
+                 logger.debug(f"Failed to cleanup decision display: {e}")
+
+         # Automatically trace agent lifecycle
+         async with self.trace_manager.span(
+             operation_name="agent_stop",
+             trace_type=TraceType.AGENT_LIFECYCLE,
+             agent_id=self.agent_id,
+             agent_name=self.name,
+             tasks_completed=str(len(self._tasks))
+         ):
+             # Cancel running tasks
+             for task in self._tasks:
+                 if not task.done():
+                     task.cancel()
+
+             if self._tasks:
+                 await asyncio.gather(*self._tasks, return_exceptions=True)
+                 self._tasks.clear()
+
+             self._running = False
+             logger.info(f"Agent {self.name} stopped")
+
+     async def _process(
+         self,
+         task: str,
+         data: Any = None,
+         context: Optional[Dict[str, Any]] = None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         INTERNAL: Process a task with reliability features and automatic tracing.
+
+         This is the internal infrastructure layer that provides:
+         - Retry logic with decision tracing
+         - Reliability features (backpressure, task tracking)
+         - Automatic tracing (AGENT_EXECUTION spans)
+         - Performance tracking
+         - Structured error handling
+
+         Users should NOT call this directly. Use public APIs:
+         - run() / run_detailed() for direct execution
+         - receive_message() for workflow integration
+         - on_webhook() for webhook triggers
+         - on_schedule() for scheduled tasks
+
+         Args:
+             task: Internal task identifier
+             data: Input data
+             context: Execution context with metadata
+             **kwargs: Additional arguments
+
+         Returns:
+             Task results with automatic tracing metadata
+         """
+         # Build full context
+         full_context = {
+             'agent_id': self.agent_id,
+             'agent_name': self.name,
+             'agent_type': self.agent_type.value,
+             'task': task,
+             'retry_enabled': self.config.retry_enabled,
+             'reliability_enabled': self.enable_reliability,
+             **(context or {}),
+             **kwargs
+         }
+
+         # Handle reliability features if enabled
+         if self.enable_reliability:
+             return await self._process_with_reliability(task, data, full_context)
+         else:
+             return await self._process_without_reliability(task, data, full_context)
+
+     async def _process_with_reliability(
+         self,
+         task: str,
+         data: Any,
+         context: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Process task with full reliability features."""
+         # Track processing time
+         start_time = time.time()
+
+         # Check backpressure first
+         if self.backpressure_controller and not await self.backpressure_controller.acquire_processing_slot():
+             raise BackpressureError(
+                 "Unable to acquire processing slot",
+                 agent_id=self.agent_id,
+                 queue_size=self.backpressure_controller.task_queue.qsize()
+             )
+
+         # Create task in task manager
+         task_id = None
+         if self.task_manager:
+             task_id = await self.task_manager.create_task(
+                 agent_id=self.agent_id,
+                 task_type=task,
+                 data=data,
+                 context=context
+             )
+             context['task_id'] = task_id
+             # Update task status to running
+             await self.task_manager.update_status(task_id, TaskStatus.RUNNING)
+
+         try:
+             # Automatically trace the entire operation
+             async with self.trace_manager.span(
+                 operation_name=f"agent_process_{task}",
+                 trace_type=TraceType.AGENT_EXECUTION,
+                 agent_id=self.agent_id,
+                 input_data=data,
+                 agent_name=self.name,
+                 task=task,
+                 task_id=task_id,
+                 retry_enabled=str(self.config.retry_enabled),
+                 reliability_enabled="true"
+             ) as span_id:
+
+                 # Execute with or without retry logic
+                 if self.config.retry_enabled:
+                     result = await self._process_with_retry(span_id, task, data, context)
+                 else:
+                     result = await self._process_fail_fast(span_id, task, data, context)
+
+                 # Add processing time to result
+                 processing_time_ms = (time.time() - start_time) * 1000
+                 if isinstance(result, dict):
+                     result['processing_time_ms'] = processing_time_ms
+
+                 # Update task status to completed
+                 if task_id and self.task_manager:
+                     await self.task_manager.update_status(task_id, TaskStatus.COMPLETED)
+
+                 return result
+
+         except Exception as e:
+             # Update task status to failed
+             if task_id and self.task_manager:
+                 await self.task_manager.update_status(task_id, TaskStatus.FAILED, error=str(e))
+             raise
+
+         finally:
+             # Always release the processing slot
+             if self.backpressure_controller:
+                 self.backpressure_controller.release_processing_slot()
+
+     async def _process_without_reliability(
+         self,
+         task: str,
+         data: Any,
+         context: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Process task without reliability features (original behavior)."""
+         # Track processing time
+         start_time = time.time()
+
+         # Automatically trace the entire operation
+         async with self.trace_manager.span(
+             operation_name=f"agent_process_{task}",
+             trace_type=TraceType.AGENT_EXECUTION,
+             agent_id=self.agent_id,
+             input_data=data,
+             agent_name=self.name,
+             task=task,
+             retry_enabled=str(self.config.retry_enabled),
+             reliability_enabled="false"
+         ) as span_id:
+
+             # Execute with or without retry logic
+             if self.config.retry_enabled:
+                 result = await self._process_with_retry(span_id, task, data, context)
+             else:
+                 result = await self._process_fail_fast(span_id, task, data, context)
+
+             # Add processing time to result
+             processing_time_ms = (time.time() - start_time) * 1000
+             if isinstance(result, dict):
+                 result['processing_time_ms'] = processing_time_ms
+
+             return result
+
+     async def _process_with_retry(
+         self,
+         parent_span_id: str,
+         task: str,
+         data: Any,
+         context: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Process task with retry logic and automatic retry decision tracing."""
+         retry_policy = self.config.retry_policy
+         max_attempts = retry_policy.max_retries + 1
+         last_exception = None
+
+         for attempt in range(1, max_attempts + 1):
+             # Create a child span for each retry attempt
+             async with self.trace_manager.span(
+                 operation_name=f"retry_attempt_{attempt}",
+                 trace_type=TraceType.AGENT_EXECUTION,
+                 agent_id=self.agent_id,
+                 parent_span_id=parent_span_id,
+                 attempt=str(attempt),
+                 max_attempts=str(max_attempts),
+                 is_retry=str(attempt > 1)
+             ) as attempt_span_id:
+
+                 try:
+                     # Add attempt info to context
+                     attempt_context = {
+                         **context,
+                         'attempt_number': attempt,
+                         'max_attempts': max_attempts,
+                         'is_retry': attempt > 1
+                     }
+
+                     # Execute the task
+                     result = await self._process_once(task, data, attempt_context, attempt, max_attempts)
+
+                     # Success!
+                     if attempt > 1:
+                         logger.info(f"Agent {self.name} succeeded on attempt {attempt}")
+
+                     return self._format_success_response(result, attempt_context, attempt, max_attempts)
+
+                 except Exception as e:
+                     last_exception = e
+
+                     # Should we retry? Use decision tracing to record the retry decision
+                     if attempt < max_attempts:
+                         should_retry = await self._should_retry_error_with_tracing(
+                             e, attempt, max_attempts, attempt_span_id
+                         )
+
+                         if should_retry:
+                             # Calculate delay and wait
+                             delay = self._calculate_retry_delay(attempt - 1, retry_policy)
+                             logger.debug(f"Agent {self.name} retrying in {delay:.2f}s")
+                             await asyncio.sleep(delay)
+                             continue
+
+                     # Don't retry or no more attempts
+                     logger.debug(f"Agent {self.name} not retrying: {type(e).__name__}")
+                     return self._format_error_response(last_exception, context, attempt, max_attempts)
+
+         # All attempts exhausted
+         return self._format_error_response(
+             last_exception or Exception("Unknown error"), context, max_attempts, max_attempts
+         )
+
+     async def _process_fail_fast(
+         self,
+         span_id: str,
+         task: str,
+         data: Any,
+         context: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Process task in fail-fast mode with error tracing."""
+         try:
+             result = await self._process_once(task, data, context, attempt=1, max_attempts=1)
+             return self._format_success_response(result, context, 1, 1)
+         except Exception as e:
+             logger.error(f"Error in agent {self.name} (fail-fast mode): {str(e)}")
+             return self._format_error_response(e, context, 1, 1)
+
+     async def _process_once(
+         self,
+         task: str,
+         data: Any,
+         context: Dict[str, Any],
+         attempt: int,
+         max_attempts: int
+     ) -> Dict[str, Any]:
+         """
+         Execute the task once without retry logic.
+
+         Subclasses should override this method for their specific behavior.
+         The automatic tracing happens at higher levels.
+         """
+         # Default implementation for base agent
+         return {
+             'message': f'Agent {self.name} processed task "{task}"',
+             'task': task,
+             'data': data,
+             'agent_id': self.agent_id,
+             'agent_name': self.name,
+             'attempt': attempt,
+             'timestamp': datetime.now(timezone.utc).isoformat()
+         }
+
+     async def _should_retry_error_with_tracing(
+         self,
+         error: Exception,
+         attempt: int,
+         max_attempts: int,
+         span_id: str
+     ) -> bool:
+         """
+         Determine if an error should be retried with decision tracing.
+
+         This traces the retry decision-making process including confidence
+         scores and reasoning for better observability.
+         """
+         # Use decision tracing to record retry logic
+         async with record_decision_point("retry_decision", DecisionType.VALIDATION, self.agent_id) as decision:
+
+             # Import here to avoid circular imports
+             from ..core.exceptions import (
+                 TransientError, RetryableError, PermanentError,
+                 classify_exception
+             )
+
+             # Classify the error
+             error_class = classify_exception(error)
+             error_type = type(error).__name__
+
+             # Decision logic with reasoning
+             reasoning = []
+             should_retry = False
+             confidence = 0.0
+
+             # Check attempt limit
+             if attempt >= max_attempts:
+                 reasoning.append(f"Max attempts reached ({attempt}/{max_attempts})")
+                 should_retry = False
+                 confidence = 1.0  # Certain we shouldn't retry
+
+             # Error classification logic
+             elif error_class == "transient":
+                 reasoning.append(f"Transient error detected: {error_type}")
+                 reasoning.append("Transient errors are typically safe to retry")
+                 should_retry = True
+                 confidence = 0.9
+
+             elif error_class == "retryable":
+                 reasoning.append(f"Retryable error detected: {error_type}")
+                 reasoning.append("Error may resolve on retry")
+                 should_retry = True
+                 confidence = 0.7
+
+             elif error_class == "permanent":
+                 reasoning.append(f"Permanent error detected: {error_type}")
+                 reasoning.append("Permanent errors should not be retried")
+                 should_retry = False
+                 confidence = 0.95
+
+             else:
+                 # Unknown error - use heuristics
+                 reasoning.append(f"Unknown error type: {error_type}")
+
+                 if isinstance(error, (ValueError, TypeError, KeyError)):
+                     reasoning.append("Logic/data error - likely permanent")
+                     should_retry = False
+                     confidence = 0.8
+                 else:
+                     reasoning.append("Unknown error - defaulting to retry")
+                     should_retry = True
+                     confidence = 0.5
+
+             # Record the decision
+             decision.set_confidence(confidence)
+             for reason in reasoning:
+                 decision.add_reasoning(reason)
+
+             decision.set_factor("error_type", error_type)
+             decision.set_factor("error_class", error_class)
+             decision.set_factor("attempt", attempt)
+             decision.set_factor("max_attempts", max_attempts)
+
+             # Add alternatives considered
+             decision.add_alternative("retry" if not should_retry else "fail")
+
+             logger.debug(f"Retry decision for {error_type}: {should_retry} (confidence: {confidence:.2f})")
+             return should_retry
+
+     def _calculate_retry_delay(self, attempt: int, retry_policy) -> float:
+         """Calculate retry delay with jitter."""
+         if hasattr(retry_policy, 'calculate_delay'):
+             # Use the RetryPolicy's built-in delay calculation
+             return retry_policy.calculate_delay(attempt)
+
+         # Legacy fallback for old-style retry policies
+         if retry_policy.strategy in [RetryStrategy.IMMEDIATE, "immediate"]:
+             delay = 0.0
+         elif retry_policy.strategy in [RetryStrategy.FIXED, RetryStrategy.FIXED_DELAY, "fixed", "fixed_delay"]:
+             delay = getattr(retry_policy, 'base_delay', getattr(retry_policy, 'initial_delay', 1.0))
+         else:  # EXPONENTIAL (default)
+             base_delay = getattr(retry_policy, 'base_delay', getattr(retry_policy, 'initial_delay', 1.0))
+             delay = base_delay * (2 ** attempt)
+
+         # Add small random jitter to prevent thundering herd
+         jitter = delay * 0.1 * random.random()
+         delay += jitter
+
+         return delay
+
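As an illustration (not part of the package diff): the legacy exponential branch above doubles a base delay for each prior attempt and then adds up to 10% random jitter, so with a base delay of 1.0s the first three retries wait roughly 1s, 2s, and 4s plus jitter. A minimal standalone sketch of that arithmetic, using a hypothetical helper name:

import random

def legacy_exponential_delay(attempt: int, base_delay: float = 1.0) -> float:
    # Mirrors the else-branch of _calculate_retry_delay: 2**attempt backoff
    # plus a jitter term of at most 10% of the computed delay.
    delay = base_delay * (2 ** attempt)
    return delay + delay * 0.1 * random.random()

print([round(legacy_exponential_delay(a), 2) for a in range(3)])  # e.g. [1.04, 2.13, 4.31]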
+     def _format_success_response(
+         self,
+         result: Any,
+         context: Dict[str, Any],
+         attempt: int,
+         max_attempts: int
+     ) -> Dict[str, Any]:
+         """Format successful response with tracing metadata (flattened for better DX)."""
+         # Build response with framework metadata
+         response = {
+             'status': 'success',
+             'agent_id': self.agent_id,
+             'agent_name': self.name,
+             'context': context,
+             'retry_info': {
+                 'attempt': attempt,
+                 'max_attempts': max_attempts,
+                 'retry_enabled': self.config.retry_enabled
+             } if self.config.retry_enabled else None,
+             'timestamp': datetime.now(timezone.utc).isoformat()
+         }
+
+         # Flatten execution result into top level for better DX
+         # If result is a dict, merge it; otherwise add as 'result' key
+         if isinstance(result, dict):
+             # Merge execution result at top level (duplicate keys from result take precedence over framework keys)
+             response.update(result)
+         else:
+             # Non-dict results stored under 'result' key
+             response['result'] = result
+
+         return response
+
+     def _format_error_response(
+         self,
+         error: Exception,
+         context: Dict[str, Any],
+         attempt: int,
+         max_attempts: int
+     ) -> Dict[str, Any]:
+         """Format error response with tracing metadata."""
+         return {
+             'status': 'error',
+             'error': str(error),
+             'error_type': error.__class__.__name__,
+             'agent_id': self.agent_id,
+             'agent_name': self.name,
+             'context': context,
+             'result': None,  # Ensure result field exists for relay compatibility
+             'retry_info': {
+                 'attempt': attempt,
+                 'max_attempts': max_attempts,
+                 'retry_enabled': self.config.retry_enabled,
+                 'retry_exhausted': attempt >= max_attempts
+             } if self.config.retry_enabled else None,
+             'timestamp': datetime.now(timezone.utc).isoformat()
+         }
+
+     @property
+     def health(self) -> Dict[str, Any]:
+         """Get agent health information from unified tracing system."""
+         # Get real-time metrics from trace manager
+         metrics = self.trace_manager.get_agent_metrics(self.agent_id)
+
+         return {
+             'id': self.agent_id,
+             'name': self.name,
+             'type': self.agent_type.value,
+             'running': self._running,
+             'metrics': metrics,
+             'retry_config': {
+                 'enabled': self.config.retry_enabled,
+                 'max_retries': self.config.retry_policy.max_retries if self.config.retry_enabled else None,
+                 'strategy': self.config.retry_policy.strategy.value if self.config.retry_enabled else None,
+             },
+             'tracing': {
+                 'enabled': True,
+                 'trace_manager_available': self.trace_manager is not None
+             }
+         }
+
+     @property
+     def trace_id(self) -> Optional[str]:
+         """Get current trace ID for this agent."""
+         return self.trace_manager.trace_context.current_trace_id
+
+     @property
+     def current_span_id(self) -> Optional[str]:
+         """Get current span ID for this agent."""
+         return self.trace_manager.trace_context.current_span_id
+
+     def get_recent_operations(self, limit: int = 10) -> List[Dict[str, Any]]:
+         """Get recent operations for this agent from unified tracing."""
+         return self.trace_manager.get_recent_operations(agent_id=self.agent_id, limit=limit)
+
+     def get_trace_stats(self) -> Dict[str, Any]:
+         """Get comprehensive tracing statistics for this agent."""
+         return self.trace_manager.get_agent_metrics(self.agent_id)
+
+     def get_recent_decisions(self, limit: int = 10) -> List[Dict[str, Any]]:
+         """Get recent decision traces for this agent."""
+         from ..core.decision_tracing import get_recent_decisions
+         return get_recent_decisions(agent_id=self.agent_id, limit=limit)
+
+     def get_decision_stats(self) -> Dict[str, Any]:
+         """Get decision statistics for this agent."""
+         from ..core.decision_tracing import get_decision_stats
+         return get_decision_stats(agent_id=self.agent_id)
+
+     # Reliability management methods
+
+     def enable_reliability_features(
+         self,
+         max_concurrent_tasks: int = 10,
+         max_queue_size: int = 100
+     ) -> None:
+         """
+         Enable reliability features for this agent.
+
+         Args:
+             max_concurrent_tasks: Maximum concurrent tasks
+             max_queue_size: Maximum queue size for backpressure control
+         """
+         if self.enable_reliability:
+             logger.warning(f"Reliability already enabled for agent {self.name}")
+             return
+
+         self.enable_reliability = True
+         self.task_manager = get_global_task_manager()
+         self.backpressure_controller = BackpressureController(
+             max_concurrent_tasks=max_concurrent_tasks,
+             max_queue_size=max_queue_size,
+             agent_id=self.agent_id
+         )
+
+         logger.info(f"Enabled reliability features for agent {self.name}")
+
+     def disable_reliability_features(self) -> None:
+         """Disable reliability features for this agent."""
+         self.enable_reliability = False
+         self.task_manager = None
+         self.backpressure_controller = None
+
+         logger.info(f"Disabled reliability features for agent {self.name}")
+
+     async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
+         """Get status of a specific task."""
+         if not self.task_manager:
+             return None
+         return await self.task_manager.get_task_status(task_id)
+
+     async def get_agent_tasks(self, status: Optional[TaskStatus] = None) -> List[Dict[str, Any]]:
+         """Get all tasks for this agent, optionally filtered by status."""
+         if not self.task_manager:
+             return []
+
+         tasks = await self.task_manager.get_agent_tasks(self.agent_id, status)
+         return [
+             {
+                 "id": task.id,
+                 "status": task.status.value,
+                 "progress": task.progress,
+                 "error": task.error,
+                 "duration": task.duration(),
+                 "age": task.age(),
+                 "retry_count": task.retry_count
+             }
+             for task in tasks
+         ]
+
+     def get_backpressure_stats(self) -> Dict[str, Any]:
+         """Get current backpressure statistics."""
+         if not self.backpressure_controller:
+             return {"enabled": False}
+
+         stats = self.backpressure_controller.get_stats()
+         stats["enabled"] = True
+         return stats
+
+     async def cancel_task(self, task_id: str) -> bool:
+         """Cancel a specific task."""
+         if not self.task_manager:
+             return False
+         return await self.task_manager.cancel_task(task_id)
+
+     # Integration helpers
+
+     def create_child_agent(self, name: str, config_overrides: Optional[Dict[str, Any]] = None) -> "BaseAgent":
+         """Create a child agent that inherits tracing context."""
+         # Create new config based on current config
+         from ..config.base import AgentConfig
+
+         child_config = AgentConfig(
+             name=name,
+             type=self.config.type,
+             enable_retry=self.config.enable_retry,
+             retry_policy=self.config.retry_policy,
+             **(config_overrides or {})
+         )
+
+         # Create child agent
+         child = self.__class__(
+             config=child_config,
+             llm_provider=self.llm,
+             name=name
+         )
+
+         logger.debug(f"Created child agent {name} from parent {self.name}")
+         return child
+
+     def __repr__(self) -> str:
+         return f"BaseAgent(name='{self.name}', id='{self.agent_id}', running={self._running})"
+
+     def __str__(self) -> str:
+         return f"BaseAgent '{self.name}'"
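A minimal usage sketch, based only on the constructor, start()/stop(), and health shown in this file and not on anything else in the package: AgentType.CUSTOM and the AgentConfig defaults are assumptions, and real callers would go through the public run()/run_detailed() or receive_message() APIs mentioned in the _process docstring rather than what is shown here.

import asyncio

from daita.agents.base import BaseAgent
from daita.config.base import AgentConfig, AgentType

async def main() -> None:
    # AgentType.CUSTOM is an assumed enum member; substitute a real AgentType value.
    config = AgentConfig(name="demo-agent", type=AgentType.CUSTOM)
    agent = BaseAgent(config, enable_reliability=True, max_concurrent_tasks=5)

    await agent.start()              # emits an "agent_start" AGENT_LIFECYCLE span
    print(agent.health["running"])   # True; metrics come from the unified trace manager
    await agent.stop()               # cancels outstanding tasks, emits "agent_stop"

asyncio.run(main())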