daita_agents-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of daita-agents might be problematic.

Files changed (69)
  1. daita/__init__.py +208 -0
  2. daita/agents/__init__.py +33 -0
  3. daita/agents/base.py +722 -0
  4. daita/agents/substrate.py +895 -0
  5. daita/cli/__init__.py +145 -0
  6. daita/cli/__main__.py +7 -0
  7. daita/cli/ascii_art.py +44 -0
  8. daita/cli/core/__init__.py +0 -0
  9. daita/cli/core/create.py +254 -0
  10. daita/cli/core/deploy.py +473 -0
  11. daita/cli/core/deployments.py +309 -0
  12. daita/cli/core/import_detector.py +219 -0
  13. daita/cli/core/init.py +382 -0
  14. daita/cli/core/logs.py +239 -0
  15. daita/cli/core/managed_deploy.py +709 -0
  16. daita/cli/core/run.py +648 -0
  17. daita/cli/core/status.py +421 -0
  18. daita/cli/core/test.py +239 -0
  19. daita/cli/core/webhooks.py +172 -0
  20. daita/cli/main.py +588 -0
  21. daita/cli/utils.py +541 -0
  22. daita/config/__init__.py +62 -0
  23. daita/config/base.py +159 -0
  24. daita/config/settings.py +184 -0
  25. daita/core/__init__.py +262 -0
  26. daita/core/decision_tracing.py +701 -0
  27. daita/core/exceptions.py +480 -0
  28. daita/core/focus.py +251 -0
  29. daita/core/interfaces.py +76 -0
  30. daita/core/plugin_tracing.py +550 -0
  31. daita/core/relay.py +695 -0
  32. daita/core/reliability.py +381 -0
  33. daita/core/scaling.py +444 -0
  34. daita/core/tools.py +402 -0
  35. daita/core/tracing.py +770 -0
  36. daita/core/workflow.py +1084 -0
  37. daita/display/__init__.py +1 -0
  38. daita/display/console.py +160 -0
  39. daita/execution/__init__.py +58 -0
  40. daita/execution/client.py +856 -0
  41. daita/execution/exceptions.py +92 -0
  42. daita/execution/models.py +317 -0
  43. daita/llm/__init__.py +60 -0
  44. daita/llm/anthropic.py +166 -0
  45. daita/llm/base.py +373 -0
  46. daita/llm/factory.py +101 -0
  47. daita/llm/gemini.py +152 -0
  48. daita/llm/grok.py +114 -0
  49. daita/llm/mock.py +135 -0
  50. daita/llm/openai.py +109 -0
  51. daita/plugins/__init__.py +141 -0
  52. daita/plugins/base.py +37 -0
  53. daita/plugins/base_db.py +167 -0
  54. daita/plugins/elasticsearch.py +844 -0
  55. daita/plugins/mcp.py +481 -0
  56. daita/plugins/mongodb.py +510 -0
  57. daita/plugins/mysql.py +351 -0
  58. daita/plugins/postgresql.py +331 -0
  59. daita/plugins/redis_messaging.py +500 -0
  60. daita/plugins/rest.py +529 -0
  61. daita/plugins/s3.py +761 -0
  62. daita/plugins/slack.py +729 -0
  63. daita/utils/__init__.py +18 -0
  64. daita_agents-0.1.0.dist-info/METADATA +350 -0
  65. daita_agents-0.1.0.dist-info/RECORD +69 -0
  66. daita_agents-0.1.0.dist-info/WHEEL +5 -0
  67. daita_agents-0.1.0.dist-info/entry_points.txt +2 -0
  68. daita_agents-0.1.0.dist-info/licenses/LICENSE +56 -0
  69. daita_agents-0.1.0.dist-info/top_level.txt +1 -0
daita/agents/base.py ADDED
@@ -0,0 +1,722 @@
+"""
+Updated BaseAgent with Unified Tracing Integration
+
+This replaces the old BaseAgent to use the new unified tracing system.
+All operations are automatically traced without user configuration.
+
+Key Changes:
+- Removed old metrics system completely
+- Integrated automatic tracing for all operations
+- Added decision tracing for retry logic
+- Automatic agent lifecycle tracing
+- Zero configuration required
+"""
+
+import asyncio
+import logging
+import uuid
+import random
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Union
+
+from ..config.base import AgentConfig, AgentType, RetryStrategy, RetryPolicy
+from ..core.interfaces import Agent, LLMProvider
+from ..core.exceptions import DaitaError, AgentError, LLMError, BackpressureError, TaskTimeoutError
+from ..core.tracing import get_trace_manager, TraceType, TraceStatus
+from ..core.decision_tracing import record_decision_point, DecisionType
+from ..core.reliability import (
+    TaskManager, get_global_task_manager, TaskStatus,
+    BackpressureController
+)
+
+logger = logging.getLogger(__name__)
+
+class BaseAgent(Agent):
+    """
+    Base implementation for all Daita agents with automatic tracing.
+
+    Every operation is automatically traced and sent to the dashboard.
+    Users don't need to configure anything - tracing just works.
+
+    Features:
+    - Automatic operation tracing
+    - Retry decision tracing with confidence scores
+    - Agent lifecycle tracing
+    - LLM integration with automatic token tracking
+    - Performance monitoring
+    - Error tracking and correlation
+    """
+
+    def __init__(
+        self,
+        config: AgentConfig,
+        llm_provider: Optional[LLMProvider] = None,
+        agent_id: Optional[str] = None,
+        name: Optional[str] = None,
+        enable_reliability: bool = False,
+        max_concurrent_tasks: int = 10,
+        max_queue_size: int = 100,
+    ):
+        self.config = config
+        self.llm = llm_provider
+        self.name = name or config.name
+        self.agent_type = config.type
+        self.enable_reliability = enable_reliability
+
+        # Generate unique ID
+        if agent_id:
+            self.agent_id = agent_id
+        elif self.name:
+            slug = self.name.lower().replace(' ', '_').replace('-', '_')
+            self.agent_id = f"{slug}_{uuid.uuid4().hex[:8]}"
+        else:
+            self.agent_id = f"{self.__class__.__name__}_{uuid.uuid4().hex[:8]}"
+
+        # Runtime state
+        self._running = False
+        self._tasks = []
+
+        # Get trace manager for automatic tracing
+        self.trace_manager = get_trace_manager()
+
+        # Reliability features (enabled when reliability is configured)
+        self.task_manager = get_global_task_manager() if enable_reliability else None
+        self.backpressure_controller = None
+        if enable_reliability:
+            self.backpressure_controller = BackpressureController(
+                max_concurrent_tasks=max_concurrent_tasks,
+                max_queue_size=max_queue_size,
+                agent_id=self.agent_id
+            )
+
+        # Set agent ID in LLM provider for automatic LLM tracing
+        if self.llm:
+            self.llm.set_agent_id(self.agent_id)
+
+        logger.debug(f"Agent {self.name} ({self.agent_id}) initialized with automatic tracing")
+
+    async def start(self) -> None:
+        """Start the agent with automatic lifecycle tracing."""
+        if self._running:
+            return
+
+        # Start decision display if enabled
+        if hasattr(self, '_decision_display') and self._decision_display:
+            self._decision_display.start()
+
+        # Automatically trace agent lifecycle
+        async with self.trace_manager.span(
+            operation_name="agent_start",
+            trace_type=TraceType.AGENT_LIFECYCLE,
+            agent_id=self.agent_id,
+            agent_name=self.name,
+            agent_type=self.agent_type.value,
+            retry_enabled=str(self.config.retry_enabled)
+        ):
+            self._running = True
+            logger.info(f"Agent {self.name} started")
+
+    async def stop(self) -> None:
+        """Stop the agent with automatic lifecycle tracing."""
+        if not self._running:
+            return
+
+        # Stop decision display if enabled
+        if hasattr(self, '_decision_display') and self._decision_display:
+            self._decision_display.stop()
+            # Cleanup decision streaming registration
+            try:
+                from ..core.decision_tracing import unregister_agent_decision_stream
+                unregister_agent_decision_stream(
+                    agent_id=self.agent_id,
+                    callback=self._decision_display.handle_event
+                )
+            except Exception as e:
+                logger.debug(f"Failed to cleanup decision display: {e}")
+
+        # Automatically trace agent lifecycle
+        async with self.trace_manager.span(
+            operation_name="agent_stop",
+            trace_type=TraceType.AGENT_LIFECYCLE,
+            agent_id=self.agent_id,
+            agent_name=self.name,
+            tasks_completed=str(len(self._tasks))
+        ):
+            # Cancel running tasks
+            for task in self._tasks:
+                if not task.done():
+                    task.cancel()
+
+            if self._tasks:
+                await asyncio.gather(*self._tasks, return_exceptions=True)
+                self._tasks.clear()
+
+            self._running = False
+            logger.info(f"Agent {self.name} stopped")
+
+    async def process(
+        self,
+        task: str,
+        data: Any = None,
+        context: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Process a task with optional reliability features and automatic tracing.
+
+        Every call to process() is automatically traced with:
+        - Input/output data
+        - Performance metrics
+        - Error tracking
+        - Retry attempts and decisions
+        - LLM calls (if any)
+        - Task lifecycle (if reliability enabled)
+
+        Args:
+            task: Task to execute
+            data: Input data
+            context: Additional context
+            **kwargs: Additional arguments
+
+        Returns:
+            Task results with automatic tracing metadata
+        """
+        # Build full context
+        full_context = {
+            'agent_id': self.agent_id,
+            'agent_name': self.name,
+            'agent_type': self.agent_type.value,
+            'task': task,
+            'retry_enabled': self.config.retry_enabled,
+            'reliability_enabled': self.enable_reliability,
+            **(context or {}),
+            **kwargs
+        }
+
+        # Handle reliability features if enabled
+        if self.enable_reliability:
+            return await self._process_with_reliability(task, data, full_context)
+        else:
+            return await self._process_without_reliability(task, data, full_context)
+
+    async def _process_with_reliability(
+        self,
+        task: str,
+        data: Any,
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process task with full reliability features."""
+        # Check backpressure first
+        if self.backpressure_controller and not await self.backpressure_controller.acquire_processing_slot():
+            raise BackpressureError(
+                "Unable to acquire processing slot",
+                agent_id=self.agent_id,
+                queue_size=self.backpressure_controller.task_queue.qsize()
+            )
+
+        # Create task in task manager
+        task_id = None
+        if self.task_manager:
+            task_id = await self.task_manager.create_task(
+                agent_id=self.agent_id,
+                task_type=task,
+                data=data,
+                context=context
+            )
+            context['task_id'] = task_id
+            # Update task status to running
+            await self.task_manager.update_status(task_id, TaskStatus.RUNNING)
+
+        try:
+            # Automatically trace the entire operation
+            async with self.trace_manager.span(
+                operation_name=f"agent_process_{task}",
+                trace_type=TraceType.AGENT_EXECUTION,
+                agent_id=self.agent_id,
+                input_data=data,
+                agent_name=self.name,
+                task=task,
+                task_id=task_id,
+                retry_enabled=str(self.config.retry_enabled),
+                reliability_enabled="true"
+            ) as span_id:
+
+                # Execute with or without retry logic
+                if self.config.retry_enabled:
+                    result = await self._process_with_retry(span_id, task, data, context)
+                else:
+                    result = await self._process_fail_fast(span_id, task, data, context)
+
+                # Update task status to completed
+                if task_id and self.task_manager:
+                    await self.task_manager.update_status(task_id, TaskStatus.COMPLETED)
+
+                return result
+
+        except Exception as e:
+            # Update task status to failed
+            if task_id and self.task_manager:
+                await self.task_manager.update_status(task_id, TaskStatus.FAILED, error=str(e))
+            raise
+
+        finally:
+            # Always release the processing slot
+            if self.backpressure_controller:
+                self.backpressure_controller.release_processing_slot()
+
+    async def _process_without_reliability(
+        self,
+        task: str,
+        data: Any,
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process task without reliability features (original behavior)."""
+        # Automatically trace the entire operation
+        async with self.trace_manager.span(
+            operation_name=f"agent_process_{task}",
+            trace_type=TraceType.AGENT_EXECUTION,
+            agent_id=self.agent_id,
+            input_data=data,
+            agent_name=self.name,
+            task=task,
+            retry_enabled=str(self.config.retry_enabled),
+            reliability_enabled="false"
+        ) as span_id:
+
+            # Execute with or without retry logic
+            if self.config.retry_enabled:
+                result = await self._process_with_retry(span_id, task, data, context)
+            else:
+                result = await self._process_fail_fast(span_id, task, data, context)
+
+            return result
+
+    async def _process_with_retry(
+        self,
+        parent_span_id: str,
+        task: str,
+        data: Any,
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process task with retry logic and automatic retry decision tracing."""
+        retry_policy = self.config.retry_policy
+        max_attempts = retry_policy.max_retries + 1
+        last_exception = None
+
+        for attempt in range(1, max_attempts + 1):
+            # Create a child span for each retry attempt
+            async with self.trace_manager.span(
+                operation_name=f"retry_attempt_{attempt}",
+                trace_type=TraceType.AGENT_EXECUTION,
+                agent_id=self.agent_id,
+                parent_span_id=parent_span_id,
+                attempt=str(attempt),
+                max_attempts=str(max_attempts),
+                is_retry=str(attempt > 1)
+            ) as attempt_span_id:
+
+                try:
+                    # Add attempt info to context
+                    attempt_context = {
+                        **context,
+                        'attempt_number': attempt,
+                        'max_attempts': max_attempts,
+                        'is_retry': attempt > 1
+                    }
+
+                    # Execute the task
+                    result = await self._process_once(task, data, attempt_context, attempt, max_attempts)
+
+                    # Success!
+                    if attempt > 1:
+                        logger.info(f"Agent {self.name} succeeded on attempt {attempt}")
+
+                    return self._format_success_response(result, attempt_context, attempt, max_attempts)
+
+                except Exception as e:
+                    last_exception = e
+
+                    # Should we retry? Use decision tracing to record the retry decision
+                    if attempt < max_attempts:
+                        should_retry = await self._should_retry_error_with_tracing(
+                            e, attempt, max_attempts, attempt_span_id
+                        )
+
+                        if should_retry:
+                            # Calculate delay and wait
+                            delay = self._calculate_retry_delay(attempt - 1, retry_policy)
+                            logger.debug(f"Agent {self.name} retrying in {delay:.2f}s")
+                            await asyncio.sleep(delay)
+                            continue
+
+                    # Don't retry or no more attempts
+                    logger.debug(f"Agent {self.name} not retrying: {type(e).__name__}")
+                    return self._format_error_response(last_exception, context, attempt, max_attempts)
+
+        # All attempts exhausted
+        return self._format_error_response(
+            last_exception or Exception("Unknown error"), context, max_attempts, max_attempts
+        )
+
+    async def _process_fail_fast(
+        self,
+        span_id: str,
+        task: str,
+        data: Any,
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process task in fail-fast mode with error tracing."""
+        try:
+            result = await self._process_once(task, data, context, attempt=1, max_attempts=1)
+            return self._format_success_response(result, context, 1, 1)
+        except Exception as e:
+            logger.error(f"Error in agent {self.name} (fail-fast mode): {str(e)}")
+            return self._format_error_response(e, context, 1, 1)
+
+    async def _process_once(
+        self,
+        task: str,
+        data: Any,
+        context: Dict[str, Any],
+        attempt: int,
+        max_attempts: int
+    ) -> Dict[str, Any]:
+        """
+        Execute the task once without retry logic.
+
+        Subclasses should override this method for their specific behavior.
+        The automatic tracing happens at higher levels.
+        """
+        # Default implementation for base agent
+        return {
+            'message': f'Agent {self.name} processed task "{task}"',
+            'task': task,
+            'data': data,
+            'agent_id': self.agent_id,
+            'agent_name': self.name,
+            'attempt': attempt,
+            'timestamp': datetime.now(timezone.utc).isoformat()
+        }
+
+    async def _should_retry_error_with_tracing(
+        self,
+        error: Exception,
+        attempt: int,
+        max_attempts: int,
+        span_id: str
+    ) -> bool:
+        """
+        Determine if an error should be retried with decision tracing.
+
+        This traces the retry decision-making process including confidence
+        scores and reasoning for better observability.
+        """
+        # Use decision tracing to record retry logic
+        async with record_decision_point("retry_decision", DecisionType.VALIDATION, self.agent_id) as decision:
+
+            # Import here to avoid circular imports
+            from ..core.exceptions import (
+                TransientError, RetryableError, PermanentError,
+                classify_exception
+            )
+
+            # Classify the error
+            error_class = classify_exception(error)
+            error_type = type(error).__name__
+
+            # Decision logic with reasoning
+            reasoning = []
+            should_retry = False
+            confidence = 0.0
+
+            # Check attempt limit
+            if attempt >= max_attempts:
+                reasoning.append(f"Max attempts reached ({attempt}/{max_attempts})")
+                should_retry = False
+                confidence = 1.0  # Certain we shouldn't retry
+
+            # Error classification logic
+            elif error_class == "transient":
+                reasoning.append(f"Transient error detected: {error_type}")
+                reasoning.append("Transient errors are typically safe to retry")
+                should_retry = True
+                confidence = 0.9
+
+            elif error_class == "retryable":
+                reasoning.append(f"Retryable error detected: {error_type}")
+                reasoning.append("Error may resolve on retry")
+                should_retry = True
+                confidence = 0.7
+
+            elif error_class == "permanent":
+                reasoning.append(f"Permanent error detected: {error_type}")
+                reasoning.append("Permanent errors should not be retried")
+                should_retry = False
+                confidence = 0.95
+
+            else:
+                # Unknown error - use heuristics
+                reasoning.append(f"Unknown error type: {error_type}")
+
+                if isinstance(error, (ValueError, TypeError, KeyError)):
+                    reasoning.append("Logic/data error - likely permanent")
+                    should_retry = False
+                    confidence = 0.8
+                else:
+                    reasoning.append("Unknown error - defaulting to retry")
+                    should_retry = True
+                    confidence = 0.5
+
+            # Record the decision
+            decision.set_confidence(confidence)
+            for reason in reasoning:
+                decision.add_reasoning(reason)
+
+            decision.set_factor("error_type", error_type)
+            decision.set_factor("error_class", error_class)
+            decision.set_factor("attempt", attempt)
+            decision.set_factor("max_attempts", max_attempts)
+
+            # Add alternatives considered
+            decision.add_alternative("retry" if not should_retry else "fail")
+
+            logger.debug(f"Retry decision for {error_type}: {should_retry} (confidence: {confidence:.2f})")
+            return should_retry
+
+    def _calculate_retry_delay(self, attempt: int, retry_policy) -> float:
+        """Calculate retry delay with jitter."""
+        if hasattr(retry_policy, 'calculate_delay'):
+            # Use the RetryPolicy's built-in delay calculation
+            return retry_policy.calculate_delay(attempt)
+
+        # Legacy fallback for old-style retry policies
+        if retry_policy.strategy in [RetryStrategy.IMMEDIATE, "immediate"]:
+            delay = 0.0
+        elif retry_policy.strategy in [RetryStrategy.FIXED, RetryStrategy.FIXED_DELAY, "fixed", "fixed_delay"]:
+            delay = getattr(retry_policy, 'base_delay', getattr(retry_policy, 'initial_delay', 1.0))
+        else:  # EXPONENTIAL (default)
+            base_delay = getattr(retry_policy, 'base_delay', getattr(retry_policy, 'initial_delay', 1.0))
+            delay = base_delay * (2 ** attempt)
+
+        # Add small random jitter to prevent thundering herd
+        jitter = delay * 0.1 * random.random()
+        delay += jitter
+
+        return delay
+
+    def _format_success_response(
+        self,
+        result: Any,
+        context: Dict[str, Any],
+        attempt: int,
+        max_attempts: int
+    ) -> Dict[str, Any]:
+        """Format successful response with tracing metadata (flattened for better DX)."""
+        # Build response with framework metadata
+        response = {
+            'status': 'success',
+            'agent_id': self.agent_id,
+            'agent_name': self.name,
+            'context': context,
+            'retry_info': {
+                'attempt': attempt,
+                'max_attempts': max_attempts,
+                'retry_enabled': self.config.retry_enabled
+            } if self.config.retry_enabled else None,
+            'timestamp': datetime.now(timezone.utc).isoformat()
+        }
+
+        # Flatten handler result into top level for better DX
+        # If result is a dict, merge it; otherwise add as 'result' key
+        if isinstance(result, dict):
+            # Merge handler result at top level (handler keys overwrite framework keys on collision)
+            response.update(result)
+        else:
+            # Non-dict results stored under 'result' key
+            response['result'] = result
+
+        return response
+
+    def _format_error_response(
+        self,
+        error: Exception,
+        context: Dict[str, Any],
+        attempt: int,
+        max_attempts: int
+    ) -> Dict[str, Any]:
+        """Format error response with tracing metadata."""
+        return {
+            'status': 'error',
+            'error': str(error),
+            'error_type': error.__class__.__name__,
+            'agent_id': self.agent_id,
+            'agent_name': self.name,
+            'context': context,
+            'result': None,  # Ensure result field exists for relay compatibility
+            'retry_info': {
+                'attempt': attempt,
+                'max_attempts': max_attempts,
+                'retry_enabled': self.config.retry_enabled,
+                'retry_exhausted': attempt >= max_attempts
+            } if self.config.retry_enabled else None,
+            'timestamp': datetime.now(timezone.utc).isoformat()
+        }
+
+    @property
+    def health(self) -> Dict[str, Any]:
+        """Get agent health information from unified tracing system."""
+        # Get real-time metrics from trace manager
+        metrics = self.trace_manager.get_agent_metrics(self.agent_id)
+
+        return {
+            'id': self.agent_id,
+            'name': self.name,
+            'type': self.agent_type.value,
+            'running': self._running,
+            'metrics': metrics,
+            'retry_config': {
+                'enabled': self.config.retry_enabled,
+                'max_retries': self.config.retry_policy.max_retries if self.config.retry_enabled else None,
+                'strategy': self.config.retry_policy.strategy.value if self.config.retry_enabled else None,
+            },
+            'tracing': {
+                'enabled': True,
+                'trace_manager_available': self.trace_manager is not None
+            }
+        }
+
+    @property
+    def trace_id(self) -> Optional[str]:
+        """Get current trace ID for this agent."""
+        return self.trace_manager.trace_context.current_trace_id
+
+    @property
+    def current_span_id(self) -> Optional[str]:
+        """Get current span ID for this agent."""
+        return self.trace_manager.trace_context.current_span_id
+
+    def get_recent_operations(self, limit: int = 10) -> List[Dict[str, Any]]:
+        """Get recent operations for this agent from unified tracing."""
+        return self.trace_manager.get_recent_operations(agent_id=self.agent_id, limit=limit)
+
+    def get_trace_stats(self) -> Dict[str, Any]:
+        """Get comprehensive tracing statistics for this agent."""
+        return self.trace_manager.get_agent_metrics(self.agent_id)
+
+    def get_recent_decisions(self, limit: int = 10) -> List[Dict[str, Any]]:
+        """Get recent decision traces for this agent."""
+        from ..core.decision_tracing import get_recent_decisions
+        return get_recent_decisions(agent_id=self.agent_id, limit=limit)
+
+    def get_decision_stats(self) -> Dict[str, Any]:
+        """Get decision statistics for this agent."""
+        from ..core.decision_tracing import get_decision_stats
+        return get_decision_stats(agent_id=self.agent_id)
+
+    # Reliability management methods
+
+    def enable_reliability_features(
+        self,
+        max_concurrent_tasks: int = 10,
+        max_queue_size: int = 100
+    ) -> None:
+        """
+        Enable reliability features for this agent.
+
+        Args:
+            max_concurrent_tasks: Maximum concurrent tasks
+            max_queue_size: Maximum queue size for backpressure control
+        """
+        if self.enable_reliability:
+            logger.warning(f"Reliability already enabled for agent {self.name}")
+            return
+
+        self.enable_reliability = True
+        self.task_manager = get_global_task_manager()
+        self.backpressure_controller = BackpressureController(
+            max_concurrent_tasks=max_concurrent_tasks,
+            max_queue_size=max_queue_size,
+            agent_id=self.agent_id
+        )
+
+        logger.info(f"Enabled reliability features for agent {self.name}")
+
+    def disable_reliability_features(self) -> None:
+        """Disable reliability features for this agent."""
+        self.enable_reliability = False
+        self.task_manager = None
+        self.backpressure_controller = None
+
+        logger.info(f"Disabled reliability features for agent {self.name}")
+
+    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
+        """Get status of a specific task."""
+        if not self.task_manager:
+            return None
+        return await self.task_manager.get_task_status(task_id)
+
+    async def get_agent_tasks(self, status: Optional[TaskStatus] = None) -> List[Dict[str, Any]]:
+        """Get all tasks for this agent, optionally filtered by status."""
+        if not self.task_manager:
+            return []
+
+        tasks = await self.task_manager.get_agent_tasks(self.agent_id, status)
+        return [
+            {
+                "id": task.id,
+                "status": task.status.value,
+                "progress": task.progress,
+                "error": task.error,
+                "duration": task.duration(),
+                "age": task.age(),
+                "retry_count": task.retry_count
+            }
+            for task in tasks
+        ]
+
+    def get_backpressure_stats(self) -> Dict[str, Any]:
+        """Get current backpressure statistics."""
+        if not self.backpressure_controller:
+            return {"enabled": False}
+
+        stats = self.backpressure_controller.get_stats()
+        stats["enabled"] = True
+        return stats
+
+    async def cancel_task(self, task_id: str) -> bool:
+        """Cancel a specific task."""
+        if not self.task_manager:
+            return False
+        return await self.task_manager.cancel_task(task_id)
+
+    # Integration helpers
+
+    def create_child_agent(self, name: str, config_overrides: Optional[Dict[str, Any]] = None) -> "BaseAgent":
+        """Create a child agent that inherits tracing context."""
+        # Create new config based on current config
+        from ..config.base import AgentConfig
+
+        child_config = AgentConfig(
+            name=name,
+            type=self.config.type,
+            retry_enabled=self.config.retry_enabled,
+            retry_policy=self.config.retry_policy,
+            **(config_overrides or {})
+        )
+
+        # Create child agent
+        child = self.__class__(
+            config=child_config,
+            llm_provider=self.llm,
+            name=name
+        )
+
+        logger.debug(f"Created child agent {name} from parent {self.name}")
+        return child
+
+    def __repr__(self) -> str:
+        return f"BaseAgent(name='{self.name}', id='{self.agent_id}', running={self._running})"
+
+    def __str__(self) -> str:
+        return f"BaseAgent '{self.name}'"