daita-agents 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. daita/__init__.py +216 -0
  2. daita/agents/__init__.py +33 -0
  3. daita/agents/base.py +743 -0
  4. daita/agents/substrate.py +1141 -0
  5. daita/cli/__init__.py +145 -0
  6. daita/cli/__main__.py +7 -0
  7. daita/cli/ascii_art.py +44 -0
  8. daita/cli/core/__init__.py +0 -0
  9. daita/cli/core/create.py +254 -0
  10. daita/cli/core/deploy.py +473 -0
  11. daita/cli/core/deployments.py +309 -0
  12. daita/cli/core/import_detector.py +219 -0
  13. daita/cli/core/init.py +481 -0
  14. daita/cli/core/logs.py +239 -0
  15. daita/cli/core/managed_deploy.py +709 -0
  16. daita/cli/core/run.py +648 -0
  17. daita/cli/core/status.py +421 -0
  18. daita/cli/core/test.py +239 -0
  19. daita/cli/core/webhooks.py +172 -0
  20. daita/cli/main.py +588 -0
  21. daita/cli/utils.py +541 -0
  22. daita/config/__init__.py +62 -0
  23. daita/config/base.py +159 -0
  24. daita/config/settings.py +184 -0
  25. daita/core/__init__.py +262 -0
  26. daita/core/decision_tracing.py +701 -0
  27. daita/core/exceptions.py +480 -0
  28. daita/core/focus.py +251 -0
  29. daita/core/interfaces.py +76 -0
  30. daita/core/plugin_tracing.py +550 -0
  31. daita/core/relay.py +779 -0
  32. daita/core/reliability.py +381 -0
  33. daita/core/scaling.py +459 -0
  34. daita/core/tools.py +554 -0
  35. daita/core/tracing.py +770 -0
  36. daita/core/workflow.py +1144 -0
  37. daita/display/__init__.py +1 -0
  38. daita/display/console.py +160 -0
  39. daita/execution/__init__.py +58 -0
  40. daita/execution/client.py +856 -0
  41. daita/execution/exceptions.py +92 -0
  42. daita/execution/models.py +317 -0
  43. daita/llm/__init__.py +60 -0
  44. daita/llm/anthropic.py +291 -0
  45. daita/llm/base.py +530 -0
  46. daita/llm/factory.py +101 -0
  47. daita/llm/gemini.py +355 -0
  48. daita/llm/grok.py +219 -0
  49. daita/llm/mock.py +172 -0
  50. daita/llm/openai.py +220 -0
  51. daita/plugins/__init__.py +141 -0
  52. daita/plugins/base.py +37 -0
  53. daita/plugins/base_db.py +167 -0
  54. daita/plugins/elasticsearch.py +849 -0
  55. daita/plugins/mcp.py +481 -0
  56. daita/plugins/mongodb.py +520 -0
  57. daita/plugins/mysql.py +362 -0
  58. daita/plugins/postgresql.py +342 -0
  59. daita/plugins/redis_messaging.py +500 -0
  60. daita/plugins/rest.py +537 -0
  61. daita/plugins/s3.py +770 -0
  62. daita/plugins/slack.py +729 -0
  63. daita/utils/__init__.py +18 -0
  64. daita_agents-0.2.0.dist-info/METADATA +409 -0
  65. daita_agents-0.2.0.dist-info/RECORD +69 -0
  66. daita_agents-0.2.0.dist-info/WHEEL +5 -0
  67. daita_agents-0.2.0.dist-info/entry_points.txt +2 -0
  68. daita_agents-0.2.0.dist-info/licenses/LICENSE +56 -0
  69. daita_agents-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1141 @@
1
+ """
2
+ Substrate Agent - The foundational agent for Daita Agents.
3
+
4
+ This agent provides a blank slate that users can build upon to create
5
+ custom agents for any task, with simplified error handling and retry capabilities.
6
+ All operations are automatically traced without any configuration required.
7
+ """
8
+ import asyncio
9
+ import logging
10
+ import os
11
+ from datetime import datetime
12
+ from typing import Dict, Any, Optional, List, Union, Callable
13
+
14
+ from ..config.base import AgentConfig, AgentType
15
+ from ..core.interfaces import LLMProvider
16
+ from ..core.exceptions import (
17
+ DaitaError, AgentError, LLMError, PluginError,
18
+ ValidationError, InvalidDataError, NotFoundError
19
+ )
20
+ from ..core.tracing import TraceStatus
21
+ from .base import BaseAgent
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Import unified plugin access
26
+ from ..plugins import PluginAccess
27
+ from ..llm.factory import create_llm_provider
28
+ from ..config.settings import settings
29
+ from ..core.tools import AgentTool, ToolRegistry
30
+
31
+
32
class FocusedTool:
    """
    Proxy around an AgentTool that filters the tool's output before it
    reaches the LLM.

    Filtering happens on the tool side of the LLM call, so a large tool
    result (e.g. 10KB of JSON) can be trimmed to just the fields the agent
    needs, cutting token usage and latency substantially.
    """

    def __init__(self, tool: AgentTool, focus_config):
        """
        Wrap *tool* with a focus filter.

        Args:
            tool: The AgentTool being wrapped.
            focus_config: Focus specification — a FocusConfig instance,
                JSONPath string, column list, or dict.
        """
        self._tool = tool
        self._focus = focus_config

    async def handler(self, arguments: Dict[str, Any]) -> Any:
        """
        Run the wrapped tool, then pass its result through the focus.

        The LLM layer invokes ``tool.handler()``; intercepting it here means
        the filtered (smaller) result is what the LLM ultimately sees. If
        applying the focus fails for any reason, the unfiltered result is
        returned instead of raising.
        """
        raw = await self._tool.handler(arguments)

        # Nothing to filter: no focus configured, or the tool returned None.
        if not self._focus or raw is None:
            return raw

        try:
            from ..core.focus import apply_focus
            from ..config.base import FocusConfig

            # Normalize a FocusConfig object into the plain value
            # (list/str/dict) that apply_focus() understands.
            spec = self._focus
            if isinstance(spec, FocusConfig):
                if spec.type == "column":
                    spec = spec.columns or []
                elif spec.type in ("jsonpath", "xpath"):
                    spec = spec.path
                elif spec.type == "css":
                    spec = spec.selector
                elif spec.type == "regex":
                    spec = spec.pattern
                else:
                    # Unknown focus types are handed over as a dict.
                    spec = spec.dict()

            filtered = apply_focus(raw, spec)
            logger.debug(
                f"Applied focus to {self.name} result: "
                f"{type(raw).__name__} -> {type(filtered).__name__}"
            )
            return filtered
        except Exception as e:
            logger.warning(f"Focus application failed for {self.name}: {e}")
            # Best-effort: fall back to the unfiltered result.
            return raw

    def __getattr__(self, name):
        """Anything not defined on the wrapper is served by the wrapped tool."""
        return getattr(self._tool, name)

    def __repr__(self):
        return f"FocusedTool({self._tool.name}, focus={self._focus})"
111
+
112
+ class SubstrateAgent(BaseAgent):
113
+ """
114
+ Substrate Agent - DAITA's primary agent implementation.
115
+
116
+ A flexible, tool-enabled agent for data operations with autonomous
117
+ LLM-driven task execution.
118
+
119
+ ## Quick Start
120
+
121
+ ```python
122
+ from daita import SubstrateAgent
123
+ from daita.core.tools import tool
124
+
125
+ # Define tools for your agent
126
+ @tool
127
+ async def query_database(sql: str) -> list:
128
+ '''Execute SQL query and return results.'''
129
+ return await db.execute(sql)
130
+
131
+ # Create agent with tools
132
+ agent = SubstrateAgent(
133
+ name="Data Analyst",
134
+ model="gpt-4o-mini",
135
+ prompt="You are a data analyst. Help users query and analyze data."
136
+ )
137
+ agent.register_tool(query_database)
138
+
139
+ # Use the clean API
140
+ await agent.start()
141
+
142
+ # Simple execution - just get the answer
143
+ answer = await agent.run("What were total sales last month?")
144
+ print(answer)
145
+
146
+ # Detailed execution - get full metadata
147
+ result = await agent.run_detailed("Show me top 10 customers")
148
+ print(f"Answer: {result['result']}")
149
+ print(f"Time: {result['processing_time_ms']}ms")
150
+ print(f"Cost: ${result['cost']}")
151
+ ```
152
+
153
+ ## Architecture
154
+
155
+ SubstrateAgent uses autonomous tool calling:
156
+ 1. You give the agent tools and a natural language instruction
157
+ 2. The LLM autonomously decides which tools to use and when
158
+ 3. Tools are executed and results fed back to the LLM
159
+ 4. The LLM produces a final answer
160
+
161
+ This is the modern agent paradigm - autonomous, tool-driven execution.
162
+
163
+ ## Extending with Tools
164
+
165
+ Tools are the primary way to extend agent capabilities:
166
+
167
+ ```python
168
+ from daita.core.tools import tool
169
+
170
+ @tool
171
+ async def calculate_metrics(data: list) -> dict:
172
+ '''Calculate statistical metrics for data.'''
173
+ return {
174
+ 'mean': sum(data) / len(data),
175
+ 'max': max(data),
176
+ 'min': min(data)
177
+ }
178
+
179
+ agent.register_tool(calculate_metrics)
180
+ ```
181
+
182
+ ## Focus System (Data Filtering)
183
+
184
+ DAITA's unique focus system filters tool results BEFORE they reach the LLM,
185
+ reducing token usage and latency:
186
+
187
+ ```python
188
+ from daita.config.base import FocusConfig
189
+
190
+ agent = SubstrateAgent(
191
+ name="Sales Analyzer",
192
+ focus=FocusConfig(
193
+ type="jsonpath",
194
+ path="$.sales[*].amount" # Only extract amounts
195
+ )
196
+ )
197
+ ```
198
+
199
+ Focus types: jsonpath, column, xpath, css, regex
200
+
201
+ ## System Integration
202
+
203
+ SubstrateAgent integrates with workflows, webhooks, and schedules:
204
+
205
+ - `receive_message()` - Handle workflow communication
206
+ - `on_webhook()` - Handle webhook triggers
207
+ - `on_schedule()` - Handle scheduled tasks
208
+
209
+ These are called automatically by the DAITA infrastructure.
210
+ """
211
+
212
+ # Class-level defaults for smart constructor
213
+ _default_llm_provider = "openai"
214
+ _default_model = "gpt-4"
215
+
216
+ @classmethod
217
+ def configure_defaults(cls, **kwargs):
218
+ """Set global defaults for all SubstrateAgent instances."""
219
+ for key, value in kwargs.items():
220
+ setattr(cls, f'_default_{key}', value)
221
+
222
    def __new__(cls, name=None, **kwargs):
        """Smart constructor with auto-configuration.

        Pre-fills ``llm_provider``, ``model``, and ``api_key`` from class
        defaults and environment-backed settings when the caller omits them.

        NOTE(review): Python packs a *fresh* ``**kwargs`` dict for __new__
        and __init__ separately, so the mutations made here never reach
        __init__. __init__ re-derives the same defaults itself, making this
        pre-population effectively redundant (its only observable effect is
        the extra ``settings.get_llm_api_key`` call) — confirm before
        removing.
        """
        # Auto-configuration from environment and defaults
        if not kwargs.get('llm_provider'):
            kwargs['llm_provider'] = getattr(cls, '_default_llm_provider', 'openai')
        if not kwargs.get('model'):
            kwargs['model'] = getattr(cls, '_default_model', 'gpt-4')
        if not kwargs.get('api_key'):
            provider = kwargs.get('llm_provider', 'openai')
            # Only try to get API key if provider is a string (not an object)
            if isinstance(provider, str):
                kwargs['api_key'] = settings.get_llm_api_key(provider)

        return super().__new__(cls)
236
+
237
    def __init__(
        self,
        name: Optional[str] = None,
        llm_provider: Optional[Union[str, LLMProvider]] = None,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        config: Optional[AgentConfig] = None,
        agent_id: Optional[str] = None,
        prompt: Optional[Union[str, Dict[str, str]]] = None,
        focus: Optional[Union[List[str], str, Dict[str, Any]]] = None,
        relay: Optional[str] = None,
        mcp: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        display_reasoning: bool = False,
        **kwargs
    ):
        """
        Initialize the Substrate Agent with the smart constructor pattern.

        Auto-creates an LLM provider from a provider *name* plus an API key
        resolved from settings/environment; an already-constructed
        LLMProvider instance is used as-is. Tool and MCP setup is deferred
        until first use so construction never blocks.

        Args:
            name: Agent name (required for direct instantiation)
            llm_provider: LLM provider name ("openai", "anthropic") or instance
            model: Model name ("gpt-4", "claude-3-sonnet-20240229")
            api_key: API key for LLM provider (auto-detected from env if not provided)
            config: Agent configuration (auto-generated if not provided)
            agent_id: Unique identifier for the agent
            prompt: Custom prompt or prompt templates
            focus: Default focus configuration for data processing
            relay: Name of relay channel for publishing results
            mcp: MCP server(s) for tool integration - single dict or list of dicts
            display_reasoning: Enable minimal decision display in console
            **kwargs: Additional configuration options (can include 'tools' parameter)
        """
        # Auto-create LLM provider if needed (a provider *instance* skips this)
        if isinstance(llm_provider, str) or llm_provider is None:
            provider_name = llm_provider or self._default_llm_provider
            model_name = model or self._default_model
            api_key_to_use = api_key or settings.get_llm_api_key(provider_name)

            if api_key_to_use:
                llm_provider = create_llm_provider(
                    provider=provider_name,
                    model=model_name,
                    api_key=api_key_to_use,
                    agent_id=agent_id
                )
            else:
                # Without a key the agent still constructs; LLM calls will fail later.
                logger.warning(f"No API key found for {provider_name}. LLM functionality will be disabled.")
                llm_provider = None
        # Create default config if none provided
        if config is None:
            config = AgentConfig(
                name=name or "Substrate Agent",
                type=AgentType.SUBSTRATE,
                **kwargs
            )

        # Initialize base agent (which handles automatic tracing)
        super().__init__(config, llm_provider, agent_id, name)

        # Store customization options
        self.prompt = prompt
        self.default_focus = focus
        self.relay = relay

        # Decision display setup (console output of agent reasoning)
        self.display_reasoning = display_reasoning
        self._decision_display = None

        if display_reasoning:
            self._setup_decision_display()

        # Tool management (unified system); registration deferred to _setup_tools()
        self.tool_registry = ToolRegistry()
        self.tool_sources = kwargs.get('tools', [])  # Plugins or AgentTool instances (see _setup_tools)
        self._tools_setup = False

        # MCP server integration
        self.mcp_registry = None
        self.mcp_tools = []
        if mcp is not None:
            # Normalize a single server config dict to a one-element list
            mcp_servers = [mcp] if isinstance(mcp, dict) else mcp
            self._mcp_server_configs = mcp_servers
            # MCP setup happens lazily on first use to avoid blocking init
        else:
            self._mcp_server_configs = []

        # Plugin access for direct plugin usage
        self.plugins = PluginAccess()

        logger.debug(f"Substrate Agent {self.name} initialized")
331
+
332
+
333
+ def _setup_decision_display(self):
334
+ """Setup minimal decision display for local development."""
335
+ try:
336
+ from ..display.console import create_console_decision_display
337
+ from ..core.decision_tracing import register_agent_decision_stream
338
+
339
+ # Create display
340
+ self._decision_display = create_console_decision_display(
341
+ agent_name=self.name,
342
+ agent_id=self.agent_id
343
+ )
344
+
345
+ # Register with decision streaming system
346
+ register_agent_decision_stream(
347
+ agent_id=self.agent_id,
348
+ callback=self._decision_display.handle_event
349
+ )
350
+
351
+ logger.debug(f"Decision display enabled for agent {self.name}")
352
+
353
+ except Exception as e:
354
+ logger.warning(f"Failed to setup decision display: {e}")
355
+ self.display_reasoning = False
356
+ self._decision_display = None
357
+
358
+ async def _setup_mcp_tools(self):
359
+ """
360
+ Setup MCP servers and discover available tools.
361
+
362
+ This is called lazily on first agent.process() to avoid blocking
363
+ agent initialization with MCP server connections.
364
+ """
365
+ if self.mcp_registry is not None:
366
+ # Already setup
367
+ return
368
+
369
+ if not self._mcp_server_configs:
370
+ # No MCP servers configured
371
+ return
372
+
373
+ try:
374
+ from ..plugins.mcp import MCPServer, MCPToolRegistry
375
+
376
+ logger.info(f"Setting up {len(self._mcp_server_configs)} MCP server(s) for {self.name}")
377
+
378
+ # Create registry
379
+ self.mcp_registry = MCPToolRegistry()
380
+
381
+ # Connect to each server and register tools
382
+ for server_config in self._mcp_server_configs:
383
+ server = MCPServer(
384
+ command=server_config.get("command"),
385
+ args=server_config.get("args", []),
386
+ env=server_config.get("env", {}),
387
+ server_name=server_config.get("name")
388
+ )
389
+
390
+ # Add to registry (automatically connects and discovers tools)
391
+ await self.mcp_registry.add_server(server)
392
+
393
+ # Get all tools from registry
394
+ self.mcp_tools = self.mcp_registry.get_all_tools()
395
+
396
+ logger.info(f"MCP setup complete: {self.mcp_registry.tool_count} tools from {self.mcp_registry.server_count} server(s)")
397
+
398
+ except ImportError:
399
+ logger.error(
400
+ "MCP SDK not installed. Install with: pip install mcp\n"
401
+ "See: https://github.com/modelcontextprotocol/python-sdk"
402
+ )
403
+ raise
404
+
405
+ except Exception as e:
406
+ logger.error(f"Failed to setup MCP servers: {str(e)}")
407
+ raise
408
+
409
+ async def _setup_tools(self):
410
+ """
411
+ Discover and register tools from all sources.
412
+
413
+ Called lazily on first process() call to avoid blocking initialization.
414
+ Sources can be:
415
+ - Plugin instances with get_tools() method
416
+ - AgentTool instances directly
417
+ - MCP server configurations
418
+ """
419
+ if self._tools_setup:
420
+ return # Already setup
421
+
422
+ # 1. Setup MCP tools first
423
+ if self._mcp_server_configs and self.mcp_registry is None:
424
+ await self._setup_mcp_tools()
425
+ # Convert MCP tools to AgentTool format
426
+ for mcp_tool in self.mcp_tools:
427
+ agent_tool = AgentTool.from_mcp_tool(mcp_tool, self.mcp_registry)
428
+ self.tool_registry.register(agent_tool)
429
+
430
+ # 2. Register plugin tools
431
+ for source in self.tool_sources:
432
+ if isinstance(source, AgentTool):
433
+ # Direct AgentTool registration
434
+ self.tool_registry.register(source)
435
+ logger.debug(f"Registered tool: {source.name}")
436
+
437
+ elif hasattr(source, 'get_tools'):
438
+ # Plugin with get_tools() method
439
+ plugin_tools = source.get_tools()
440
+ if plugin_tools:
441
+ self.tool_registry.register_many(plugin_tools)
442
+ logger.info(
443
+ f"Registered {len(plugin_tools)} tools from "
444
+ f"{source.__class__.__name__}"
445
+ )
446
+ else:
447
+ logger.warning(
448
+ f"Invalid tool source: {source}. "
449
+ f"Expected AgentTool or plugin with get_tools() method."
450
+ )
451
+
452
+ self._tools_setup = True
453
+ logger.info(
454
+ f"Agent {self.name} initialized with {self.tool_registry.tool_count} tools"
455
+ )
456
+
457
+ # ========================================================================
458
+ # USER API - What developers call directly
459
+ # ========================================================================
460
+
461
+ async def run(
462
+ self,
463
+ prompt: str,
464
+ tools: Optional[List[Union[str, AgentTool]]] = None,
465
+ max_iterations: int = 5,
466
+ **kwargs
467
+ ) -> str:
468
+ """
469
+ Run an instruction or query with autonomous tool calling.
470
+
471
+ This is the simplest way to use the agent - give it an instruction
472
+ and let it figure out which tools to use.
473
+
474
+ All execution is automatically traced without any user configuration.
475
+
476
+ Args:
477
+ prompt: The instruction or question
478
+ tools: Optional list of tool names or AgentTool instances.
479
+ If None, uses all registered tools.
480
+ max_iterations: Max number of tool calling iterations
481
+ **kwargs: Additional LLM parameters (temperature, etc.)
482
+
483
+ Returns:
484
+ The agent's final answer as a string
485
+
486
+ Examples:
487
+ Questions:
488
+ ```python
489
+ answer = await agent.run("What's the weather in Seattle?")
490
+ # "It's 55°F and rainy in Seattle."
491
+ ```
492
+
493
+ Commands:
494
+ ```python
495
+ answer = await agent.run("Calculate 127 times 45")
496
+ # "The result is 5,715."
497
+
498
+ answer = await agent.run("Process this CSV and generate a report")
499
+ # "I've processed the CSV file and generated a summary report..."
500
+ ```
501
+
502
+ With specific tools:
503
+ ```python
504
+ answer = await agent.run(
505
+ "Calculate 127 times 45",
506
+ tools=["calculator"]
507
+ )
508
+ # "The result is 5,715."
509
+ ```
510
+
511
+ With LLM parameters:
512
+ ```python
513
+ answer = await agent.run(
514
+ "Write a creative story",
515
+ temperature=0.9,
516
+ max_tokens=500
517
+ )
518
+ ```
519
+ """
520
+ result = await self._run_traced(prompt, tools, max_iterations, **kwargs)
521
+ return result['result']
522
+
523
+ async def run_detailed(
524
+ self,
525
+ prompt: str,
526
+ tools: Optional[List[Union[str, AgentTool]]] = None,
527
+ max_iterations: int = 5,
528
+ **kwargs
529
+ ) -> Dict[str, Any]:
530
+ """
531
+ Like run(), but returns full execution details.
532
+
533
+ All execution is automatically traced without any user configuration.
534
+
535
+ Returns:
536
+ {
537
+ "result": str, # Final answer
538
+ "tool_calls": [...], # All tools called
539
+ "iterations": int, # Number of iterations
540
+ "tokens": {...}, # Token usage
541
+ "cost": float, # Estimated cost
542
+ "processing_time_ms": float, # Execution time
543
+ "agent_id": str, # Agent identifier
544
+ "agent_name": str # Agent name
545
+ }
546
+
547
+ Examples:
548
+ ```python
549
+ result = await agent.run_detailed("Complex calculation task")
550
+
551
+ print(f"Answer: {result['result']}")
552
+ print(f"Used {len(result['tool_calls'])} tools")
553
+ print(f"Cost: ${result['cost']:.4f}")
554
+ print(f"Time: {result['processing_time_ms']:.0f}ms")
555
+ ```
556
+ """
557
+ return await self._run_traced(prompt, tools, max_iterations, **kwargs)
558
+
559
+ async def _run_traced(
560
+ self,
561
+ prompt: str,
562
+ tools: Optional[List[Union[str, AgentTool]]],
563
+ max_iterations: int,
564
+ **kwargs
565
+ ) -> Dict[str, Any]:
566
+ """
567
+ Internal helper: Execute with automatic tracing.
568
+
569
+ This method handles all tracing automatically - users never call this directly.
570
+ Creates an AGENT_EXECUTION trace span that encompasses the entire operation,
571
+ with nested LLM_CALL spans for individual LLM interactions.
572
+ """
573
+ import time
574
+ from ..core.tracing import TraceType
575
+
576
+ start_time = time.time()
577
+
578
+ # Create agent-level trace span (automatic, invisible to users)
579
+ async with self.trace_manager.span(
580
+ operation_name="agent_run",
581
+ trace_type=TraceType.AGENT_EXECUTION,
582
+ agent_id=self.agent_id,
583
+ agent_name=self.name,
584
+ prompt=prompt[:200], # Truncate for storage
585
+ tools_requested=tools,
586
+ max_iterations=max_iterations,
587
+ entry_point="run" # Distinguishes from _process() calls
588
+ ):
589
+ # Execute LLM with tools (creates nested LLM_CALL traces)
590
+ result = await self._call_llm_with_tools(
591
+ prompt=prompt,
592
+ tools=tools,
593
+ max_iterations=max_iterations,
594
+ **kwargs
595
+ )
596
+
597
+ # Enrich result with metadata
598
+ result['processing_time_ms'] = (time.time() - start_time) * 1000
599
+ result['agent_id'] = self.agent_id
600
+ result['agent_name'] = self.name
601
+
602
+ return result
603
+
604
+ # ========================================================================
605
+ # INTERNAL - Backward compatibility for system integration
606
+ # ========================================================================
607
+
608
+ async def _process(
609
+ self,
610
+ task: str,
611
+ data: Any = None,
612
+ context: Optional[Dict[str, Any]] = None,
613
+ **kwargs
614
+ ) -> Dict[str, Any]:
615
+ """
616
+ INTERNAL: Process a task with data and context.
617
+
618
+ This method is used internally by the framework for:
619
+ - Workflow communication (receive_message calls this)
620
+ - Lambda execution routing
621
+ - System integration
622
+
623
+ Users should NOT call this directly. Use:
624
+ - run() for simple execution
625
+ - run_detailed() for execution with metadata
626
+ - receive_message() for workflow communication
627
+ - on_webhook() for webhook triggers
628
+ - on_schedule() for scheduled tasks
629
+
630
+ Args:
631
+ task: Task description or instruction
632
+ data: Optional data payload
633
+ context: Execution context metadata
634
+ **kwargs: Additional parameters
635
+
636
+ Returns:
637
+ Execution result with metadata
638
+ """
639
+ # Convert task/data to prompt
640
+ if data is not None:
641
+ prompt = f"{task}: {data}"
642
+ else:
643
+ prompt = task
644
+
645
+ # Use run_detailed as the core execution
646
+ result = await self.run_detailed(
647
+ prompt=prompt,
648
+ **kwargs
649
+ )
650
+
651
+ # Merge context if provided (for internal tracking)
652
+ if context:
653
+ result['context'] = {**result.get('context', {}), **context}
654
+
655
+ # Add legacy fields for backward compatibility with internal systems
656
+ result['task'] = task
657
+ result['status'] = 'success' if 'result' in result else 'error'
658
+
659
+ return result
660
+
661
+ # ========================================================================
662
+ # SYSTEM INTEGRATION API - What infrastructure calls
663
+ # ========================================================================
664
+
665
+ async def receive_message(
666
+ self,
667
+ data: Any,
668
+ source_agent: str,
669
+ channel: str,
670
+ workflow_name: Optional[str] = None
671
+ ) -> Dict[str, Any]:
672
+ """
673
+ Handle workflow relay message from another agent.
674
+
675
+ This method is called automatically by the workflow system when
676
+ this agent receives a message from another agent via a relay channel.
677
+
678
+ DO NOT call this directly unless you're building workflow infrastructure.
679
+ For direct agent execution, use run() or run_detailed().
680
+
681
+ Args:
682
+ data: Message data from source agent
683
+ source_agent: Name of the sending agent
684
+ channel: Relay channel name
685
+ workflow_name: Name of the workflow
686
+
687
+ Returns:
688
+ Execution result with workflow metadata
689
+
690
+ Examples:
691
+ Custom routing:
692
+ ```python
693
+ class MyAgent(SubstrateAgent):
694
+ async def receive_message(self, data, source, channel, workflow=None):
695
+ if channel == "urgent":
696
+ return await self.run(
697
+ "URGENT: Process this immediately",
698
+ tools=["priority_handler"]
699
+ )
700
+ elif channel == "batch":
701
+ return await self.run(
702
+ "Batch process these records",
703
+ tools=["batch_processor"]
704
+ )
705
+ else:
706
+ return await super().receive_message(data, source, channel, workflow)
707
+ ```
708
+ """
709
+ # Default implementation: autonomous processing with context
710
+ prompt = f"Process message from {source_agent} via {channel}"
711
+
712
+ # If data is structured, include it in context
713
+ if isinstance(data, dict):
714
+ prompt = f"{prompt}. Data: {data}"
715
+ elif isinstance(data, list):
716
+ prompt = f"{prompt}. Processing {len(data)} items."
717
+
718
+ result = await self.run_detailed(prompt)
719
+
720
+ # Add workflow metadata to result
721
+ result['workflow_metadata'] = {
722
+ 'source_agent': source_agent,
723
+ 'channel': channel,
724
+ 'workflow': workflow_name,
725
+ 'entry_point': 'receive_message'
726
+ }
727
+
728
+ return result
729
+
730
    async def on_webhook(
        self,
        payload: Dict[str, Any],
        webhook_config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Handle webhook trigger from external service.

        Called automatically by the webhook system when an external service
        (GitHub, Slack, etc.) triggers this agent. Do not call directly
        unless building webhook infrastructure; use run()/run_detailed()
        for direct execution.

        NOTE(review): this default implementation executes only the
        configured ``instructions`` string — the incoming ``payload`` is
        never forwarded to the LLM. Override this method (as in the example
        below) whenever the payload content matters.

        Args:
            payload: Webhook payload from external service (unused by the
                default implementation — see NOTE above).
            webhook_config: Webhook configuration; 'instructions' is used as
                the prompt, 'webhook_id'/'webhook_slug' go into metadata.

        Returns:
            run_detailed() result plus 'webhook_metadata'.

        Example — custom webhook handling:
            ```python
            class MyAgent(SubstrateAgent):
                async def on_webhook(self, payload, webhook_config):
                    event_type = payload.get('event')
                    if event_type == 'push':
                        return await self.run(
                            f"Analyze code push: {payload['commits']}",
                            tools=["code_analyzer", "lint"]
                        )
            ```
        """
        instructions = webhook_config.get('instructions', 'Process webhook data')

        result = await self.run_detailed(instructions)

        result['webhook_metadata'] = {
            'webhook_id': webhook_config.get('webhook_id'),
            'webhook_slug': webhook_config.get('webhook_slug'),
            'entry_point': 'on_webhook'
        }

        return result
781
+
782
+ async def on_schedule(
783
+ self,
784
+ schedule_config: Dict[str, Any]
785
+ ) -> Dict[str, Any]:
786
+ """
787
+ Handle scheduled task execution (cron jobs).
788
+
789
+ This method is called automatically by the scheduler when a
790
+ scheduled task triggers.
791
+
792
+ DO NOT call this directly unless you're building scheduler infrastructure.
793
+ For direct agent execution, use run() or run_detailed().
794
+
795
+ Args:
796
+ schedule_config: Schedule configuration (task, cron, etc.)
797
+
798
+ Returns:
799
+ Processing result with schedule metadata
800
+
801
+ Examples:
802
+ Custom schedule handling:
803
+ ```python
804
+ class MyAgent(SubstrateAgent):
805
+ async def on_schedule(self, schedule_config):
806
+ task = schedule_config['task']
807
+
808
+ if 'daily' in task.lower():
809
+ return await self.run(
810
+ "Run daily reports",
811
+ tools=["report_generator", "email"]
812
+ )
813
+ elif 'hourly' in task.lower():
814
+ return await self.run(
815
+ "Quick health check",
816
+ tools=["monitor"]
817
+ )
818
+ ```
819
+ """
820
+ task = schedule_config.get('task', 'Execute scheduled task')
821
+
822
+ result = await self.run_detailed(task)
823
+
824
+ result['schedule_metadata'] = {
825
+ 'schedule_id': schedule_config.get('schedule_id'),
826
+ 'cron': schedule_config.get('cron'),
827
+ 'entry_point': 'on_schedule'
828
+ }
829
+
830
+ return result
831
+
832
+ async def call_mcp_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
833
+ """
834
+ Manually call an MCP tool.
835
+
836
+ This allows users to explicitly call MCP tools for testing purposes.
837
+
838
+ Args:
839
+ tool_name: Name of the MCP tool to call
840
+ arguments: Arguments for the tool
841
+
842
+ Returns:
843
+ Tool execution result
844
+
845
+ Example:
846
+ ```python
847
+ result = await agent.call_mcp_tool("read_file", {"path": "/data/file.txt"})
848
+ ```
849
+ """
850
+ if not self.mcp_registry:
851
+ raise RuntimeError("No MCP servers configured. Add mcp parameter to SubstrateAgent.")
852
+
853
+ return await self.mcp_registry.call_tool(tool_name, arguments)
854
+
855
    async def _call_llm_with_tools(
        self,
        prompt: str,
        tools: Optional[list[Union[str, AgentTool]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Internal helper for handlers to call LLM with autonomous tool execution.

        This is meant to be called FROM WITHIN HANDLERS, not directly by users.
        Users should call agent.process() which routes to handlers.

        Args:
            prompt: Natural language instruction
            tools: List of tool names (strings) or AgentTool instances.
                If None, uses all registered tools.
            **kwargs: Passed to LLM provider

        Returns:
            {
                "result": str,            # Final answer
                "tool_calls": List[Dict], # Tools that were called
                "iterations": int         # Number of LLM turns
            }
            NOTE: the no-tools path additionally includes 'cost' and 'tokens'
            keys (see below); the tool-enabled path returns whatever
            llm.generate_with_tools() produces.

        Raises:
            AgentError: If no LLM provider is configured on this agent.
            ValueError: If a tool name in `tools` is not found in the registry.

        Example (within handler):
            async def my_handler(data, context, agent):
                result = await agent._call_llm_with_tools(
                    prompt="Check data quality and suggest fixes",
                    tools=['validate_schema', 'detect_anomalies']
                )
                return result
        """
        # Imported lazily here (TraceType is only needed for span creation).
        from ..core.tracing import TraceType

        if not self.llm:
            raise AgentError("LLM provider not configured for this agent")

        # Setup tools if needed (lazy, idempotent tool registration).
        await self._setup_tools()

        # Resolve tool names to AgentTool instances
        if tools is None:
            # Use all registered tools
            tool_list = list(self.tool_registry.tools)
        else:
            tool_list = []
            for t in tools:
                if isinstance(t, str):
                    # Tool name - look up in registry
                    tool = self.tool_registry.get(t)
                    if not tool:
                        raise ValueError(f"Tool '{t}' not found in registry")
                    tool_list.append(tool)
                else:
                    # Already an AgentTool instance
                    tool_list.append(t)

        # Apply focus wrapper to all tools if focus is configured.
        # This filters tool results BEFORE they reach the LLM (token reduction!)
        if self.default_focus and tool_list:
            tool_list = [FocusedTool(tool, self.default_focus) for tool in tool_list]
            logger.debug(
                f"Wrapped {len(tool_list)} tools with focus filter: {self.default_focus}"
            )

        # If no tools available, use simple LLM call
        if not tool_list:
            # No tools - use simple LLM generation
            span_id = self.trace_manager.start_span(
                operation_name="llm_simple_generation",
                trace_type=TraceType.LLM_CALL,
                agent_id=self.agent_id,
                prompt=prompt,
                tools_available=[]
            )

            try:
                # Simple LLM call without tools
                response = await self.llm.generate(prompt, **kwargs)

                # Return in expected format
                # NOTE(review): assumes `response` is a string (sliced to a
                # 200-char preview here) — confirm llm.generate()'s contract.
                self.trace_manager.end_span(
                    span_id=span_id,
                    status=TraceStatus.SUCCESS,
                    result=response[:200]
                )

                return {
                    'result': response,
                    'tool_calls': [],
                    'iterations': 1,
                    # Best-effort cost/token accounting; defaults used when the
                    # provider does not expose these attributes.
                    'cost': getattr(self.llm, 'last_call_cost', 0.0),
                    'tokens': getattr(self.llm, 'last_call_tokens', {})
                }

            except Exception as e:
                # NOTE(review): this branch passes `error=`, while the
                # tool-enabled branch below passes `error_message=` (and
                # `output_data=` vs `result=` on success). Presumably end_span
                # accepts arbitrary kwargs — verify against TraceManager and
                # unify the keyword names if not intentional.
                self.trace_manager.end_span(
                    span_id=span_id,
                    status=TraceStatus.ERROR,
                    error=str(e)
                )
                raise

        # Start trace for tool-enabled execution
        span_id = self.trace_manager.start_span(
            operation_name="llm_autonomous_execution",
            trace_type=TraceType.LLM_CALL,
            agent_id=self.agent_id,
            prompt=prompt,
            tools_available=[t.name for t in tool_list]
        )

        try:
            # Call LLM with tools; the provider drives the tool-call loop and
            # returns the aggregate result dict.
            result = await self.llm.generate_with_tools(
                prompt=prompt,
                tools=tool_list,
                **kwargs
            )

            # End trace
            # NOTE(review): assumes result contains "tool_calls" (each with a
            # "tool" key) and "iterations" — a KeyError here would mask the
            # LLM result; confirm generate_with_tools()'s return schema.
            self.trace_manager.end_span(
                span_id,
                status=TraceStatus.SUCCESS,
                output_data={
                    "tools_called": [tc["tool"] for tc in result["tool_calls"]],
                    "iterations": result["iterations"],
                    "result_preview": result["result"][:200] if result.get("result") else None
                }
            )

            return result

        except Exception as e:
            # End trace with error
            self.trace_manager.end_span(
                span_id,
                status=TraceStatus.ERROR,
                error_message=str(e)
            )
            raise
+ # User customization methods
999
+
1000
+ def add_plugin(self, plugin: Any):
1001
+ """
1002
+ Add a plugin to the agent's tool sources.
1003
+
1004
+ The plugin's tools will be registered on next tool setup.
1005
+ """
1006
+ self.tool_sources.append(plugin)
1007
+ logger.debug(f"Added plugin: {plugin.__class__.__name__}")
1008
+
1009
+ def register_tool(self, tool: AgentTool) -> None:
1010
+ """
1011
+ Register a single tool manually.
1012
+
1013
+ Useful for adding custom tools after agent initialization.
1014
+
1015
+ Args:
1016
+ tool: AgentTool instance to register
1017
+
1018
+ Example:
1019
+ ```python
1020
+ from daita import tool
1021
+ agent = SubstrateAgent(name="my_agent")
1022
+
1023
+ custom_tool = tool(my_custom_function)
1024
+ agent.register_tool(custom_tool)
1025
+ ```
1026
+ """
1027
+ self.tool_registry.register(tool)
1028
+
1029
+ def register_tools(self, tools: List[AgentTool]) -> None:
1030
+ """
1031
+ Register multiple tools manually.
1032
+
1033
+ Args:
1034
+ tools: List of AgentTool instances
1035
+ """
1036
+ self.tool_registry.register_many(tools)
1037
+
1038
+ async def call_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
1039
+ """
1040
+ Execute a tool by name.
1041
+
1042
+ Provides manual tool execution for testing or custom handlers.
1043
+
1044
+ Args:
1045
+ name: Tool name
1046
+ arguments: Tool arguments dict
1047
+
1048
+ Returns:
1049
+ Tool execution result
1050
+
1051
+ Example:
1052
+ ```python
1053
+ result = await agent.call_tool("query_database", {"sql": "SELECT 1"})
1054
+ ```
1055
+ """
1056
+ await self._setup_tools()
1057
+ return await self.tool_registry.execute(name, arguments)
1058
+
1059
+ @property
1060
+ def available_tools(self) -> List[AgentTool]:
1061
+ """
1062
+ Get list of all available tools.
1063
+
1064
+ Returns:
1065
+ List of AgentTool instances
1066
+ """
1067
+ return self.tool_registry.tools.copy()
1068
+
1069
+ @property
1070
+ def tool_names(self) -> List[str]:
1071
+ """Get list of all tool names"""
1072
+ return self.tool_registry.tool_names
1073
+
1074
+ async def stop(self) -> None:
1075
+ """Stop agent and clean up all resources including MCP connections."""
1076
+ # Clean up MCP connections first
1077
+ if self.mcp_registry:
1078
+ try:
1079
+ await self.mcp_registry.disconnect_all()
1080
+ logger.info(f"Cleaned up MCP connections for agent {self.name}")
1081
+ except Exception as e:
1082
+ logger.warning(f"Error cleaning up MCP connections: {e}")
1083
+
1084
+ # Call parent stop for standard cleanup
1085
+ await super().stop()
1086
+
1087
+ def get_token_usage(self) -> Dict[str, int]:
1088
+ """
1089
+ Get token usage for this agent using automatic tracing.
1090
+
1091
+ Returns comprehensive token statistics from the unified tracing system.
1092
+ """
1093
+ if not self.llm or not hasattr(self.llm, 'get_token_stats'):
1094
+ # Fallback for agents without LLM or tracing
1095
+ return {
1096
+ 'total_tokens': 0,
1097
+ 'prompt_tokens': 0,
1098
+ 'completion_tokens': 0,
1099
+ 'requests': 0
1100
+ }
1101
+
1102
+ return self.llm.get_token_stats()
1103
+
1104
+ async def _publish_to_relay(self, result: Dict[str, Any], context: Dict[str, Any]):
1105
+ """Publish result to relay channel."""
1106
+ try:
1107
+ from ..core.relay import publish
1108
+
1109
+ await publish(
1110
+ channel=self.relay,
1111
+ agent_response=result,
1112
+ publisher=self.name
1113
+ )
1114
+ logger.debug(f"Published result to relay channel: {self.relay}")
1115
+ except Exception as e:
1116
+ logger.warning(f"Failed to publish to relay channel {self.relay}: {str(e)}")
1117
+ # Don't re-raise - relay failures shouldn't break main processing
1118
+
1119
+ @property
1120
+ def health(self) -> Dict[str, Any]:
1121
+ """Enhanced health information for SubstrateAgent."""
1122
+ base_health = super().health
1123
+
1124
+ # Add SubstrateAgent-specific health info
1125
+ base_health.update({
1126
+ 'tools': {
1127
+ 'count': self.tool_registry.tool_count,
1128
+ 'setup': self._tools_setup,
1129
+ 'names': self.tool_registry.tool_names if self._tools_setup else []
1130
+ },
1131
+ 'relay': {
1132
+ 'enabled': self.relay is not None,
1133
+ 'channel': self.relay
1134
+ },
1135
+ 'llm': {
1136
+ 'available': self.llm is not None,
1137
+ 'provider': self.llm.provider_name if self.llm and hasattr(self.llm, 'provider_name') else None
1138
+ }
1139
+ })
1140
+
1141
+ return base_health