daita-agents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of daita-agents has been flagged as potentially problematic.
- daita/__init__.py +208 -0
- daita/agents/__init__.py +33 -0
- daita/agents/base.py +722 -0
- daita/agents/substrate.py +895 -0
- daita/cli/__init__.py +145 -0
- daita/cli/__main__.py +7 -0
- daita/cli/ascii_art.py +44 -0
- daita/cli/core/__init__.py +0 -0
- daita/cli/core/create.py +254 -0
- daita/cli/core/deploy.py +473 -0
- daita/cli/core/deployments.py +309 -0
- daita/cli/core/import_detector.py +219 -0
- daita/cli/core/init.py +382 -0
- daita/cli/core/logs.py +239 -0
- daita/cli/core/managed_deploy.py +709 -0
- daita/cli/core/run.py +648 -0
- daita/cli/core/status.py +421 -0
- daita/cli/core/test.py +239 -0
- daita/cli/core/webhooks.py +172 -0
- daita/cli/main.py +588 -0
- daita/cli/utils.py +541 -0
- daita/config/__init__.py +62 -0
- daita/config/base.py +159 -0
- daita/config/settings.py +184 -0
- daita/core/__init__.py +262 -0
- daita/core/decision_tracing.py +701 -0
- daita/core/exceptions.py +480 -0
- daita/core/focus.py +251 -0
- daita/core/interfaces.py +76 -0
- daita/core/plugin_tracing.py +550 -0
- daita/core/relay.py +695 -0
- daita/core/reliability.py +381 -0
- daita/core/scaling.py +444 -0
- daita/core/tools.py +402 -0
- daita/core/tracing.py +770 -0
- daita/core/workflow.py +1084 -0
- daita/display/__init__.py +1 -0
- daita/display/console.py +160 -0
- daita/execution/__init__.py +58 -0
- daita/execution/client.py +856 -0
- daita/execution/exceptions.py +92 -0
- daita/execution/models.py +317 -0
- daita/llm/__init__.py +60 -0
- daita/llm/anthropic.py +166 -0
- daita/llm/base.py +373 -0
- daita/llm/factory.py +101 -0
- daita/llm/gemini.py +152 -0
- daita/llm/grok.py +114 -0
- daita/llm/mock.py +135 -0
- daita/llm/openai.py +109 -0
- daita/plugins/__init__.py +141 -0
- daita/plugins/base.py +37 -0
- daita/plugins/base_db.py +167 -0
- daita/plugins/elasticsearch.py +844 -0
- daita/plugins/mcp.py +481 -0
- daita/plugins/mongodb.py +510 -0
- daita/plugins/mysql.py +351 -0
- daita/plugins/postgresql.py +331 -0
- daita/plugins/redis_messaging.py +500 -0
- daita/plugins/rest.py +529 -0
- daita/plugins/s3.py +761 -0
- daita/plugins/slack.py +729 -0
- daita/utils/__init__.py +18 -0
- daita_agents-0.1.0.dist-info/METADATA +350 -0
- daita_agents-0.1.0.dist-info/RECORD +69 -0
- daita_agents-0.1.0.dist-info/WHEEL +5 -0
- daita_agents-0.1.0.dist-info/entry_points.txt +2 -0
- daita_agents-0.1.0.dist-info/licenses/LICENSE +56 -0
- daita_agents-0.1.0.dist-info/top_level.txt +1 -0
daita/core/reliability.py
@@ -0,0 +1,381 @@
"""
Core Reliability Infrastructure for Daita Agents.

Provides task management, retry policies, circuit breakers, and other
reliability patterns for production-grade inter-agent communication.

Key Components:
- TaskManager: Track task lifecycle and state
- CircuitBreaker: Prevent cascading failures
- BackpressureController: Manage agent queue capacity

Note: RetryPolicy has been moved to config.base for better integration with configuration system.
"""

import asyncio
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List, Callable, Union
from dataclasses import dataclass, field
from enum import Enum

logger = logging.getLogger(__name__)

# Task Management

class TaskStatus(str, Enum):
    """Status of a task in the system."""
    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"

@dataclass
class Task:
    """Represents a task being processed by an agent."""
    id: str
    agent_id: str
    task_type: str
    data: Any
    status: TaskStatus = TaskStatus.QUEUED
    created_at: float = field(default_factory=time.time)
    started_at: Optional[float] = None
    completed_at: Optional[float] = None
    error: Optional[str] = None
    progress: float = 0.0
    retry_count: int = 0
    context: Dict[str, Any] = field(default_factory=dict)

    def duration(self) -> Optional[float]:
        """Calculate task duration if started."""
        if not self.started_at:
            return None
        end_time = self.completed_at or time.time()
        return end_time - self.started_at

    def age(self) -> float:
        """Calculate task age since creation."""
        return time.time() - self.created_at

class TaskManager:
    """
    Manages task lifecycle and state tracking.

    Integrates with existing tracing system for automatic task visibility.
    """

    def __init__(self):
        self._tasks: Dict[str, Task] = {}
        self._agent_tasks: Dict[str, List[str]] = {}  # agent_id -> task_ids
        self._lock = asyncio.Lock()

    async def create_task(
        self,
        agent_id: str,
        task_type: str,
        data: Any,
        context: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create a new task and return its ID."""
        task_id = uuid.uuid4().hex
        task = Task(
            id=task_id,
            agent_id=agent_id,
            task_type=task_type,
            data=data,
            context=context or {}
        )

        async with self._lock:
            self._tasks[task_id] = task
            if agent_id not in self._agent_tasks:
                self._agent_tasks[agent_id] = []
            self._agent_tasks[agent_id].append(task_id)

        logger.debug(f"Created task {task_id} for agent {agent_id}")
        return task_id

    async def update_status(
        self,
        task_id: str,
        status: TaskStatus,
        error: Optional[str] = None,
        progress: Optional[float] = None
    ) -> bool:
        """Update task status and metadata."""
        async with self._lock:
            task = self._tasks.get(task_id)
            if not task:
                return False

            task.status = status
            if error:
                task.error = error
            if progress is not None:
                task.progress = progress

            # Update timestamps
            if status == TaskStatus.RUNNING and not task.started_at:
                task.started_at = time.time()
            elif status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
                task.completed_at = time.time()

            logger.debug(f"Updated task {task_id} status to {status}")
            return True

    async def get_task(self, task_id: str) -> Optional[Task]:
        """Get task by ID."""
        async with self._lock:
            return self._tasks.get(task_id)

    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get task status summary."""
        task = await self.get_task(task_id)
        if not task:
            return None

        return {
            "id": task.id,
            "status": task.status.value,
            "progress": task.progress,
            "error": task.error,
            "duration": task.duration(),
            "age": task.age(),
            "retry_count": task.retry_count
        }

    async def get_agent_tasks(self, agent_id: str, status: Optional[TaskStatus] = None) -> List[Task]:
        """Get all tasks for an agent, optionally filtered by status."""
        async with self._lock:
            task_ids = self._agent_tasks.get(agent_id, [])
            tasks = [self._tasks[tid] for tid in task_ids if tid in self._tasks]

            if status:
                tasks = [t for t in tasks if t.status == status]

            return tasks

    async def cancel_task(self, task_id: str) -> bool:
        """Cancel a task if it's not already completed."""
        async with self._lock:
            task = self._tasks.get(task_id)
            if not task or task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED]:
                return False

            task.status = TaskStatus.CANCELLED
            task.completed_at = time.time()

            logger.debug(f"Cancelled task {task_id}")
            return True

    async def cleanup_old_tasks(self, max_age_seconds: int = 3600) -> int:
        """Remove old completed tasks to prevent memory leaks."""
        cutoff_time = time.time() - max_age_seconds
        removed_count = 0

        async with self._lock:
            to_remove = []
            for task_id, task in self._tasks.items():
                if (task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]
                        and task.created_at < cutoff_time):
                    to_remove.append(task_id)

            for task_id in to_remove:
                task = self._tasks.pop(task_id, None)
                if task:
                    # Remove from agent tasks
                    agent_tasks = self._agent_tasks.get(task.agent_id, [])
                    if task_id in agent_tasks:
                        agent_tasks.remove(task_id)
                    removed_count += 1

        if removed_count > 0:
            logger.debug(f"Cleaned up {removed_count} old tasks")

        return removed_count
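
The methods above form a simple create → update → query lifecycle around an in-memory dict. A minimal sketch of driving it from an agent loop (the agent id, task type, and payload are placeholders for illustration, not values defined by this package):

import asyncio
from daita.core.reliability import TaskManager, TaskStatus

async def main():
    manager = TaskManager()
    # Hypothetical agent id, task type, and payload.
    task_id = await manager.create_task("agent-1", "summarize", {"doc": "..."})
    await manager.update_status(task_id, TaskStatus.RUNNING, progress=0.5)
    await manager.update_status(task_id, TaskStatus.COMPLETED, progress=1.0)
    print(await manager.get_task_status(task_id))
    await manager.cleanup_old_tasks(max_age_seconds=0)  # drop finished tasks immediately

asyncio.run(main())
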
# Circuit Breaker

class CircuitState(str, Enum):
    """Circuit breaker states."""
    CLOSED = "closed"        # Normal operation
    OPEN = "open"            # Failing, reject requests
    HALF_OPEN = "half_open"  # Testing recovery

class CircuitBreaker:
    """
    Circuit breaker pattern implementation.

    Prevents cascading failures by temporarily stopping calls to failing services.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: float = 60.0,
        success_threshold: int = 2
    ):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.success_threshold = success_threshold

        self.failure_count = 0
        self.success_count = 0
        self.last_failure_time: Optional[float] = None
        self.state = CircuitState.CLOSED
        self._lock = asyncio.Lock()

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """Execute function through circuit breaker."""
        async with self._lock:
            # Check if we should transition from OPEN to HALF_OPEN
            if (self.state == CircuitState.OPEN and
                    self.last_failure_time and
                    time.time() - self.last_failure_time > self.recovery_timeout):
                self.state = CircuitState.HALF_OPEN
                self.success_count = 0
                logger.debug("Circuit breaker transitioning to HALF_OPEN")

            # Reject requests if circuit is OPEN
            if self.state == CircuitState.OPEN:
                try:
                    from ..core.exceptions import CircuitBreakerOpenError
                except ImportError:
                    from core.exceptions import CircuitBreakerOpenError
                raise CircuitBreakerOpenError("Circuit breaker is open")

        try:
            result = await func(*args, **kwargs)
            await self._on_success()
            return result
        except Exception as e:
            await self._on_failure()
            raise

    async def _on_success(self):
        """Handle successful operation."""
        async with self._lock:
            self.failure_count = 0

            if self.state == CircuitState.HALF_OPEN:
                self.success_count += 1
                if self.success_count >= self.success_threshold:
                    self.state = CircuitState.CLOSED
                    logger.info("Circuit breaker closed after successful recovery")
            elif self.state == CircuitState.CLOSED:
                pass  # Already in good state

    async def _on_failure(self):
        """Handle failed operation."""
        async with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                # Failure during recovery test - go back to OPEN
                self.state = CircuitState.OPEN
                logger.warning("Circuit breaker opened after failed recovery attempt")
            elif (self.state == CircuitState.CLOSED and
                    self.failure_count >= self.failure_threshold):
                # Too many failures - open the circuit
                self.state = CircuitState.OPEN
                logger.warning(f"Circuit breaker opened after {self.failure_count} failures")

    def get_state(self) -> Dict[str, Any]:
        """Get current circuit breaker state."""
        return {
            "state": self.state.value,
            "failure_count": self.failure_count,
            "success_count": self.success_count,
            "last_failure_time": self.last_failure_time
        }
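
To see the CLOSED → OPEN → HALF_OPEN state machine in action, a sketch that wraps a deliberately failing coroutine with the breaker above; CircuitBreakerOpenError comes from daita/core/exceptions.py (imported by this module), while the coroutine, its URL argument, and the failure are placeholders:

import asyncio
from daita.core.reliability import CircuitBreaker
from daita.core.exceptions import CircuitBreakerOpenError

async def flaky_call(url: str) -> str:
    raise RuntimeError("downstream unavailable")  # placeholder failure

async def main():
    breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=30.0)
    for attempt in range(5):
        try:
            await breaker.call(flaky_call, "https://example.invalid")
        except CircuitBreakerOpenError:
            print(f"attempt {attempt}: rejected, circuit is open")
        except RuntimeError:
            print(f"attempt {attempt}: call failed", breaker.get_state())

asyncio.run(main())

After three consecutive failures the breaker opens, so the remaining attempts are rejected immediately instead of hitting the failing service again; once recovery_timeout elapses it admits a trial call in HALF_OPEN.
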
# Backpressure Control

class BackpressureController:
    """
    Controls backpressure for agent task queues.

    Prevents memory exhaustion by limiting concurrent tasks and queue sizes.
    """

    def __init__(
        self,
        max_concurrent_tasks: int = 10,
        max_queue_size: int = 100,
        agent_id: Optional[str] = None
    ):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.max_queue_size = max_queue_size
        self.agent_id = agent_id

        self.current_tasks = 0
        self.task_queue: asyncio.Queue = asyncio.Queue(maxsize=max_queue_size)
        self._semaphore = asyncio.Semaphore(max_concurrent_tasks)
        self._lock = asyncio.Lock()

    async def submit_task(self, task_data: Any) -> bool:
        """Submit task for processing. Returns False if queue is full."""
        try:
            self.task_queue.put_nowait(task_data)
            return True
        except asyncio.QueueFull:
            logger.warning(f"Queue full for agent {self.agent_id}, rejecting task")
            return False

    async def get_next_task(self, timeout: Optional[float] = None) -> Optional[Any]:
        """Get next task from queue with optional timeout."""
        try:
            if timeout:
                return await asyncio.wait_for(self.task_queue.get(), timeout=timeout)
            else:
                return await self.task_queue.get()
        except asyncio.TimeoutError:
            return None

    async def acquire_processing_slot(self) -> bool:
        """Acquire a processing slot for concurrent task execution."""
        try:
            await self._semaphore.acquire()
            async with self._lock:
                self.current_tasks += 1
            return True
        except Exception:
            return False

    def release_processing_slot(self):
        """Release a processing slot after task completion."""
        try:
            self._semaphore.release()
            asyncio.create_task(self._decrement_current_tasks())
        except Exception as e:
            logger.error(f"Error releasing processing slot: {e}")

    async def _decrement_current_tasks(self):
        """Safely decrement current task count."""
        async with self._lock:
            self.current_tasks = max(0, self.current_tasks - 1)

    def get_stats(self) -> Dict[str, Any]:
        """Get current backpressure statistics."""
        return {
            "current_tasks": self.current_tasks,
            "max_concurrent_tasks": self.max_concurrent_tasks,
            "queue_size": self.task_queue.qsize(),
            "max_queue_size": self.max_queue_size,
            "queue_utilization": self.task_queue.qsize() / self.max_queue_size,
            "concurrency_utilization": self.current_tasks / self.max_concurrent_tasks
        }
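
A sketch of the intended submit / drain / slot pattern with the controller above (the job payloads, the 0.1 s drain timeout, and the empty worker body are placeholders, not package defaults):

import asyncio
from daita.core.reliability import BackpressureController

async def main():
    bp = BackpressureController(max_concurrent_tasks=2, max_queue_size=5, agent_id="agent-1")

    # Producer side: enqueue work; anything beyond max_queue_size is rejected.
    for i in range(8):
        if not await bp.submit_task({"job": i}):
            print(f"job {i} rejected by backpressure")

    # Consumer side: drain the queue while respecting the concurrency limit.
    while (item := await bp.get_next_task(timeout=0.1)) is not None:
        if await bp.acquire_processing_slot():
            try:
                _ = item  # placeholder for real task processing
            finally:
                bp.release_processing_slot()
    print(bp.get_stats())

asyncio.run(main())
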
# Global instances for shared use
_global_task_manager: Optional[TaskManager] = None

def get_global_task_manager() -> TaskManager:
    """Get global task manager instance."""
    global _global_task_manager
    if _global_task_manager is None:
        _global_task_manager = TaskManager()
    return _global_task_manager
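
Since get_global_task_manager() hands every caller the same TaskManager, the components described in the module docstring can share one task table. A rough end-to-end sketch combining it with a breaker-protected handler (the handler body and agent id are placeholders):

import asyncio
from daita.core.reliability import CircuitBreaker, TaskStatus, get_global_task_manager

breaker = CircuitBreaker()

async def handle(payload: dict) -> dict:
    return {"ok": True, "payload": payload}  # placeholder agent logic

async def process(agent_id: str, payload: dict) -> None:
    manager = get_global_task_manager()  # same instance for every caller in the process
    task_id = await manager.create_task(agent_id, "handle", payload)
    await manager.update_status(task_id, TaskStatus.RUNNING)
    try:
        await breaker.call(handle, payload)
        await manager.update_status(task_id, TaskStatus.COMPLETED, progress=1.0)
    except Exception as exc:
        await manager.update_status(task_id, TaskStatus.FAILED, error=str(exc))

asyncio.run(process("agent-1", {"text": "hello"}))
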