kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kairo/backend/api/agents.py +337 -16
- kairo/backend/app.py +84 -4
- kairo/backend/config.py +4 -2
- kairo/backend/models/agent.py +216 -2
- kairo/backend/models/api_key.py +4 -1
- kairo/backend/models/task.py +31 -0
- kairo/backend/models/user_provider_key.py +26 -0
- kairo/backend/schemas/agent.py +249 -2
- kairo/backend/schemas/api_key.py +3 -0
- kairo/backend/services/agent/__init__.py +52 -0
- kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
- kairo/backend/services/agent/agent_alerts_service.py +201 -0
- kairo/backend/services/agent/agent_commands_service.py +142 -0
- kairo/backend/services/agent/agent_crud_service.py +150 -0
- kairo/backend/services/agent/agent_events_service.py +103 -0
- kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
- kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
- kairo/backend/services/agent/agent_metrics_service.py +259 -0
- kairo/backend/services/agent/agent_service.py +315 -0
- kairo/backend/services/agent/agent_setup_service.py +180 -0
- kairo/backend/services/agent/constants.py +28 -0
- kairo/backend/services/agent_service.py +18 -102
- kairo/backend/services/api_key_service.py +23 -3
- kairo/backend/services/byok_service.py +204 -0
- kairo/backend/services/chat_service.py +398 -63
- kairo/backend/services/deep_search_service.py +159 -0
- kairo/backend/services/email_service.py +418 -19
- kairo/backend/services/few_shot_service.py +223 -0
- kairo/backend/services/post_processor.py +261 -0
- kairo/backend/services/rag_service.py +150 -0
- kairo/backend/services/task_service.py +119 -0
- kairo/backend/tests/__init__.py +1 -0
- kairo/backend/tests/e2e/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/conftest.py +389 -0
- kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
- kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
- kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
- kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
- kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
- kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
- kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
- kairo/migrations/versions/010_agent_dashboard.py +246 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
- kairo_migrations/env.py +92 -0
- kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent CRUD operations service.
|
|
3
|
+
|
|
4
|
+
Handles registration, listing, retrieval, updating, and deletion of agents.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime, timedelta, UTC
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import func, select
|
|
12
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
|
+
|
|
14
|
+
from backend.models.agent import Agent, AgentMetrics1m
|
|
15
|
+
from backend.schemas.agent import RegisterAgentRequest, UpdateAgentRequest
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AgentCrudService:
    """Service for agent CRUD operations.

    Every lookup in this class excludes soft-deleted agents (rows whose
    ``deleted_at`` is set).
    """

    def __init__(self, db: AsyncSession):
        """Keep the async session that all operations run on."""
        self.db = db

    async def register(self, user_id: str, req: RegisterAgentRequest) -> Agent:
        """Register a new agent in the ``"created"`` state.

        Args:
            user_id: Owner of the new agent.
            req: Registration payload. ``req.tools`` is stored JSON-encoded in
                ``tools_config``; a missing or empty value is stored as ``None``.

        Returns:
            The persisted agent, refreshed after the commit.
        """
        agent = Agent(
            user_id=user_id,
            name=req.name,
            description=req.description,
            system_prompt=req.system_prompt,
            model_preference=req.model_preference,
            agent_type=req.agent_type,
            tools_config=json.dumps(req.tools) if req.tools else None,
            state="created",
        )
        self.db.add(agent)
        await self.db.commit()
        await self.db.refresh(agent)
        logger.info("Agent registered: user=%s name=%s id=%s", user_id, req.name, agent.id)
        return agent

    async def list_agents(self, user_id: str) -> list[Agent]:
        """List all non-deleted agents of a user, newest first."""
        stmt = (
            select(Agent)
            .where(Agent.user_id == user_id)
            .where(Agent.deleted_at.is_(None))
            .order_by(Agent.created_at.desc())
        )
        result = await self.db.execute(stmt)
        return list(result.scalars().all())

    async def list_agents_with_metrics(self, user_id: str) -> list[dict]:
        """List agents with 24h summary metrics.

        The per-agent totals are fetched with a single grouped query rather
        than one query per agent, so the number of round trips does not grow
        with the number of agents.

        Returns:
            One dict per agent (same order as :meth:`list_agents`) holding the
            agent's basic fields plus ``requests_24h``, ``errors_24h``,
            ``tokens_24h`` and ``error_rate`` (a percentage rounded to two
            decimals; ``0.0`` when there were no requests).
        """
        agents = await self.list_agents(user_id)
        if not agents:
            return []

        cutoff = datetime.now(UTC) - timedelta(hours=24)

        metrics_stmt = (
            select(
                AgentMetrics1m.agent_id,
                func.sum(AgentMetrics1m.request_count).label("requests"),
                func.sum(AgentMetrics1m.error_count).label("errors"),
                func.sum(AgentMetrics1m.input_tokens + AgentMetrics1m.output_tokens).label("tokens"),
            )
            .where(AgentMetrics1m.agent_id.in_([agent.id for agent in agents]))
            .where(AgentMetrics1m.bucket_time >= cutoff)
            .group_by(AgentMetrics1m.agent_id)
        )
        metrics_result = await self.db.execute(metrics_stmt)
        # Agents without any bucket in the window are simply absent here.
        totals_by_agent = {row.agent_id: row for row in metrics_result.all()}

        result = []
        for agent in agents:
            totals = totals_by_agent.get(agent.id)

            # SUM() yields NULL when every summed value is NULL, hence ``or 0``.
            requests_24h = (totals.requests or 0) if totals is not None else 0
            errors_24h = (totals.errors or 0) if totals is not None else 0
            tokens_24h = (totals.tokens or 0) if totals is not None else 0
            error_rate = (errors_24h / requests_24h * 100) if requests_24h > 0 else 0.0

            result.append({
                "id": agent.id,
                "name": agent.name,
                "description": agent.description,
                "model_preference": agent.model_preference,
                "agent_type": agent.agent_type,
                "state": agent.state,
                "sdk_version": agent.sdk_version,
                "last_heartbeat_at": agent.last_heartbeat_at,
                "created_at": agent.created_at,
                "requests_24h": requests_24h,
                "errors_24h": errors_24h,
                "tokens_24h": tokens_24h,
                "error_rate": round(error_rate, 2),
            })

        return result

    async def get_agent(self, user_id: str, agent_id: str) -> Agent | None:
        """Get a non-deleted agent by ID, scoped to its owner.

        Returns:
            The agent, or ``None`` when no non-deleted agent with this ID
            belongs to ``user_id``.
        """
        stmt = (
            select(Agent)
            .where(Agent.id == agent_id, Agent.user_id == user_id)
            .where(Agent.deleted_at.is_(None))
        )
        result = await self.db.execute(stmt)
        return result.scalar_one_or_none()

    async def get_agent_by_id(self, agent_id: str) -> Agent | None:
        """Get a non-deleted agent by ID only (for SDK auth).

        Unlike :meth:`get_agent` this performs no ownership check.
        """
        stmt = select(Agent).where(Agent.id == agent_id).where(Agent.deleted_at.is_(None))
        result = await self.db.execute(stmt)
        return result.scalar_one_or_none()

    async def update_agent(
        self,
        user_id: str,
        agent_id: str,
        req: UpdateAgentRequest
    ) -> Agent | None:
        """Update agent properties.

        Only fields that are not ``None`` on ``req`` are applied. A non-``None``
        ``req.tools`` (including an empty value) is stored JSON-encoded in
        ``tools_config``.

        Returns:
            The refreshed agent, or ``None`` when the agent is not found for
            this user.
        """
        agent = await self.get_agent(user_id, agent_id)
        if not agent:
            return None

        # Plain fields are copied over verbatim when supplied.
        for field in ("name", "description", "system_prompt", "model_preference"):
            value = getattr(req, field)
            if value is not None:
                setattr(agent, field, value)

        if req.tools is not None:
            agent.tools_config = json.dumps(req.tools)

        agent.updated_at = datetime.now(UTC)
        await self.db.commit()
        await self.db.refresh(agent)
        return agent

    async def delete_agent(self, user_id: str, agent_id: str) -> bool:
        """Soft delete an agent.

        Sets ``deleted_at`` to the current UTC time and the state to
        ``"disabled"``; the row itself is kept.

        Returns:
            ``True`` when the agent was found and marked deleted, ``False``
            when no matching non-deleted agent exists for this user.
        """
        agent = await self.get_agent(user_id, agent_id)
        if not agent:
            return False

        agent.deleted_at = datetime.now(UTC)
        agent.state = "disabled"
        await self.db.commit()
        logger.info("Agent deleted: id=%s user=%s", agent_id, user_id)
        return True
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent events service.
|
|
3
|
+
|
|
4
|
+
Handles event logging and querying for agent activity tracking.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime, timedelta, UTC
|
|
9
|
+
|
|
10
|
+
from sqlalchemy import select
|
|
11
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
12
|
+
|
|
13
|
+
from backend.models.agent import Agent, AgentEvent
|
|
14
|
+
from backend.services.agent.constants import DEFAULT_EVENTS_RETENTION_DAYS
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AgentEventsService:
    """Service for agent event management."""

    def __init__(self, db: AsyncSession):
        """Keep the async session that all operations run on."""
        self.db = db

    async def log_event(
        self,
        agent_id: str,
        event_type: str,
        event_data: dict | None = None,
        error_type: str | None = None,
        error_message: str | None = None,
        client_ip: str | None = None,
    ) -> AgentEvent:
        """Log an agent event.

        The event is only added to the session; this method does not commit,
        so the caller decides when it is persisted.

        Returns:
            The newly created (still pending) event.
        """
        event = AgentEvent(
            agent_id=agent_id,
            event_type=event_type,
            event_data=event_data,
            error_type=error_type,
            error_message=error_message,
            client_ip=client_ip,
        )
        self.db.add(event)
        return event

    async def get_events(
        self,
        agent_id: str,
        event_type: str | None = None,
        limit: int = 50,
    ) -> list[AgentEvent]:
        """Get the most recent events of an agent, newest first.

        Args:
            agent_id: Agent whose events are returned.
            event_type: When given (and non-empty), only events of this type
                are returned.
            limit: Maximum number of events to return.
        """
        stmt = select(AgentEvent).where(AgentEvent.agent_id == agent_id)
        if event_type:
            stmt = stmt.where(AgentEvent.event_type == event_type)
        stmt = stmt.order_by(AgentEvent.created_at.desc()).limit(limit)

        result = await self.db.execute(stmt)
        return list(result.scalars().all())

    async def get_events_for_user(
        self,
        user_id: str,
        agent_id: str,
        event_type: str | None = None,
        limit: int = 50,
    ) -> list[AgentEvent] | None:
        """Get events for an agent owned by a user. Returns None if agent not found."""
        # Verify agent ownership; only existence matters, so select just the id.
        agent_stmt = (
            select(Agent.id)
            .where(Agent.id == agent_id, Agent.user_id == user_id)
            .where(Agent.deleted_at.is_(None))
        )
        agent_result = await self.db.execute(agent_stmt)
        if agent_result.scalar_one_or_none() is None:
            return None

        return await self.get_events(agent_id, event_type, limit)

    async def cleanup_old_events(
        self,
        retention_days: int = DEFAULT_EVENTS_RETENTION_DAYS
    ) -> int:
        """Clean up old events based on retention policy.

        Deletes every event created more than ``retention_days`` days ago with
        a single bulk ``DELETE`` instead of loading the rows and deleting them
        one by one.

        Returns:
            Number of deleted events as reported by the database driver.
        """
        # Imported locally so this method does not depend on the module's
        # top-level import list.
        from sqlalchemy import delete

        cutoff = datetime.now(UTC) - timedelta(days=retention_days)
        stmt = (
            delete(AgentEvent)
            .where(AgentEvent.created_at < cutoff)
            # No in-session bookkeeping needed for a retention sweep.
            # NOTE(review): confirm no caller keeps expired AgentEvent objects
            # loaded in this session across the call.
            .execution_options(synchronize_session=False)
        )
        result = await self.db.execute(stmt)
        deleted = result.rowcount

        # A driver that cannot report a row count returns -1; commit in that
        # case too so that a successful delete is never left pending.
        if deleted:
            await self.db.commit()

        count = max(deleted or 0, 0)
        if count > 0:
            logger.info("Cleaned up %d old events", count)

        return count
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent heartbeat service.
|
|
3
|
+
|
|
4
|
+
Handles heartbeat processing, agent state management, and background state checks.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime, timedelta, UTC
|
|
9
|
+
|
|
10
|
+
from sqlalchemy import select, or_
|
|
11
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
12
|
+
|
|
13
|
+
from backend.models.agent import Agent, AgentMetrics1m
|
|
14
|
+
from backend.schemas.agent import AgentHeartbeatRequest
|
|
15
|
+
from backend.services.agent.agent_events_service import AgentEventsService
|
|
16
|
+
from backend.services.agent.agent_commands_service import AgentCommandsService
|
|
17
|
+
from backend.services.agent.constants import (
|
|
18
|
+
DEFAULT_HEARTBEAT_TIMEOUT_SECONDS,
|
|
19
|
+
DEFAULT_STALE_AGENT_DAYS,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AgentHeartbeatService:
    """Heartbeat processing plus the periodic offline/stale state sweeps."""

    def __init__(self, db: AsyncSession):
        """Bind the session and build the event/command helper services on it."""
        self.db = db
        self._events_service = AgentEventsService(db)
        self._commands_service = AgentCommandsService(db)

    async def heartbeat(
        self,
        agent: Agent,
        req: AgentHeartbeatRequest,
        client_ip: str | None = None
    ) -> tuple[Agent, list[dict]]:
        """
        Apply a heartbeat to an agent and fetch its pending commands.

        Updates status/state, SDK and host info, records any reported
        metrics, logs a ``heartbeat`` event and commits.

        Args:
            agent: Agent the heartbeat belongs to
            req: Heartbeat payload sent by the agent
            client_ip: Address the heartbeat came from, if known

        Returns:
            Tuple of (refreshed agent, pending commands for that agent)
        """
        received_at = datetime.now(UTC)
        # Captured before the state update, which may set first_connected_at.
        never_connected = agent.first_connected_at is None

        agent.status = req.status
        agent.last_heartbeat_at = received_at

        self._update_agent_state(agent, req, received_at, never_connected)

        if req.sdk_version:
            agent.sdk_version = req.sdk_version
        if req.host_info:
            agent.host_info = req.host_info.model_dump()

        reported_metrics = req.metrics_since_last_heartbeat
        if reported_metrics:
            await self._record_metrics(agent.id, reported_metrics)

        heartbeat_payload = {
            "status": req.status,
            "sdk_version": req.sdk_version,
            "queue_depth": req.queue_depth,
            "active_request": req.active_request,
        }
        await self._events_service.log_event(
            agent_id=agent.id,
            event_type="heartbeat",
            event_data=heartbeat_payload,
            client_ip=client_ip,
        )

        agent.updated_at = received_at
        await self.db.commit()
        await self.db.refresh(agent)

        # Commands are read only after the heartbeat itself is committed.
        pending = await self._commands_service.get_pending_commands(agent.id)
        return agent, pending

    def _update_agent_state(
        self,
        agent: Agent,
        req: AgentHeartbeatRequest,
        now: datetime,
        is_first_connection: bool
    ) -> None:
        """Derive the agent's state from the status reported in a heartbeat.

        ``online``/``busy``/``idle`` set the state and ``last_online_at`` (and
        ``first_connected_at`` on a first connection). ``error`` sets the
        error state and, when ``req.last_error`` is present, the last-error
        fields. Any other status leaves the state untouched.
        """
        if req.status in ("online", "busy", "idle"):
            if req.status == "online":
                agent.state = "online"
            else:
                agent.state = req.status
            agent.last_online_at = now
            if is_first_connection:
                agent.first_connected_at = now
            return

        if req.status != "error":
            return

        agent.state = "error"
        if not req.last_error:
            return
        agent.last_error_at = now
        agent.last_error_message = req.last_error.get("message", "Unknown error")

    async def _record_metrics(self, agent_id: str, metrics) -> None:
        """Add reported metrics to the agent's current 1-minute bucket.

        The bucket is the current UTC time truncated to the minute. An
        existing row for that bucket is incremented; otherwise a new row is
        added to the session. Nothing is committed here.
        """
        current_minute = datetime.now(UTC).replace(second=0, microsecond=0)

        lookup = select(AgentMetrics1m).where(
            AgentMetrics1m.agent_id == agent_id,
            AgentMetrics1m.bucket_time == current_minute
        )
        bucket = (await self.db.execute(lookup)).scalar_one_or_none()

        if not bucket:
            self.db.add(
                AgentMetrics1m(
                    agent_id=agent_id,
                    bucket_time=current_minute,
                    request_count=metrics.requests_completed + metrics.requests_failed,
                    error_count=metrics.requests_failed,
                    input_tokens=metrics.input_tokens,
                    output_tokens=metrics.output_tokens,
                    total_latency_ms=metrics.total_latency_ms,
                    tool_calls=metrics.tool_calls,
                    tool_errors=metrics.tool_errors,
                )
            )
            return

        bucket.request_count += metrics.requests_completed + metrics.requests_failed
        bucket.error_count += metrics.requests_failed
        bucket.input_tokens += metrics.input_tokens
        bucket.output_tokens += metrics.output_tokens
        bucket.total_latency_ms += metrics.total_latency_ms
        bucket.tool_calls += metrics.tool_calls
        bucket.tool_errors += metrics.tool_errors

    async def mark_stale_agents_offline(
        self,
        threshold_seconds: int = DEFAULT_HEARTBEAT_TIMEOUT_SECONDS
    ) -> int:
        """Switch agents to ``offline`` when their heartbeat has timed out.

        Affects non-deleted agents in state ``online``/``busy``/``idle`` whose
        last heartbeat is older than ``threshold_seconds`` or missing. A
        ``state_change`` event is logged for each one.

        Returns:
            Number of agents switched to offline.
        """
        cutoff = datetime.now(UTC) - timedelta(seconds=threshold_seconds)
        heartbeat_missing_or_old = or_(
            Agent.last_heartbeat_at < cutoff,
            Agent.last_heartbeat_at.is_(None)
        )
        query = (
            select(Agent)
            .where(Agent.state.in_(["online", "busy", "idle"]))
            .where(Agent.deleted_at.is_(None))
            .where(heartbeat_missing_or_old)
        )
        timed_out = list((await self.db.execute(query)).scalars().all())

        for agent in timed_out:
            previous_state = agent.state
            agent.state = "offline"
            agent.status = "offline"
            agent.updated_at = datetime.now(UTC)

            transition = {
                "from": previous_state,
                "to": "offline",
                "reason": "heartbeat_timeout"
            }
            await self._events_service.log_event(
                agent_id=agent.id,
                event_type="state_change",
                event_data=transition,
            )

        total = len(timed_out)
        if timed_out:
            await self.db.commit()
            logger.info("Marked %d agents as offline", total)

        return total

    async def mark_stale_agents(self, days: int = DEFAULT_STALE_AGENT_DAYS) -> int:
        """Switch offline agents to ``stale`` once they were last online more than ``days`` ago.

        Returns:
            Number of agents switched to stale.
        """
        cutoff = datetime.now(UTC) - timedelta(days=days)
        query = (
            select(Agent)
            .where(Agent.state == "offline")
            .where(Agent.deleted_at.is_(None))
            .where(Agent.last_online_at < cutoff)
        )
        long_offline = list((await self.db.execute(query)).scalars().all())

        for agent in long_offline:
            agent.state = "stale"
            agent.updated_at = datetime.now(UTC)

        total = len(long_offline)
        if long_offline:
            await self.db.commit()
            logger.info("Marked %d agents as stale", total)

        return total
|