kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kairo/backend/api/agents.py +337 -16
- kairo/backend/app.py +84 -4
- kairo/backend/config.py +4 -2
- kairo/backend/models/agent.py +216 -2
- kairo/backend/models/api_key.py +4 -1
- kairo/backend/models/task.py +31 -0
- kairo/backend/models/user_provider_key.py +26 -0
- kairo/backend/schemas/agent.py +249 -2
- kairo/backend/schemas/api_key.py +3 -0
- kairo/backend/services/agent/__init__.py +52 -0
- kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
- kairo/backend/services/agent/agent_alerts_service.py +201 -0
- kairo/backend/services/agent/agent_commands_service.py +142 -0
- kairo/backend/services/agent/agent_crud_service.py +150 -0
- kairo/backend/services/agent/agent_events_service.py +103 -0
- kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
- kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
- kairo/backend/services/agent/agent_metrics_service.py +259 -0
- kairo/backend/services/agent/agent_service.py +315 -0
- kairo/backend/services/agent/agent_setup_service.py +180 -0
- kairo/backend/services/agent/constants.py +28 -0
- kairo/backend/services/agent_service.py +18 -102
- kairo/backend/services/api_key_service.py +23 -3
- kairo/backend/services/byok_service.py +204 -0
- kairo/backend/services/chat_service.py +398 -63
- kairo/backend/services/deep_search_service.py +159 -0
- kairo/backend/services/email_service.py +418 -19
- kairo/backend/services/few_shot_service.py +223 -0
- kairo/backend/services/post_processor.py +261 -0
- kairo/backend/services/rag_service.py +150 -0
- kairo/backend/services/task_service.py +119 -0
- kairo/backend/tests/__init__.py +1 -0
- kairo/backend/tests/e2e/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/conftest.py +389 -0
- kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
- kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
- kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
- kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
- kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
- kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
- kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
- kairo/migrations/versions/010_agent_dashboard.py +246 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
- kairo_migrations/env.py +92 -0
- kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent alerts evaluation service.
|
|
3
|
+
|
|
4
|
+
Handles alert condition evaluation and triggering.
|
|
5
|
+
This is a background job service that runs periodically.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime, timedelta, UTC
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import func, select
|
|
12
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
|
+
|
|
14
|
+
from backend.models.agent import Agent, AgentAlertConfig, AgentAlertHistory, AgentMetrics1m
|
|
15
|
+
from backend.services.agent.agent_events_service import AgentEventsService
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AgentAlertsEvaluationService:
|
|
21
|
+
"""Service for evaluating and triggering alerts."""
|
|
22
|
+
|
|
23
|
+
def __init__(self, db: AsyncSession):
|
|
24
|
+
self.db = db
|
|
25
|
+
self._events_service = AgentEventsService(db)
|
|
26
|
+
|
|
27
|
+
async def evaluate_alerts(self) -> int:
|
|
28
|
+
"""Evaluate all active alert configs and trigger if conditions met."""
|
|
29
|
+
now = datetime.now(UTC)
|
|
30
|
+
triggered_count = 0
|
|
31
|
+
|
|
32
|
+
configs = await self._get_enabled_configs()
|
|
33
|
+
|
|
34
|
+
for config in configs:
|
|
35
|
+
try:
|
|
36
|
+
if self._is_in_cooldown(config, now):
|
|
37
|
+
continue
|
|
38
|
+
|
|
39
|
+
agent = await self._get_agent(config.agent_id)
|
|
40
|
+
if not agent or agent.deleted_at:
|
|
41
|
+
continue
|
|
42
|
+
|
|
43
|
+
should_trigger, trigger_value = await self._evaluate_config(config, agent, now)
|
|
44
|
+
|
|
45
|
+
if should_trigger:
|
|
46
|
+
await self._trigger_alert(config, trigger_value)
|
|
47
|
+
triggered_count += 1
|
|
48
|
+
|
|
49
|
+
except Exception as e:
|
|
50
|
+
logger.warning("Error evaluating alert config %s: %s", config.id, e)
|
|
51
|
+
|
|
52
|
+
if triggered_count > 0:
|
|
53
|
+
await self.db.commit()
|
|
54
|
+
logger.info("Triggered %d alerts", triggered_count)
|
|
55
|
+
|
|
56
|
+
return triggered_count
|
|
57
|
+
|
|
58
|
+
async def _get_enabled_configs(self) -> list[AgentAlertConfig]:
|
|
59
|
+
"""Get all enabled alert configurations."""
|
|
60
|
+
stmt = select(AgentAlertConfig).where(AgentAlertConfig.is_enabled == True)
|
|
61
|
+
result = await self.db.execute(stmt)
|
|
62
|
+
return list(result.scalars().all())
|
|
63
|
+
|
|
64
|
+
def _is_in_cooldown(self, config: AgentAlertConfig, now: datetime) -> bool:
|
|
65
|
+
"""Check if config is still in cooldown period."""
|
|
66
|
+
if not config.last_triggered_at:
|
|
67
|
+
return False
|
|
68
|
+
cooldown_end = config.last_triggered_at + timedelta(seconds=config.cooldown_seconds)
|
|
69
|
+
return now < cooldown_end
|
|
70
|
+
|
|
71
|
+
async def _get_agent(self, agent_id: str) -> Agent | None:
|
|
72
|
+
"""Get agent by ID."""
|
|
73
|
+
stmt = select(Agent).where(Agent.id == agent_id)
|
|
74
|
+
result = await self.db.execute(stmt)
|
|
75
|
+
return result.scalar_one_or_none()
|
|
76
|
+
|
|
77
|
+
async def _evaluate_config(
|
|
78
|
+
self,
|
|
79
|
+
config: AgentAlertConfig,
|
|
80
|
+
agent: Agent,
|
|
81
|
+
now: datetime
|
|
82
|
+
) -> tuple[bool, float | None]:
|
|
83
|
+
"""Evaluate a single alert config. Returns (should_trigger, trigger_value)."""
|
|
84
|
+
evaluators = {
|
|
85
|
+
"offline": self._evaluate_offline_alert,
|
|
86
|
+
"error_rate": self._evaluate_error_rate_alert,
|
|
87
|
+
"latency": self._evaluate_latency_alert,
|
|
88
|
+
"token_budget": self._evaluate_token_budget_alert,
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
evaluator = evaluators.get(config.alert_type)
|
|
92
|
+
if not evaluator:
|
|
93
|
+
return False, None
|
|
94
|
+
|
|
95
|
+
if config.alert_type == "offline":
|
|
96
|
+
return evaluator(agent)
|
|
97
|
+
return await evaluator(config, now)
|
|
98
|
+
|
|
99
|
+
def _evaluate_offline_alert(self, agent: Agent) -> tuple[bool, float | None]:
|
|
100
|
+
"""Evaluate offline alert condition."""
|
|
101
|
+
if agent.state in ("offline", "stale"):
|
|
102
|
+
return True, 1.0
|
|
103
|
+
return False, None
|
|
104
|
+
|
|
105
|
+
async def _evaluate_error_rate_alert(
|
|
106
|
+
self,
|
|
107
|
+
config: AgentAlertConfig,
|
|
108
|
+
now: datetime
|
|
109
|
+
) -> tuple[bool, float | None]:
|
|
110
|
+
"""Evaluate error rate alert condition."""
|
|
111
|
+
window_start = now - timedelta(seconds=config.window_seconds)
|
|
112
|
+
stmt = (
|
|
113
|
+
select(
|
|
114
|
+
func.sum(AgentMetrics1m.request_count).label("requests"),
|
|
115
|
+
func.sum(AgentMetrics1m.error_count).label("errors"),
|
|
116
|
+
)
|
|
117
|
+
.where(AgentMetrics1m.agent_id == config.agent_id)
|
|
118
|
+
.where(AgentMetrics1m.bucket_time >= window_start)
|
|
119
|
+
)
|
|
120
|
+
result = await self.db.execute(stmt)
|
|
121
|
+
row = result.one_or_none()
|
|
122
|
+
|
|
123
|
+
if row and row.requests and row.requests > 0:
|
|
124
|
+
error_rate = (row.errors or 0) / row.requests * 100
|
|
125
|
+
should_trigger = self._check_condition(error_rate, config.condition, config.threshold)
|
|
126
|
+
return should_trigger, error_rate
|
|
127
|
+
return False, None
|
|
128
|
+
|
|
129
|
+
async def _evaluate_latency_alert(
|
|
130
|
+
self,
|
|
131
|
+
config: AgentAlertConfig,
|
|
132
|
+
now: datetime
|
|
133
|
+
) -> tuple[bool, float | None]:
|
|
134
|
+
"""Evaluate latency alert condition."""
|
|
135
|
+
window_start = now - timedelta(seconds=config.window_seconds)
|
|
136
|
+
stmt = (
|
|
137
|
+
select(
|
|
138
|
+
func.sum(AgentMetrics1m.request_count).label("requests"),
|
|
139
|
+
func.sum(AgentMetrics1m.total_latency_ms).label("latency"),
|
|
140
|
+
)
|
|
141
|
+
.where(AgentMetrics1m.agent_id == config.agent_id)
|
|
142
|
+
.where(AgentMetrics1m.bucket_time >= window_start)
|
|
143
|
+
)
|
|
144
|
+
result = await self.db.execute(stmt)
|
|
145
|
+
row = result.one_or_none()
|
|
146
|
+
|
|
147
|
+
if row and row.requests and row.requests > 0:
|
|
148
|
+
avg_latency = (row.latency or 0) / row.requests
|
|
149
|
+
should_trigger = self._check_condition(avg_latency, config.condition, config.threshold)
|
|
150
|
+
return should_trigger, avg_latency
|
|
151
|
+
return False, None
|
|
152
|
+
|
|
153
|
+
async def _evaluate_token_budget_alert(
|
|
154
|
+
self,
|
|
155
|
+
config: AgentAlertConfig,
|
|
156
|
+
now: datetime
|
|
157
|
+
) -> tuple[bool, float | None]:
|
|
158
|
+
"""Evaluate token budget alert condition."""
|
|
159
|
+
window_start = now - timedelta(seconds=config.window_seconds)
|
|
160
|
+
stmt = (
|
|
161
|
+
select(
|
|
162
|
+
func.sum(AgentMetrics1m.input_tokens + AgentMetrics1m.output_tokens).label("tokens"),
|
|
163
|
+
)
|
|
164
|
+
.where(AgentMetrics1m.agent_id == config.agent_id)
|
|
165
|
+
.where(AgentMetrics1m.bucket_time >= window_start)
|
|
166
|
+
)
|
|
167
|
+
result = await self.db.execute(stmt)
|
|
168
|
+
row = result.one_or_none()
|
|
169
|
+
|
|
170
|
+
if row and row.tokens:
|
|
171
|
+
should_trigger = self._check_condition(row.tokens, config.condition, config.threshold)
|
|
172
|
+
return should_trigger, float(row.tokens)
|
|
173
|
+
return False, None
|
|
174
|
+
|
|
175
|
+
def _check_condition(self, value: float, condition: str, threshold: float) -> bool:
|
|
176
|
+
"""Check if value meets condition against threshold."""
|
|
177
|
+
conditions = {
|
|
178
|
+
"gt": lambda v, t: v > t,
|
|
179
|
+
"gte": lambda v, t: v >= t,
|
|
180
|
+
"lt": lambda v, t: v < t,
|
|
181
|
+
"lte": lambda v, t: v <= t,
|
|
182
|
+
"eq": lambda v, t: v == t,
|
|
183
|
+
}
|
|
184
|
+
checker = conditions.get(condition)
|
|
185
|
+
return checker(value, threshold) if checker else False
|
|
186
|
+
|
|
187
|
+
async def _trigger_alert(
|
|
188
|
+
self,
|
|
189
|
+
config: AgentAlertConfig,
|
|
190
|
+
trigger_value: float | None
|
|
191
|
+
) -> None:
|
|
192
|
+
"""Trigger an alert and create history record."""
|
|
193
|
+
now = datetime.now(UTC)
|
|
194
|
+
|
|
195
|
+
alert = AgentAlertHistory(
|
|
196
|
+
config_id=config.id,
|
|
197
|
+
agent_id=config.agent_id,
|
|
198
|
+
alert_type=config.alert_type,
|
|
199
|
+
severity=config.severity,
|
|
200
|
+
status="triggered",
|
|
201
|
+
message=f"{config.name}: {config.metric} {config.condition} {config.threshold}",
|
|
202
|
+
trigger_value=trigger_value,
|
|
203
|
+
threshold_value=config.threshold,
|
|
204
|
+
)
|
|
205
|
+
self.db.add(alert)
|
|
206
|
+
|
|
207
|
+
config.last_triggered_at = now
|
|
208
|
+
|
|
209
|
+
await self._events_service.log_event(
|
|
210
|
+
agent_id=config.agent_id,
|
|
211
|
+
event_type="alert_triggered",
|
|
212
|
+
event_data={
|
|
213
|
+
"config_id": config.id,
|
|
214
|
+
"alert_type": config.alert_type,
|
|
215
|
+
"severity": config.severity,
|
|
216
|
+
"trigger_value": trigger_value,
|
|
217
|
+
"threshold": config.threshold,
|
|
218
|
+
},
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
logger.info(
|
|
222
|
+
"Alert triggered: agent=%s type=%s severity=%s value=%s threshold=%s",
|
|
223
|
+
config.agent_id, config.alert_type, config.severity, trigger_value, config.threshold
|
|
224
|
+
)
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent alerts service.
|
|
3
|
+
|
|
4
|
+
Handles alert configuration CRUD and history queries.
|
|
5
|
+
For alert evaluation and triggering, see agent_alerts_evaluation_service.py.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime, UTC
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import select
|
|
12
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
|
+
|
|
14
|
+
from backend.models.agent import Agent, AgentAlertConfig, AgentAlertHistory
|
|
15
|
+
from backend.schemas.agent import CreateAlertConfigRequest, UpdateAlertConfigRequest
|
|
16
|
+
from backend.services.agent.agent_alerts_evaluation_service import AgentAlertsEvaluationService
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AgentAlertsService:
|
|
22
|
+
"""Service for alert configuration management."""
|
|
23
|
+
|
|
24
|
+
def __init__(self, db: AsyncSession):
|
|
25
|
+
self.db = db
|
|
26
|
+
self._evaluation = AgentAlertsEvaluationService(db)
|
|
27
|
+
|
|
28
|
+
# ─── Alert Config CRUD ─────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
async def create_alert_config(
|
|
31
|
+
self,
|
|
32
|
+
agent_id: str,
|
|
33
|
+
user_id: str,
|
|
34
|
+
req: CreateAlertConfigRequest
|
|
35
|
+
) -> AgentAlertConfig:
|
|
36
|
+
"""Create alert configuration."""
|
|
37
|
+
config = AgentAlertConfig(
|
|
38
|
+
agent_id=agent_id,
|
|
39
|
+
user_id=user_id,
|
|
40
|
+
name=req.name,
|
|
41
|
+
alert_type=req.alert_type,
|
|
42
|
+
metric=req.metric,
|
|
43
|
+
condition=req.condition,
|
|
44
|
+
threshold=req.threshold,
|
|
45
|
+
window_seconds=req.window_seconds,
|
|
46
|
+
cooldown_seconds=req.cooldown_seconds,
|
|
47
|
+
severity=req.severity,
|
|
48
|
+
channels=req.channels,
|
|
49
|
+
)
|
|
50
|
+
self.db.add(config)
|
|
51
|
+
await self.db.commit()
|
|
52
|
+
await self.db.refresh(config)
|
|
53
|
+
return config
|
|
54
|
+
|
|
55
|
+
async def get_alert_configs(self, agent_id: str) -> list[AgentAlertConfig]:
|
|
56
|
+
"""Get alert configurations for an agent."""
|
|
57
|
+
stmt = (
|
|
58
|
+
select(AgentAlertConfig)
|
|
59
|
+
.where(AgentAlertConfig.agent_id == agent_id)
|
|
60
|
+
.order_by(AgentAlertConfig.created_at)
|
|
61
|
+
)
|
|
62
|
+
result = await self.db.execute(stmt)
|
|
63
|
+
return list(result.scalars().all())
|
|
64
|
+
|
|
65
|
+
async def get_alert_configs_for_user(
|
|
66
|
+
self,
|
|
67
|
+
user_id: str,
|
|
68
|
+
agent_id: str
|
|
69
|
+
) -> list[AgentAlertConfig] | None:
|
|
70
|
+
"""Get configs for an agent owned by a user. Returns None if agent not found."""
|
|
71
|
+
if not await self._verify_agent_ownership(user_id, agent_id):
|
|
72
|
+
return None
|
|
73
|
+
return await self.get_alert_configs(agent_id)
|
|
74
|
+
|
|
75
|
+
async def update_alert_config(
|
|
76
|
+
self,
|
|
77
|
+
agent_id: str,
|
|
78
|
+
config_id: str,
|
|
79
|
+
req: UpdateAlertConfigRequest
|
|
80
|
+
) -> AgentAlertConfig | None:
|
|
81
|
+
"""Update alert configuration."""
|
|
82
|
+
config = await self._get_config(agent_id, config_id)
|
|
83
|
+
if not config:
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
self._apply_config_updates(config, req)
|
|
87
|
+
config.updated_at = datetime.now(UTC)
|
|
88
|
+
|
|
89
|
+
await self.db.commit()
|
|
90
|
+
await self.db.refresh(config)
|
|
91
|
+
return config
|
|
92
|
+
|
|
93
|
+
def _apply_config_updates(
|
|
94
|
+
self,
|
|
95
|
+
config: AgentAlertConfig,
|
|
96
|
+
req: UpdateAlertConfigRequest
|
|
97
|
+
) -> None:
|
|
98
|
+
"""Apply update request fields to config."""
|
|
99
|
+
if req.name is not None:
|
|
100
|
+
config.name = req.name
|
|
101
|
+
if req.threshold is not None:
|
|
102
|
+
config.threshold = req.threshold
|
|
103
|
+
if req.window_seconds is not None:
|
|
104
|
+
config.window_seconds = req.window_seconds
|
|
105
|
+
if req.cooldown_seconds is not None:
|
|
106
|
+
config.cooldown_seconds = req.cooldown_seconds
|
|
107
|
+
if req.severity is not None:
|
|
108
|
+
config.severity = req.severity
|
|
109
|
+
if req.channels is not None:
|
|
110
|
+
config.channels = req.channels
|
|
111
|
+
if req.is_enabled is not None:
|
|
112
|
+
config.is_enabled = req.is_enabled
|
|
113
|
+
|
|
114
|
+
async def update_alert_config_for_user(
|
|
115
|
+
self,
|
|
116
|
+
user_id: str,
|
|
117
|
+
agent_id: str,
|
|
118
|
+
config_id: str,
|
|
119
|
+
req: UpdateAlertConfigRequest
|
|
120
|
+
) -> AgentAlertConfig | None:
|
|
121
|
+
"""Update config for an agent owned by a user. Returns None if not found."""
|
|
122
|
+
if not await self._verify_agent_ownership(user_id, agent_id):
|
|
123
|
+
return None
|
|
124
|
+
return await self.update_alert_config(agent_id, config_id, req)
|
|
125
|
+
|
|
126
|
+
async def delete_alert_config(self, agent_id: str, config_id: str) -> bool:
|
|
127
|
+
"""Delete alert configuration."""
|
|
128
|
+
config = await self._get_config(agent_id, config_id)
|
|
129
|
+
if not config:
|
|
130
|
+
return False
|
|
131
|
+
|
|
132
|
+
await self.db.delete(config)
|
|
133
|
+
await self.db.commit()
|
|
134
|
+
return True
|
|
135
|
+
|
|
136
|
+
async def delete_alert_config_for_user(
|
|
137
|
+
self,
|
|
138
|
+
user_id: str,
|
|
139
|
+
agent_id: str,
|
|
140
|
+
config_id: str
|
|
141
|
+
) -> bool:
|
|
142
|
+
"""Delete config for an agent owned by a user."""
|
|
143
|
+
if not await self._verify_agent_ownership(user_id, agent_id):
|
|
144
|
+
return False
|
|
145
|
+
return await self.delete_alert_config(agent_id, config_id)
|
|
146
|
+
|
|
147
|
+
# ─── Alert History ─────────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
async def get_alert_history(self, agent_id: str, limit: int = 50) -> list[AgentAlertHistory]:
|
|
150
|
+
"""Get alert history for an agent."""
|
|
151
|
+
stmt = (
|
|
152
|
+
select(AgentAlertHistory)
|
|
153
|
+
.where(AgentAlertHistory.agent_id == agent_id)
|
|
154
|
+
.order_by(AgentAlertHistory.created_at.desc())
|
|
155
|
+
.limit(limit)
|
|
156
|
+
)
|
|
157
|
+
result = await self.db.execute(stmt)
|
|
158
|
+
return list(result.scalars().all())
|
|
159
|
+
|
|
160
|
+
async def get_alert_history_for_user(
|
|
161
|
+
self,
|
|
162
|
+
user_id: str,
|
|
163
|
+
agent_id: str,
|
|
164
|
+
limit: int = 50
|
|
165
|
+
) -> list[AgentAlertHistory] | None:
|
|
166
|
+
"""Get history for an agent owned by a user. Returns None if not found."""
|
|
167
|
+
if not await self._verify_agent_ownership(user_id, agent_id):
|
|
168
|
+
return None
|
|
169
|
+
return await self.get_alert_history(agent_id, limit)
|
|
170
|
+
|
|
171
|
+
# ─── Delegated Evaluation ──────────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
async def evaluate_alerts(self) -> int:
|
|
174
|
+
"""Evaluate all active alert configs and trigger if conditions met."""
|
|
175
|
+
return await self._evaluation.evaluate_alerts()
|
|
176
|
+
|
|
177
|
+
# ─── Private Helpers ───────────────────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
async def _get_config(
|
|
180
|
+
self,
|
|
181
|
+
agent_id: str,
|
|
182
|
+
config_id: str
|
|
183
|
+
) -> AgentAlertConfig | None:
|
|
184
|
+
"""Get a specific alert config."""
|
|
185
|
+
stmt = (
|
|
186
|
+
select(AgentAlertConfig)
|
|
187
|
+
.where(AgentAlertConfig.id == config_id)
|
|
188
|
+
.where(AgentAlertConfig.agent_id == agent_id)
|
|
189
|
+
)
|
|
190
|
+
result = await self.db.execute(stmt)
|
|
191
|
+
return result.scalar_one_or_none()
|
|
192
|
+
|
|
193
|
+
async def _verify_agent_ownership(self, user_id: str, agent_id: str) -> bool:
|
|
194
|
+
"""Verify that a user owns an agent."""
|
|
195
|
+
stmt = (
|
|
196
|
+
select(Agent)
|
|
197
|
+
.where(Agent.id == agent_id, Agent.user_id == user_id)
|
|
198
|
+
.where(Agent.deleted_at.is_(None))
|
|
199
|
+
)
|
|
200
|
+
result = await self.db.execute(stmt)
|
|
201
|
+
return result.scalar_one_or_none() is not None
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent commands service.
|
|
3
|
+
|
|
4
|
+
Handles command issuing, signing, dispatch, and acknowledgment.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import hmac
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import datetime, timedelta, UTC
|
|
12
|
+
|
|
13
|
+
from sqlalchemy import select
|
|
14
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
15
|
+
|
|
16
|
+
from backend.models.agent import Agent, AgentCommand
|
|
17
|
+
from backend.schemas.agent import IssueCommandRequest
|
|
18
|
+
from backend.services.agent.constants import COMMAND_SIGNING_KEY, DEFAULT_COMMAND_TTL_MINUTES
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class AgentCommandsService:
|
|
24
|
+
"""Service for agent command management."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, db: AsyncSession):
|
|
27
|
+
self.db = db
|
|
28
|
+
|
|
29
|
+
async def issue_command(
|
|
30
|
+
self,
|
|
31
|
+
agent: Agent,
|
|
32
|
+
user_id: str,
|
|
33
|
+
req: IssueCommandRequest
|
|
34
|
+
) -> AgentCommand:
|
|
35
|
+
"""
|
|
36
|
+
Issue a command to an agent.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
agent: The target agent
|
|
40
|
+
user_id: The user issuing the command
|
|
41
|
+
req: The command request data
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
The created command
|
|
45
|
+
"""
|
|
46
|
+
now = datetime.now(UTC)
|
|
47
|
+
expires_at = now + timedelta(minutes=DEFAULT_COMMAND_TTL_MINUTES)
|
|
48
|
+
|
|
49
|
+
command = AgentCommand(
|
|
50
|
+
agent_id=agent.id,
|
|
51
|
+
command_type=req.command_type,
|
|
52
|
+
payload=req.payload,
|
|
53
|
+
issued_by=user_id,
|
|
54
|
+
expires_at=expires_at,
|
|
55
|
+
)
|
|
56
|
+
self.db.add(command)
|
|
57
|
+
|
|
58
|
+
# If restart, update restart tracking
|
|
59
|
+
if req.command_type == "restart":
|
|
60
|
+
agent.restart_count += 1
|
|
61
|
+
agent.last_restart_at = now
|
|
62
|
+
agent.last_restart_reason = "user_requested"
|
|
63
|
+
|
|
64
|
+
await self.db.commit()
|
|
65
|
+
await self.db.refresh(command)
|
|
66
|
+
logger.info("Command issued: agent=%s type=%s", agent.id, req.command_type)
|
|
67
|
+
return command
|
|
68
|
+
|
|
69
|
+
async def get_pending_commands(self, agent_id: str) -> list[dict]:
|
|
70
|
+
"""
|
|
71
|
+
Get and dispatch pending commands for an agent.
|
|
72
|
+
|
|
73
|
+
Returns signed commands ready for dispatch.
|
|
74
|
+
"""
|
|
75
|
+
now = datetime.now(UTC)
|
|
76
|
+
|
|
77
|
+
stmt = (
|
|
78
|
+
select(AgentCommand)
|
|
79
|
+
.where(AgentCommand.agent_id == agent_id)
|
|
80
|
+
.where(AgentCommand.status == "pending")
|
|
81
|
+
.where(AgentCommand.expires_at > now)
|
|
82
|
+
.order_by(AgentCommand.created_at)
|
|
83
|
+
)
|
|
84
|
+
result = await self.db.execute(stmt)
|
|
85
|
+
commands = list(result.scalars().all())
|
|
86
|
+
|
|
87
|
+
signed_commands = []
|
|
88
|
+
for cmd in commands:
|
|
89
|
+
command_data = {
|
|
90
|
+
"command_id": cmd.id,
|
|
91
|
+
"type": cmd.command_type,
|
|
92
|
+
"payload": cmd.payload,
|
|
93
|
+
"issued_at": cmd.created_at.isoformat(),
|
|
94
|
+
"expires_at": cmd.expires_at.isoformat(),
|
|
95
|
+
}
|
|
96
|
+
signature = self._sign_command(command_data)
|
|
97
|
+
|
|
98
|
+
signed_commands.append({
|
|
99
|
+
"command_id": cmd.id,
|
|
100
|
+
"type": cmd.command_type,
|
|
101
|
+
"payload": cmd.payload,
|
|
102
|
+
"issued_at": cmd.created_at,
|
|
103
|
+
"expires_at": cmd.expires_at,
|
|
104
|
+
"signature": signature,
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
# Mark as dispatched
|
|
108
|
+
cmd.status = "dispatched"
|
|
109
|
+
cmd.dispatched_at = now
|
|
110
|
+
|
|
111
|
+
if commands:
|
|
112
|
+
await self.db.commit()
|
|
113
|
+
|
|
114
|
+
return signed_commands
|
|
115
|
+
|
|
116
|
+
async def acknowledge_command(self, agent_id: str, command_id: str) -> bool:
|
|
117
|
+
"""Mark command as acknowledged by agent."""
|
|
118
|
+
stmt = (
|
|
119
|
+
select(AgentCommand)
|
|
120
|
+
.where(AgentCommand.id == command_id)
|
|
121
|
+
.where(AgentCommand.agent_id == agent_id)
|
|
122
|
+
)
|
|
123
|
+
result = await self.db.execute(stmt)
|
|
124
|
+
command = result.scalar_one_or_none()
|
|
125
|
+
|
|
126
|
+
if not command:
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
command.status = "acknowledged"
|
|
130
|
+
command.acknowledged_at = datetime.now(UTC)
|
|
131
|
+
await self.db.commit()
|
|
132
|
+
return True
|
|
133
|
+
|
|
134
|
+
def _sign_command(self, command_data: dict) -> str:
|
|
135
|
+
"""Sign command using HMAC-SHA256."""
|
|
136
|
+
message = json.dumps(command_data, sort_keys=True, default=str).encode()
|
|
137
|
+
return hmac.new(COMMAND_SIGNING_KEY, message, hashlib.sha256).hexdigest()
|
|
138
|
+
|
|
139
|
+
def verify_command_signature(self, command_data: dict, signature: str) -> bool:
|
|
140
|
+
"""Verify a command signature."""
|
|
141
|
+
expected = self._sign_command(command_data)
|
|
142
|
+
return hmac.compare_digest(expected, signature)
|