kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kairo/backend/api/agents.py +337 -16
- kairo/backend/app.py +84 -4
- kairo/backend/config.py +4 -2
- kairo/backend/models/agent.py +216 -2
- kairo/backend/models/api_key.py +4 -1
- kairo/backend/models/task.py +31 -0
- kairo/backend/models/user_provider_key.py +26 -0
- kairo/backend/schemas/agent.py +249 -2
- kairo/backend/schemas/api_key.py +3 -0
- kairo/backend/services/agent/__init__.py +52 -0
- kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
- kairo/backend/services/agent/agent_alerts_service.py +201 -0
- kairo/backend/services/agent/agent_commands_service.py +142 -0
- kairo/backend/services/agent/agent_crud_service.py +150 -0
- kairo/backend/services/agent/agent_events_service.py +103 -0
- kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
- kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
- kairo/backend/services/agent/agent_metrics_service.py +259 -0
- kairo/backend/services/agent/agent_service.py +315 -0
- kairo/backend/services/agent/agent_setup_service.py +180 -0
- kairo/backend/services/agent/constants.py +28 -0
- kairo/backend/services/agent_service.py +18 -102
- kairo/backend/services/api_key_service.py +23 -3
- kairo/backend/services/byok_service.py +204 -0
- kairo/backend/services/chat_service.py +398 -63
- kairo/backend/services/deep_search_service.py +159 -0
- kairo/backend/services/email_service.py +418 -19
- kairo/backend/services/few_shot_service.py +223 -0
- kairo/backend/services/post_processor.py +261 -0
- kairo/backend/services/rag_service.py +150 -0
- kairo/backend/services/task_service.py +119 -0
- kairo/backend/tests/__init__.py +1 -0
- kairo/backend/tests/e2e/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/conftest.py +389 -0
- kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
- kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
- kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
- kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
- kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
- kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
- kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
- kairo/migrations/versions/010_agent_dashboard.py +246 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
- kairo_migrations/env.py +92 -0
- kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
"""
|
|
2
|
+
End-to-end tests for Agent Heartbeat functionality.
|
|
3
|
+
|
|
4
|
+
Tests cover:
|
|
5
|
+
- Successful heartbeat updates last_heartbeat_at
|
|
6
|
+
- Heartbeat with metrics records data
|
|
7
|
+
- Heartbeat returns pending commands
|
|
8
|
+
- State transitions (online, busy, idle, error)
|
|
9
|
+
- Stale agent detection (mark offline after timeout)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
import uuid
|
|
14
|
+
from datetime import datetime, timedelta, UTC
|
|
15
|
+
from httpx import AsyncClient
|
|
16
|
+
from sqlalchemy import select
|
|
17
|
+
|
|
18
|
+
from backend.models.agent import Agent, AgentMetrics1m
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestHeartbeatBasic:
    """Basic heartbeat functionality tests.

    Each test posts a JSON payload to ``/api/agents/heartbeat`` through the
    ``client`` fixture. Tests that check persisted state reload ``test_agent``
    through ``db_session`` before asserting. The fixtures themselves are not
    defined in this module.
    """

    @pytest.mark.asyncio
    async def test_heartbeat_success(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should successfully process heartbeat and return acknowledgment."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "sdk_version": "1.0.0",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        data = response.json()
        assert data["acknowledged"] is True
        assert "server_time" in data
        assert "commands" in data
        assert isinstance(data["commands"], list)

    @pytest.mark.asyncio
    async def test_heartbeat_updates_last_heartbeat_at(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should update last_heartbeat_at timestamp on heartbeat."""
        # Capture the value before the request so the two can be compared.
        original_heartbeat = test_agent.last_heartbeat_at

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        # Fail on the request itself (showing the response body) instead of
        # on a misleading database-state assertion further down.
        assert response.status_code == 200, response.text

        # Reload the row so the assertions see what the request persisted.
        await db_session.refresh(test_agent)

        assert test_agent.last_heartbeat_at is not None
        # Only compare when the fixture started out with a heartbeat value.
        if original_heartbeat is not None:
            assert test_agent.last_heartbeat_at > original_heartbeat

    @pytest.mark.asyncio
    async def test_heartbeat_updates_sdk_version(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should update SDK version on heartbeat."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "sdk_version": "2.5.0",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.sdk_version == "2.5.0"

    @pytest.mark.asyncio
    async def test_heartbeat_updates_host_info(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should update host info on heartbeat."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "host_info": {
                "hostname": "prod-server-01",
                "ip": "10.0.1.50",
                "os": "debian-11",
                "memory_mb": 32768,
                "memory_used_percent": 45.5,
            },
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.host_info is not None
        assert test_agent.host_info["hostname"] == "prod-server-01"
        assert test_agent.host_info["memory_used_percent"] == 45.5

    @pytest.mark.asyncio
    async def test_heartbeat_agent_not_found(
        self, client: AsyncClient, api_key_headers: dict
    ) -> None:
        """Should return 404 for non-existent agent."""
        # A freshly generated UUID is used as an id that no agent has.
        fake_id = str(uuid.uuid4())
        payload = {
            "agent_id": fake_id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 404
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class TestHeartbeatStateTransitions:
    """Tests for agent state transitions via heartbeat.

    Each test posts a heartbeat carrying a ``status`` value, checks that the
    request was accepted, reloads ``test_agent`` through ``db_session`` and
    asserts on the persisted columns. The fixtures are not defined in this
    module.
    """

    @pytest.mark.asyncio
    async def test_transition_to_online(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should transition agent to 'online' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        # Fail on the request itself (showing the response body) instead of
        # on a misleading state mismatch below.
        assert response.status_code == 200, response.text

        # Reload the row so the assertions see what the request persisted.
        await db_session.refresh(test_agent)

        assert test_agent.state == "online"
        assert test_agent.last_online_at is not None

    @pytest.mark.asyncio
    async def test_transition_to_busy(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should transition agent to 'busy' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "busy",
            "active_request": True,
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.state == "busy"

    @pytest.mark.asyncio
    async def test_transition_to_idle(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should transition agent to 'idle' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "idle",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.state == "idle"

    @pytest.mark.asyncio
    async def test_transition_to_error(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should transition agent to 'error' state with error details."""
        payload = {
            "agent_id": test_agent.id,
            "status": "error",
            "last_error": {
                "message": "Connection to LLM service failed",
                "code": "LLM_CONNECTION_ERROR",
            },
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.state == "error"
        assert test_agent.last_error_at is not None
        assert test_agent.last_error_message == "Connection to LLM service failed"

    @pytest.mark.asyncio
    async def test_first_connection_sets_timestamp(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should set first_connected_at on first heartbeat."""
        # Precondition: the fixture agent must not have connected yet.
        assert test_agent.first_connected_at is None

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert response.status_code == 200, response.text

        await db_session.refresh(test_agent)

        assert test_agent.first_connected_at is not None
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class TestHeartbeatMetrics:
    """Tests for metrics recording via heartbeat.

    Heartbeats are posted with an optional ``metrics_since_last_heartbeat``
    object; the tests then query ``AgentMetrics1m`` rows for the agent through
    ``db_session``. The fixtures are not defined in this module.
    """

    @pytest.mark.asyncio
    async def test_heartbeat_records_metrics(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should record metrics from heartbeat payload."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "metrics_since_last_heartbeat": {
                "requests_completed": 10,
                "requests_failed": 2,
                "total_latency_ms": 5000,
                "input_tokens": 10000,
                "output_tokens": 5000,
                "tool_calls": 15,
                "tool_errors": 1,
            },
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        # Fail on the request itself (showing the response body) instead of
        # on a confusing "metrics is None" assertion below.
        assert response.status_code == 200, response.text

        # Check metrics were recorded
        stmt = select(AgentMetrics1m).where(AgentMetrics1m.agent_id == test_agent.id)
        result = await db_session.execute(stmt)
        metrics = result.scalar_one_or_none()

        assert metrics is not None
        assert metrics.request_count == 12  # completed + failed
        assert metrics.error_count == 2
        assert metrics.input_tokens == 10000
        assert metrics.output_tokens == 5000
        assert metrics.tool_calls == 15

    @pytest.mark.asyncio
    async def test_heartbeat_aggregates_metrics_same_bucket(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should aggregate metrics when multiple heartbeats occur in same minute."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "metrics_since_last_heartbeat": {
                "requests_completed": 5,
                "requests_failed": 1,
                "input_tokens": 1000,
                "output_tokens": 500,
            },
        }

        # Send two heartbeats
        first = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert first.status_code == 200, first.text
        second = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        assert second.status_code == 200, second.text

        # Check aggregated metrics
        # NOTE(review): scalar_one_or_none() raises when the query yields more
        # than one row, so this test presumes both heartbeats were folded into
        # a single AgentMetrics1m row. If rows are keyed by wall-clock minute,
        # a run that straddles a minute boundary would fail here -- confirm
        # against the metrics service.
        stmt = select(AgentMetrics1m).where(AgentMetrics1m.agent_id == test_agent.id)
        result = await db_session.execute(stmt)
        metrics = result.scalar_one_or_none()

        assert metrics is not None
        assert metrics.request_count == 12  # (5+1) * 2
        assert metrics.error_count == 2  # 1 * 2
        assert metrics.input_tokens == 2000  # 1000 * 2
        assert metrics.output_tokens == 1000  # 500 * 2

    @pytest.mark.asyncio
    async def test_heartbeat_without_metrics(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ) -> None:
        """Should handle heartbeat without metrics payload."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200

        # No metrics should be recorded
        stmt = select(AgentMetrics1m).where(AgentMetrics1m.agent_id == test_agent.id)
        result = await db_session.execute(stmt)
        metrics = result.scalar_one_or_none()

        assert metrics is None
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class TestHeartbeatCommands:
    """Tests for pending command delivery via heartbeat.

    Commands are created with ``agent_factory.create_command`` and the tests
    inspect the ``commands`` list in the heartbeat response, or the command
    row reloaded through ``db_session``. The fixtures are not defined in this
    module.
    """

    @pytest.mark.asyncio
    async def test_heartbeat_returns_pending_commands(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ) -> None:
        """Should return pending commands in heartbeat response."""
        # Create a pending command
        await agent_factory.create_command(
            test_agent.id, command_type="restart", status="pending"
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        data = response.json()
        assert len(data["commands"]) == 1
        assert data["commands"][0]["type"] == "restart"
        assert "command_id" in data["commands"][0]
        assert "signature" in data["commands"][0]

    @pytest.mark.asyncio
    async def test_heartbeat_marks_commands_dispatched(
        self,
        client: AsyncClient,
        api_key_headers: dict,
        test_agent: Agent,
        agent_factory,
        db_session,
    ) -> None:
        """Should mark commands as dispatched after delivery."""
        command = await agent_factory.create_command(
            test_agent.id, command_type="stop", status="pending"
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        # Fail on the request itself (showing the response body) instead of
        # on a command row that was never touched.
        assert response.status_code == 200, response.text

        # Reload the row so the assertions see what the request persisted.
        await db_session.refresh(command)

        assert command.status == "dispatched"
        assert command.dispatched_at is not None

    @pytest.mark.asyncio
    async def test_heartbeat_excludes_expired_commands(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ) -> None:
        """Should not return expired commands."""
        # Create an expired command
        await agent_factory.create_command(
            test_agent.id,
            command_type="restart",
            status="pending",
            expires_at=datetime.now(UTC) - timedelta(hours=1),
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        data = response.json()
        assert len(data["commands"]) == 0

    @pytest.mark.asyncio
    async def test_heartbeat_excludes_already_dispatched(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ) -> None:
        """Should not return already dispatched commands."""
        # Create a dispatched command
        await agent_factory.create_command(
            test_agent.id,
            command_type="restart",
            status="dispatched",
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        data = response.json()
        assert len(data["commands"]) == 0
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
class TestStaleAgentDetection:
    """Tests for stale agent detection and offline marking.

    These tests skip the HTTP layer: they insert an ``Agent`` row through
    ``db_session``, call an ``AgentService`` maintenance method directly, and
    check both the returned count and the reloaded row.
    """

    @pytest.mark.asyncio
    async def test_mark_stale_agent_offline(self, db_session, test_user):
        """Should mark agents as offline when heartbeat times out."""
        from backend.services.agent_service import AgentService

        # Last heartbeat is five minutes old, beyond the 120 s threshold below.
        five_minutes_ago = datetime.now(UTC) - timedelta(minutes=5)
        agent_fields = {
            "id": str(uuid.uuid4()),
            "user_id": test_user.id,
            "name": "Stale Agent",
            "state": "online",
            "status": "online",
            "last_heartbeat_at": five_minutes_ago,
        }
        stale_agent = Agent(**agent_fields)
        db_session.add(stale_agent)
        await db_session.commit()

        # Staleness sweep with a 2-minute threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents_offline(threshold_seconds=120)

        assert marked == 1

        await db_session.refresh(stale_agent)
        assert stale_agent.state == "offline"
        assert stale_agent.status == "offline"

    @pytest.mark.asyncio
    async def test_keep_recent_agent_online(self, db_session, test_user):
        """Should not mark agents with recent heartbeat as offline."""
        from backend.services.agent_service import AgentService

        # Last heartbeat is 30 s old, inside the 120 s threshold below.
        thirty_seconds_ago = datetime.now(UTC) - timedelta(seconds=30)
        agent_fields = {
            "id": str(uuid.uuid4()),
            "user_id": test_user.id,
            "name": "Active Agent",
            "state": "online",
            "status": "online",
            "last_heartbeat_at": thirty_seconds_ago,
        }
        active_agent = Agent(**agent_fields)
        db_session.add(active_agent)
        await db_session.commit()

        # Staleness sweep with a 2-minute threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents_offline(threshold_seconds=120)

        assert marked == 0

        await db_session.refresh(active_agent)
        assert active_agent.state == "online"

    @pytest.mark.asyncio
    async def test_mark_stale_agents_after_days(self, db_session, test_user):
        """Should mark offline agents as stale after several days."""
        from backend.services.agent_service import AgentService

        # Agent was last online ten days ago, beyond the 7-day cutoff below.
        ten_days_ago = datetime.now(UTC) - timedelta(days=10)
        agent_fields = {
            "id": str(uuid.uuid4()),
            "user_id": test_user.id,
            "name": "Old Offline Agent",
            "state": "offline",
            "status": "offline",
            "last_online_at": ten_days_ago,
        }
        dormant_agent = Agent(**agent_fields)
        db_session.add(dormant_agent)
        await db_session.commit()

        # Staleness sweep with a 7-day threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents(days=7)

        assert marked == 1

        await db_session.refresh(dormant_agent)
        assert dormant_agent.state == "stale"
|