kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. kairo/backend/api/agents.py +337 -16
  2. kairo/backend/app.py +84 -4
  3. kairo/backend/config.py +4 -2
  4. kairo/backend/models/agent.py +216 -2
  5. kairo/backend/models/api_key.py +4 -1
  6. kairo/backend/models/task.py +31 -0
  7. kairo/backend/models/user_provider_key.py +26 -0
  8. kairo/backend/schemas/agent.py +249 -2
  9. kairo/backend/schemas/api_key.py +3 -0
  10. kairo/backend/services/agent/__init__.py +52 -0
  11. kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
  12. kairo/backend/services/agent/agent_alerts_service.py +201 -0
  13. kairo/backend/services/agent/agent_commands_service.py +142 -0
  14. kairo/backend/services/agent/agent_crud_service.py +150 -0
  15. kairo/backend/services/agent/agent_events_service.py +103 -0
  16. kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
  17. kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
  18. kairo/backend/services/agent/agent_metrics_service.py +259 -0
  19. kairo/backend/services/agent/agent_service.py +315 -0
  20. kairo/backend/services/agent/agent_setup_service.py +180 -0
  21. kairo/backend/services/agent/constants.py +28 -0
  22. kairo/backend/services/agent_service.py +18 -102
  23. kairo/backend/services/api_key_service.py +23 -3
  24. kairo/backend/services/byok_service.py +204 -0
  25. kairo/backend/services/chat_service.py +398 -63
  26. kairo/backend/services/deep_search_service.py +159 -0
  27. kairo/backend/services/email_service.py +418 -19
  28. kairo/backend/services/few_shot_service.py +223 -0
  29. kairo/backend/services/post_processor.py +261 -0
  30. kairo/backend/services/rag_service.py +150 -0
  31. kairo/backend/services/task_service.py +119 -0
  32. kairo/backend/tests/__init__.py +1 -0
  33. kairo/backend/tests/e2e/__init__.py +1 -0
  34. kairo/backend/tests/e2e/agents/__init__.py +1 -0
  35. kairo/backend/tests/e2e/agents/conftest.py +389 -0
  36. kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
  37. kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
  38. kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
  39. kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
  40. kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
  41. kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
  42. kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
  43. kairo/migrations/versions/010_agent_dashboard.py +246 -0
  44. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
  45. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
  46. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
  47. kairo_migrations/env.py +92 -0
  48. kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
  49. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
  50. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,520 @@
1
+ """
2
+ End-to-end tests for Agent Heartbeat functionality.
3
+
4
+ Tests cover:
5
+ - Successful heartbeat updates last_heartbeat_at
6
+ - Heartbeat with metrics records data
7
+ - Heartbeat returns pending commands
8
+ - State transitions (online, busy, idle, error)
9
+ - Stale agent detection (mark offline after timeout)
10
+ """
11
+
12
+ import pytest
13
+ import uuid
14
+ from datetime import datetime, timedelta, UTC
15
+ from httpx import AsyncClient
16
+ from sqlalchemy import select
17
+
18
+ from backend.models.agent import Agent, AgentMetrics1m
19
+
20
+
21
class TestHeartbeatBasic:
    """Basic heartbeat functionality tests.

    Each test POSTs a payload to the heartbeat endpoint and then inspects
    either the JSON reply or the attributes of the ``test_agent`` fixture
    after refreshing it through ``db_session``.
    """

    # Endpoint exercised by every test in this class.
    HEARTBEAT_URL = "/api/agents/heartbeat"

    @pytest.mark.asyncio
    async def test_heartbeat_success(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should successfully process heartbeat and return acknowledgment."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "sdk_version": "1.0.0",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        data = response.json()
        assert data["acknowledged"] is True
        assert "server_time" in data
        assert "commands" in data
        assert isinstance(data["commands"], list)

    @pytest.mark.asyncio
    async def test_heartbeat_updates_last_heartbeat_at(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should update last_heartbeat_at timestamp on heartbeat."""
        original_heartbeat = test_agent.last_heartbeat_at

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )
        # Fail on the request itself; otherwise a rejected heartbeat would
        # only show up as a confusing assertion on unchanged row data.
        assert response.status_code == 200

        await db_session.refresh(test_agent)

        assert test_agent.last_heartbeat_at is not None
        # The ordering check is only meaningful when the fixture already
        # carried a heartbeat timestamp before the request.
        if original_heartbeat is not None:
            assert test_agent.last_heartbeat_at > original_heartbeat

    @pytest.mark.asyncio
    async def test_heartbeat_updates_sdk_version(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should update SDK version on heartbeat."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "sdk_version": "2.5.0",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )
        assert response.status_code == 200

        await db_session.refresh(test_agent)

        assert test_agent.sdk_version == "2.5.0"

    @pytest.mark.asyncio
    async def test_heartbeat_updates_host_info(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should update host info on heartbeat."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "host_info": {
                "hostname": "prod-server-01",
                "ip": "10.0.1.50",
                "os": "debian-11",
                "memory_mb": 32768,
                "memory_used_percent": 45.5,
            },
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )
        assert response.status_code == 200

        await db_session.refresh(test_agent)

        assert test_agent.host_info is not None
        assert test_agent.host_info["hostname"] == "prod-server-01"
        assert test_agent.host_info["memory_used_percent"] == 45.5

    @pytest.mark.asyncio
    async def test_heartbeat_agent_not_found(
        self, client: AsyncClient, api_key_headers: dict
    ):
        """Should return 404 for non-existent agent."""
        # A freshly generated UUID is used as an id that no fixture created.
        fake_id = str(uuid.uuid4())
        payload = {
            "agent_id": fake_id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )

        assert response.status_code == 404
130
+
131
+
132
class TestHeartbeatStateTransitions:
    """Tests for agent state transitions via heartbeat.

    Each test sends one heartbeat with a given ``status`` and then checks the
    refreshed ``test_agent`` row for the expected state and timestamps.
    """

    @staticmethod
    async def _post_heartbeat(client: AsyncClient, headers: dict, payload: dict):
        """POST ``payload`` to the heartbeat endpoint and require a 200 reply.

        Checking the status here makes a rejected heartbeat fail at the
        request instead of at a later assertion against unchanged row data.

        Returns:
            The response object returned by ``client.post``.
        """
        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=headers
        )
        assert response.status_code == 200
        return response

    @pytest.mark.asyncio
    async def test_transition_to_online(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should transition agent to 'online' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        await self._post_heartbeat(client, api_key_headers, payload)

        await db_session.refresh(test_agent)

        assert test_agent.state == "online"
        assert test_agent.last_online_at is not None

    @pytest.mark.asyncio
    async def test_transition_to_busy(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should transition agent to 'busy' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "busy",
            "active_request": True,
        }

        await self._post_heartbeat(client, api_key_headers, payload)

        await db_session.refresh(test_agent)

        assert test_agent.state == "busy"

    @pytest.mark.asyncio
    async def test_transition_to_idle(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should transition agent to 'idle' state."""
        payload = {
            "agent_id": test_agent.id,
            "status": "idle",
        }

        await self._post_heartbeat(client, api_key_headers, payload)

        await db_session.refresh(test_agent)

        assert test_agent.state == "idle"

    @pytest.mark.asyncio
    async def test_transition_to_error(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should transition agent to 'error' state with error details."""
        payload = {
            "agent_id": test_agent.id,
            "status": "error",
            "last_error": {
                "message": "Connection to LLM service failed",
                "code": "LLM_CONNECTION_ERROR",
            },
        }

        await self._post_heartbeat(client, api_key_headers, payload)

        await db_session.refresh(test_agent)

        assert test_agent.state == "error"
        assert test_agent.last_error_at is not None
        assert test_agent.last_error_message == "Connection to LLM service failed"

    @pytest.mark.asyncio
    async def test_first_connection_sets_timestamp(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should set first_connected_at on first heartbeat."""
        # Precondition: the fixture agent has not connected before.
        assert test_agent.first_connected_at is None

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        await self._post_heartbeat(client, api_key_headers, payload)

        await db_session.refresh(test_agent)

        assert test_agent.first_connected_at is not None
234
+
235
+
236
class TestHeartbeatMetrics:
    """Tests for metrics recording via heartbeat.

    Heartbeats are sent with or without a ``metrics_since_last_heartbeat``
    section, and the ``AgentMetrics1m`` row for the agent is then inspected.
    """

    @staticmethod
    async def _load_metrics(db_session, agent_id):
        """Return the single ``AgentMetrics1m`` row for ``agent_id``, or None.

        Uses ``scalar_one_or_none()``, so more than one matching row is
        reported as an error rather than silently picking one.
        """
        stmt = select(AgentMetrics1m).where(AgentMetrics1m.agent_id == agent_id)
        result = await db_session.execute(stmt)
        return result.scalar_one_or_none()

    @pytest.mark.asyncio
    async def test_heartbeat_records_metrics(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should record metrics from heartbeat payload."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "metrics_since_last_heartbeat": {
                "requests_completed": 10,
                "requests_failed": 2,
                "total_latency_ms": 5000,
                "input_tokens": 10000,
                "output_tokens": 5000,
                "tool_calls": 15,
                "tool_errors": 1,
            },
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )
        # A rejected heartbeat should fail here, not as "metrics is None".
        assert response.status_code == 200

        # Check metrics were recorded
        metrics = await self._load_metrics(db_session, test_agent.id)

        assert metrics is not None
        assert metrics.request_count == 12  # completed + failed
        assert metrics.error_count == 2
        assert metrics.input_tokens == 10000
        assert metrics.output_tokens == 5000
        assert metrics.tool_calls == 15

    @pytest.mark.asyncio
    async def test_heartbeat_aggregates_metrics_same_bucket(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should aggregate metrics when multiple heartbeats occur in same minute."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
            "metrics_since_last_heartbeat": {
                "requests_completed": 5,
                "requests_failed": 1,
                "input_tokens": 1000,
                "output_tokens": 500,
            },
        }

        # Send two heartbeats
        # NOTE(review): if the two requests straddle a minute boundary they
        # presumably land in different buckets, and the single-row lookup
        # below would then raise instead of returning one aggregated row.
        # Confirm whether the suite pins the clock.
        for _ in range(2):
            response = await client.post(
                "/api/agents/heartbeat", json=payload, headers=api_key_headers
            )
            assert response.status_code == 200

        # Check aggregated metrics
        metrics = await self._load_metrics(db_session, test_agent.id)

        assert metrics is not None
        assert metrics.request_count == 12  # (5+1) * 2
        assert metrics.error_count == 2  # 1 * 2
        assert metrics.input_tokens == 2000  # 1000 * 2
        assert metrics.output_tokens == 1000  # 500 * 2

    @pytest.mark.asyncio
    async def test_heartbeat_without_metrics(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, db_session
    ):
        """Should handle heartbeat without metrics payload."""
        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            "/api/agents/heartbeat", json=payload, headers=api_key_headers
        )

        assert response.status_code == 200

        # No metrics should be recorded
        metrics = await self._load_metrics(db_session, test_agent.id)

        assert metrics is None
331
+
332
+
333
class TestHeartbeatCommands:
    """Tests for pending command delivery via heartbeat.

    Commands are created through the ``agent_factory`` fixture before the
    heartbeat is sent; the tests then check the ``commands`` list in the
    response or the refreshed command row.
    """

    HEARTBEAT_URL = "/api/agents/heartbeat"

    @pytest.mark.asyncio
    async def test_heartbeat_returns_pending_commands(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ):
        """Should return pending commands in heartbeat response."""
        # Create a pending command
        await agent_factory.create_command(
            test_agent.id, command_type="restart", status="pending"
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        commands = response.json()["commands"]
        assert len(commands) == 1
        delivered = commands[0]
        assert delivered["type"] == "restart"
        assert "command_id" in delivered
        assert "signature" in delivered

    @pytest.mark.asyncio
    async def test_heartbeat_marks_commands_dispatched(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory, db_session
    ):
        """Should mark commands as dispatched after delivery."""
        command = await agent_factory.create_command(
            test_agent.id, command_type="stop", status="pending"
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )
        # Without this check a rejected heartbeat would be reported as the
        # command still being "pending", hiding the real cause.
        assert response.status_code == 200

        await db_session.refresh(command)

        assert command.status == "dispatched"
        assert command.dispatched_at is not None

    @pytest.mark.asyncio
    async def test_heartbeat_excludes_expired_commands(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ):
        """Should not return expired commands."""
        # Create an expired command
        await agent_factory.create_command(
            test_agent.id,
            command_type="restart",
            status="pending",
            expires_at=datetime.now(UTC) - timedelta(hours=1),
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        assert response.json()["commands"] == []

    @pytest.mark.asyncio
    async def test_heartbeat_excludes_already_dispatched(
        self, client: AsyncClient, api_key_headers: dict, test_agent: Agent, agent_factory
    ):
        """Should not return already dispatched commands."""
        # Create a dispatched command
        await agent_factory.create_command(
            test_agent.id,
            command_type="restart",
            status="dispatched",
        )

        payload = {
            "agent_id": test_agent.id,
            "status": "online",
        }

        response = await client.post(
            self.HEARTBEAT_URL, json=payload, headers=api_key_headers
        )

        assert response.status_code == 200
        assert response.json()["commands"] == []
435
+
436
+
437
class TestStaleAgentDetection:
    """Tests for stale agent detection and offline marking.

    These tests bypass the HTTP layer: they insert an ``Agent`` row directly,
    call an ``AgentService`` maintenance method, and check both the returned
    count and the refreshed row.
    """

    @staticmethod
    async def _persist_agent(db_session, user_id, *, name, state, **timestamps):
        """Insert and commit an ``Agent`` whose ``state`` and ``status`` match.

        ``timestamps`` is forwarded unchanged to the ``Agent`` constructor
        (e.g. ``last_heartbeat_at`` or ``last_online_at``).
        """
        record = Agent(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name=name,
            state=state,
            status=state,
            **timestamps,
        )
        db_session.add(record)
        await db_session.commit()
        return record

    @pytest.mark.asyncio
    async def test_mark_stale_agent_offline(self, db_session, test_user):
        """Should mark agents as offline when heartbeat times out."""
        from backend.services.agent_service import AgentService

        # Agent whose last heartbeat is five minutes old.
        five_minutes_ago = datetime.now(UTC) - timedelta(minutes=5)
        agent = await self._persist_agent(
            db_session,
            test_user.id,
            name="Stale Agent",
            state="online",
            last_heartbeat_at=five_minutes_ago,
        )

        # Staleness check with a 2-minute threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents_offline(threshold_seconds=120)

        assert marked == 1

        await db_session.refresh(agent)
        assert agent.state == "offline"
        assert agent.status == "offline"

    @pytest.mark.asyncio
    async def test_keep_recent_agent_online(self, db_session, test_user):
        """Should not mark agents with recent heartbeat as offline."""
        from backend.services.agent_service import AgentService

        # Agent whose last heartbeat is only thirty seconds old.
        thirty_seconds_ago = datetime.now(UTC) - timedelta(seconds=30)
        agent = await self._persist_agent(
            db_session,
            test_user.id,
            name="Active Agent",
            state="online",
            last_heartbeat_at=thirty_seconds_ago,
        )

        # Staleness check with a 2-minute threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents_offline(threshold_seconds=120)

        assert marked == 0

        await db_session.refresh(agent)
        assert agent.state == "online"

    @pytest.mark.asyncio
    async def test_mark_stale_agents_after_days(self, db_session, test_user):
        """Should mark offline agents as stale after several days."""
        from backend.services.agent_service import AgentService

        # Offline agent last seen online ten days ago.
        ten_days_ago = datetime.now(UTC) - timedelta(days=10)
        agent = await self._persist_agent(
            db_session,
            test_user.id,
            name="Old Offline Agent",
            state="offline",
            last_online_at=ten_days_ago,
        )

        # Staleness check with a 7-day threshold.
        service = AgentService(db_session)
        marked = await service.mark_stale_agents(days=7)

        assert marked == 1

        await db_session.refresh(agent)
        assert agent.state == "stale"