kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kubiya-control-plane-api might be problematic. Click here for more details.
- control_plane_api/README.md +266 -0
- control_plane_api/__init__.py +0 -0
- control_plane_api/__version__.py +1 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +98 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
- control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
- control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
- control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
- control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
- control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
- control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
- control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
- control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
- control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
- control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +379 -0
- control_plane_api/app/activities/team_activities.py +410 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +354 -0
- control_plane_api/app/config/model_pricing.py +318 -0
- control_plane_api/app/config.py +95 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/job_executor.py +312 -0
- control_plane_api/app/lib/kubiya_client.py +235 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/planning_tools/__init__.py +22 -0
- control_plane_api/app/lib/planning_tools/agents.py +155 -0
- control_plane_api/app/lib/planning_tools/base.py +189 -0
- control_plane_api/app/lib/planning_tools/environments.py +214 -0
- control_plane_api/app/lib/planning_tools/resources.py +240 -0
- control_plane_api/app/lib/planning_tools/teams.py +198 -0
- control_plane_api/app/lib/policy_enforcer_client.py +939 -0
- control_plane_api/app/lib/redis_client.py +436 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/temporal_client.py +138 -0
- control_plane_api/app/lib/validation/__init__.py +20 -0
- control_plane_api/app/lib/validation/runtime_validation.py +287 -0
- control_plane_api/app/main.py +128 -0
- control_plane_api/app/middleware/__init__.py +8 -0
- control_plane_api/app/middleware/auth.py +513 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +27 -0
- control_plane_api/app/models/agent.py +79 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +81 -0
- control_plane_api/app/models/environment.py +63 -0
- control_plane_api/app/models/execution.py +93 -0
- control_plane_api/app/models/job.py +179 -0
- control_plane_api/app/models/llm_model.py +75 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +47 -0
- control_plane_api/app/models/session.py +38 -0
- control_plane_api/app/models/team.py +66 -0
- control_plane_api/app/models/workflow.py +55 -0
- control_plane_api/app/policies/README.md +121 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +364 -0
- control_plane_api/app/routers/agents_v2.py +1260 -0
- control_plane_api/app/routers/analytics.py +1014 -0
- control_plane_api/app/routers/context_manager.py +562 -0
- control_plane_api/app/routers/environment_context.py +270 -0
- control_plane_api/app/routers/environments.py +715 -0
- control_plane_api/app/routers/execution_environment.py +517 -0
- control_plane_api/app/routers/executions.py +1911 -0
- control_plane_api/app/routers/health.py +92 -0
- control_plane_api/app/routers/health_v2.py +326 -0
- control_plane_api/app/routers/integrations.py +274 -0
- control_plane_api/app/routers/jobs.py +1344 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +361 -0
- control_plane_api/app/routers/policies.py +639 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +902 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +155 -0
- control_plane_api/app/routers/skills.py +1001 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/task_planning.py +1256 -0
- control_plane_api/app/routers/task_queues.py +654 -0
- control_plane_api/app/routers/team_context.py +270 -0
- control_plane_api/app/routers/teams.py +1400 -0
- control_plane_api/app/routers/worker_queues.py +1545 -0
- control_plane_api/app/routers/workers.py +935 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/job_schemas.py +295 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_service.py +619 -0
- control_plane_api/app/services/litellm_service.py +190 -0
- control_plane_api/app/services/policy_service.py +525 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/skills/__init__.py +44 -0
- control_plane_api/app/skills/base.py +229 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/data_visualization.py +154 -0
- control_plane_api/app/skills/docker.py +104 -0
- control_plane_api/app/skills/file_generation.py +94 -0
- control_plane_api/app/skills/file_system.py +110 -0
- control_plane_api/app/skills/python.py +92 -0
- control_plane_api/app/skills/registry.py +65 -0
- control_plane_api/app/skills/shell.py +102 -0
- control_plane_api/app/skills/workflow_executor.py +469 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +507 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/team_execution.py +399 -0
- control_plane_api/scripts/seed_models.py +239 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1241 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/runtime_activities.py +388 -0
- control_plane_api/worker/activities/skill_activities.py +267 -0
- control_plane_api/worker/activities/team_activities.py +1217 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +275 -0
- control_plane_api/worker/control_plane_client.py +529 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +31 -0
- control_plane_api/worker/runtimes/base.py +789 -0
- control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
- control_plane_api/worker/runtimes/default_runtime.py +617 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_executor.py +422 -0
- control_plane_api/worker/services/agent_executor_v2.py +383 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/data_visualization.py +827 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +194 -0
- control_plane_api/worker/services/skill_factory.py +175 -0
- control_plane_api/worker/services/team_executor.py +574 -0
- control_plane_api/worker/services/team_executor_v2.py +465 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +305 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +373 -0
- control_plane_api/worker/worker.py +753 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +589 -0
- control_plane_api/worker/workflows/team_execution.py +429 -0
- kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
- kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
- kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
- kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
- kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
- kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
- kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
- {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
- {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Health check endpoints"""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Request, HTTPException, status
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
logger = structlog.get_logger()
|
|
8
|
+
|
|
9
|
+
router = APIRouter()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/health")
async def health_check(request: Request):
    """
    Report that the API process is up.

    The handler performs no authentication check of its own and does not
    touch any dependency; it only builds a static payload plus the current
    UTC time.

    Args:
        request: Incoming request object (not read by this handler).

    Returns:
        Dict with "status", "service" and an ISO-8601 "timestamp".
    """
    now_iso = datetime.utcnow().isoformat()
    payload = dict(status="healthy", service="agent-control_plane_api")
    payload["timestamp"] = now_iso
    return payload
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@router.get("/ready")
async def readiness_check():
    """
    Readiness endpoint.

    Always answers "ready" together with the current UTC time; the handler
    performs no authentication check and no dependency probing.
    """
    stamp = datetime.utcnow().isoformat()
    return {"status": "ready", "timestamp": stamp}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@router.get("/health/detailed")
async def detailed_health_check(request: Request):
    """
    Detailed health check with dependency status.

    Probes the database (Supabase first, SQLAlchemy as a fallback) and Redis,
    and verifies that the Temporal settings are populated (no connection is
    attempted for Temporal). The handler performs no authentication check of
    its own.

    Args:
        request: Incoming request object (not read by this handler).

    Returns:
        A flat dict with one entry per component ("api", "database", "redis",
        "temporal"), a "timestamp", and an overall "status" that is "healthy"
        only when every component reports a healthy/configured value and
        "degraded" otherwise.
    """
    # NOTE(review): the Supabase, SQLAlchemy and Redis calls below are
    # synchronous and run on the event loop; consider moving them to a worker
    # thread if this endpoint is polled frequently.
    checks = {
        "api": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
    }

    # Try Supabase (new way)
    try:
        from control_plane_api.app.lib.supabase import get_supabase
        client = get_supabase()
        # Only success/failure matters; the returned rows are not inspected.
        client.table("organizations").select("id").limit(1).execute()
        checks["database"] = "healthy"
    except Exception as e1:
        # Fallback to SQLAlchemy (old way)
        try:
            from control_plane_api.app.database import get_db
            from sqlalchemy import text
            # get_db is consumed with next(), i.e. as a generator; keep a
            # handle on it so it can be closed and run its own cleanup
            # instead of leaking the session on every health probe.
            db_gen = get_db()
            try:
                db = next(db_gen)
                db.execute(text("SELECT 1"))
            finally:
                db_gen.close()
            checks["database"] = "healthy (legacy)"
        except Exception as e2:
            logger.error("database_health_check_failed", supabase_error=str(e1), sqlalchemy_error=str(e2))
            # Error details are logged only; the response stays generic.
            checks["database"] = "unhealthy"

    # Check Redis
    try:
        import redis
        from control_plane_api.app.config import settings
        r = redis.from_url(settings.redis_url)
        try:
            r.ping()
        finally:
            # A fresh client (and pool) is built per request; release its
            # sockets so repeated probes do not accumulate connections.
            r.connection_pool.disconnect()
        checks["redis"] = "healthy"
    except Exception as e:
        logger.error("redis_health_check_failed", error=str(e))
        # NOTE(review): unlike the database branch, the raw error text is
        # returned to the caller here — confirm that is acceptable.
        checks["redis"] = f"unhealthy: {str(e)}"

    # Check Temporal (just configuration check, not actual connection)
    try:
        from control_plane_api.app.config import settings
        if settings.temporal_host and settings.temporal_namespace:
            checks["temporal"] = "configured"
        else:
            checks["temporal"] = "not configured"
    except Exception as e:
        logger.error("temporal_health_check_failed", error=str(e))
        checks["temporal"] = f"error: {str(e)}"

    # Determine overall status: every component entry (everything except the
    # bookkeeping keys) must carry one of the accepted "good" values.
    checks["status"] = "healthy" if all(
        v in ["healthy", "healthy (legacy)", "configured"]
        for k, v in checks.items()
        if k not in ["timestamp", "status"]
    ) else "degraded"

    return checks
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced health check endpoints for production monitoring.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- Basic health check (/health)
|
|
6
|
+
- Readiness check with dependency validation (/health/ready)
|
|
7
|
+
- Liveness check (/health/live)
|
|
8
|
+
- Detailed health status (/health/detailed)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, Depends, HTTPException, status
|
|
12
|
+
from fastapi.responses import JSONResponse
|
|
13
|
+
from typing import Dict, Any, Optional
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
|
+
from sqlalchemy import text
|
|
17
|
+
import structlog
|
|
18
|
+
import httpx
|
|
19
|
+
import asyncio
|
|
20
|
+
import time
|
|
21
|
+
import os
|
|
22
|
+
import psutil
|
|
23
|
+
|
|
24
|
+
from control_plane_api.app.database import get_session
|
|
25
|
+
from control_plane_api.app.lib.redis_client import get_redis_client
|
|
26
|
+
from control_plane_api.app.lib.temporal_client import get_temporal_client
|
|
27
|
+
from control_plane_api.app.config import settings
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger()
|
|
30
|
+
|
|
31
|
+
router = APIRouter()
|
|
32
|
+
|
|
33
|
+
# Track application start time
|
|
34
|
+
APP_START_TIME = time.time()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@router.get("/health", tags=["Health"])
async def health_check() -> Dict[str, str]:
    """
    Basic health check endpoint.

    Performs no dependency probing: it answers with a static "healthy"
    status, the service name, the configured API version and the current
    timezone-aware UTC timestamp.
    """
    api_version = settings.api_version
    now_iso = datetime.now(timezone.utc).isoformat()
    payload = dict(status="healthy", service="agent-control-plane")
    payload["version"] = api_version
    payload["timestamp"] = now_iso
    return payload
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@router.get("/health/live", tags=["Health"])
async def liveness_check() -> Dict[str, Any]:
    """
    Liveness probe.

    Allocates a small list, computes the uptime since module import and
    returns it. Any exception raised while doing so is logged and turned
    into an HTTP 503.

    Returns:
        Dict with "status" ("alive"), "uptime_seconds" and "timestamp".

    Raises:
        HTTPException: 503 when building the response fails.
    """
    try:
        # Cheap sanity allocation; the value itself is never used.
        _scratch = [*range(1000)]

        elapsed = time.time() - APP_START_TIME
        body = {
            "status": "alive",
            "uptime_seconds": round(elapsed, 2),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
    except Exception as exc:
        logger.error("liveness_check_failed", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Liveness check failed",
        )
    else:
        return body
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@router.get("/health/ready", tags=["Health"])
async def readiness_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Readiness probe.

    Runs three independent checks — database (``SELECT 1``), Redis (``ping``)
    and Temporal (namespace describe) — and collects a message for each one
    that raises. Only the database result decides readiness; Redis and
    Temporal results are reported but do not affect the status code.

    Args:
        db_session: Session injected by ``get_session``; a falsy value is
            reported as "No session available".

    Returns:
        The response dict (HTTP 200) when the database check passed, or a
        ``JSONResponse`` with the same content and HTTP 503 otherwise.
    """
    database_ok = False
    redis_ok = False
    temporal_ok = False
    problems = []

    # --- Database -----------------------------------------------------------
    if not db_session:
        problems.append("Database: No session available")
    else:
        try:
            probe = await db_session.execute(text("SELECT 1"))
            database_ok = probe.scalar() == 1
        except Exception as exc:
            logger.warning("database_health_check_failed", error=str(exc))
            problems.append(f"Database: {str(exc)}")

    # --- Redis (skipped silently when no client is returned) -----------------
    try:
        cache = get_redis_client()
        if cache:
            await cache.ping()
            redis_ok = True
    except Exception as exc:
        logger.warning("redis_health_check_failed", error=str(exc))
        problems.append(f"Redis: {str(exc)}")

    # --- Temporal (skipped silently when no client is returned) --------------
    # NOTE(review): confirm the object returned by get_temporal_client really
    # exposes service_client.describe_namespace(<namespace>); any failure here,
    # including an AttributeError, is caught and reported as a Temporal error.
    try:
        workflow_client = await get_temporal_client()
        if workflow_client:
            await workflow_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            temporal_ok = True
    except Exception as exc:
        logger.warning("temporal_health_check_failed", error=str(exc))
        problems.append(f"Temporal: {str(exc)}")

    checks = {
        "database": database_ok,
        "redis": redis_ok,
        "temporal": temporal_ok,
    }

    # Database is required, Redis and Temporal are optional
    is_ready = database_ok

    response = {
        "status": "not_ready" if not is_ready else "ready",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "checks": checks,
    }
    if problems:
        response["errors"] = problems

    if is_ready:
        return response

    return JSONResponse(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        content=response,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@router.get("/health/detailed", tags=["Health"])
async def detailed_health_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Detailed health check with system metrics and dependency latencies.

    Collects CPU / memory / disk figures via psutil, probes the database,
    Redis and Temporal (recording latency in milliseconds on success or the
    error text on failure), and optionally probes the Kubiya API and LiteLLM
    proxy ``/health`` endpoints when their base URLs are configured.

    The overall "status" is "healthy" only if every entry in "dependencies"
    is healthy; external services are reported but do not influence it.

    Args:
        db_session: Session injected by ``get_session``; a falsy value is
            reported as an unhealthy database.

    Returns:
        Dict with "status", "timestamp", "version", "environment", "uptime",
        "system", "dependencies" and "external_services" (``None`` when no
        external service is configured).
    """
    uptime = time.time() - APP_START_TIME

    # System metrics. cpu_percent(interval=0.1) sleeps for the sampling
    # interval, so run it in the default executor rather than stalling the
    # event loop for every other request.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(None, psutil.cpu_percent, 0.1)
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    # Dependency checks
    dependencies = {}

    # Database check with latency
    if db_session:
        try:
            start = time.perf_counter()
            result = await db_session.execute(text("SELECT 1"))
            db_latency = (time.perf_counter() - start) * 1000  # Convert to ms
            dependencies["database"] = {
                "healthy": result.scalar() == 1,
                "latency_ms": round(db_latency, 2),
            }
        except Exception as e:
            dependencies["database"] = {
                "healthy": False,
                "error": str(e),
            }
    else:
        # Without this entry an absent session would leave "dependencies"
        # without a database key and the overall status could read "healthy"
        # even though the database was never checked.
        dependencies["database"] = {
            "healthy": False,
            "error": "No session available",
        }

    # Redis check with latency (no entry is recorded when no client exists)
    try:
        redis_client = get_redis_client()
        if redis_client:
            start = time.perf_counter()
            await redis_client.ping()
            redis_latency = (time.perf_counter() - start) * 1000
            dependencies["redis"] = {
                "healthy": True,
                "latency_ms": round(redis_latency, 2),
            }
    except Exception as e:
        dependencies["redis"] = {
            "healthy": False,
            "error": str(e),
        }

    # Temporal check (no entry is recorded when no client exists)
    try:
        temporal_client = await get_temporal_client()
        if temporal_client:
            start = time.perf_counter()
            await temporal_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            temporal_latency = (time.perf_counter() - start) * 1000
            dependencies["temporal"] = {
                "healthy": True,
                "latency_ms": round(temporal_latency, 2),
                "namespace": settings.temporal_namespace,
            }
    except Exception as e:
        dependencies["temporal"] = {
            "healthy": False,
            "error": str(e),
        }

    # External services check (if configured)
    external_services = {}

    if settings.kubiya_api_base:
        external_services["kubiya_api"] = await _probe_http_health(
            settings.kubiya_api_base
        )

    if settings.litellm_api_base:
        external_services["litellm_proxy"] = await _probe_http_health(
            settings.litellm_api_base
        )

    # Determine overall health
    all_healthy = all(
        dep.get("healthy", False) for dep in dependencies.values()
    )

    return {
        "status": "healthy" if all_healthy else "degraded",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "version": settings.api_version,
        "environment": settings.environment,
        "uptime": {
            "seconds": round(uptime, 2),
            "human_readable": _format_uptime(uptime),
        },
        "system": {
            "cpu": {
                "percent": cpu_percent,
                "cores": psutil.cpu_count(),
            },
            "memory": {
                "percent": memory.percent,
                "used_gb": round(memory.used / (1024**3), 2),
                "total_gb": round(memory.total / (1024**3), 2),
            },
            "disk": {
                "percent": disk.percent,
                "used_gb": round(disk.used / (1024**3), 2),
                "total_gb": round(disk.total / (1024**3), 2),
            },
        },
        "dependencies": dependencies,
        "external_services": external_services if external_services else None,
    }


async def _probe_http_health(base_url: str) -> Dict[str, Any]:
    """GET ``{base_url}/health`` (5 s timeout) and summarize the outcome.

    Returns a dict with "healthy", "latency_ms" and "status_code" when a
    response was received, or "healthy": False plus "error" when the request
    raised.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            start = time.perf_counter()
            response = await client.get(f"{base_url}/health")
            latency_ms = (time.perf_counter() - start) * 1000
        return {
            "healthy": response.status_code == 200,
            "latency_ms": round(latency_ms, 2),
            "status_code": response.status_code,
        }
    except Exception as e:
        return {
            "healthy": False,
            "error": str(e),
        }
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _format_uptime(seconds: float) -> str:
|
|
312
|
+
"""Format uptime in human-readable format."""
|
|
313
|
+
days, remainder = divmod(int(seconds), 86400)
|
|
314
|
+
hours, remainder = divmod(remainder, 3600)
|
|
315
|
+
minutes, seconds = divmod(remainder, 60)
|
|
316
|
+
|
|
317
|
+
parts = []
|
|
318
|
+
if days > 0:
|
|
319
|
+
parts.append(f"{days}d")
|
|
320
|
+
if hours > 0:
|
|
321
|
+
parts.append(f"{hours}h")
|
|
322
|
+
if minutes > 0:
|
|
323
|
+
parts.append(f"{minutes}m")
|
|
324
|
+
parts.append(f"{seconds}s")
|
|
325
|
+
|
|
326
|
+
return " ".join(parts)
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integrations Router - Proxy to Kubiya Integrations API
|
|
3
|
+
|
|
4
|
+
This router provides access to organization integrations from Kubiya API.
|
|
5
|
+
Integrations provide delegated credentials to third-party services (GitHub, Jira, AWS, etc.)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
10
|
+
from typing import Optional, List, Dict, Any
|
|
11
|
+
import structlog
|
|
12
|
+
|
|
13
|
+
from control_plane_api.app.middleware.auth import get_current_organization
|
|
14
|
+
from control_plane_api.app.lib.kubiya_client import get_kubiya_client, KUBIYA_API_BASE
|
|
15
|
+
|
|
16
|
+
logger = structlog.get_logger()
|
|
17
|
+
|
|
18
|
+
router = APIRouter(prefix="/integrations", tags=["integrations"])
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@router.get("")
async def list_integrations(
    request: Request,
    organization: dict = Depends(get_current_organization),
    connected_only: bool = False,
) -> List[Dict[str, Any]]:
    """
    List all integrations available in the organization.

    This endpoint proxies to Kubiya Integrations API and returns a list of
    integrations with their metadata and connection status.

    Args:
        request: Incoming request; ``request.state.kubiya_token`` (and
            optionally ``kubiya_auth_type``) are forwarded upstream.
        organization: Organization resolved by ``get_current_organization``;
            its "id" is sent as ``X-Organization-ID``.
        connected_only: If True, only return integrations whose "connected"
            field is truthy or whose "status" equals "active" (default: False)

    Returns:
        List of integrations with metadata

    Raises:
        HTTPException: the upstream status code when Kubiya answers with a
            non-200 response, 504 on timeout, 502 on connection errors and
            500 for anything unexpected.
    """
    try:
        token = request.state.kubiya_token
        auth_type = getattr(request.state, "kubiya_auth_type", "Bearer")
        org_id = organization["id"]

        # Log only whether a token is present: writing a prefix of the
        # credential to the logs exposes part of a secret.
        logger.debug(
            "integrations_list_auth",
            auth_type=auth_type,
            has_token=bool(token),
            org_id=org_id
        )

        # Prepare headers for Kubiya API
        headers = {
            "Authorization": f"{auth_type} {token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
            "X-Kubiya-Client": "agent-control-plane",
            "X-Organization-ID": org_id,
        }

        # Call Kubiya Integrations API with full details
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{KUBIYA_API_BASE}/api/v2/integrations?full=true",
                headers=headers,
            )

        if response.status_code != 200:
            logger.error(
                "kubiya_api_error",
                status=response.status_code,
                response=response.text[:500],
            )
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Failed to fetch integrations from Kubiya API: {response.text[:200]}",
            )

        integrations = response.json()
        # Remember the unfiltered size so the body is parsed only once.
        total_count = len(integrations)

        # Filter to only connected integrations if requested
        if connected_only:
            integrations = [
                i for i in integrations
                if i.get("connected") or i.get("status") == "active"
            ]

        logger.info(
            "integrations_fetched",
            org_id=org_id,
            total_count=total_count,
            connected_count=len(integrations),
        )
        return integrations

    except HTTPException:
        # Propagate the upstream status untouched; without this clause the
        # generic handler below would rewrite it into a 500.
        raise
    except httpx.TimeoutException as e:
        logger.error("kubiya_api_timeout", endpoint="integrations")
        raise HTTPException(status_code=504, detail="Kubiya API request timed out") from e
    except httpx.RequestError as e:
        logger.error("kubiya_api_request_error", error=str(e))
        raise HTTPException(status_code=502, detail=f"Failed to connect to Kubiya API: {str(e)}") from e
    except Exception as e:
        logger.error("unexpected_error", error=str(e), error_type=type(e).__name__)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@router.get("/{integration_id}")
async def get_integration(
    integration_id: str,
    request: Request,
    organization: dict = Depends(get_current_organization),
) -> Dict[str, Any]:
    """
    Get details of a specific integration.

    Proxies ``GET /api/v2/integrations/{integration_id}`` on the Kubiya API,
    forwarding the caller's token and organization id.

    Args:
        integration_id: Integration UUID
        request: Incoming request; ``request.state.kubiya_token`` (and
            optionally ``kubiya_auth_type``) are forwarded upstream.
        organization: Organization resolved by ``get_current_organization``;
            its "id" is sent as ``X-Organization-ID``.

    Returns:
        Integration details

    Raises:
        HTTPException: the upstream status code when Kubiya answers with a
            non-200 response, 504 on timeout, 502 on connection errors and
            500 for anything unexpected.
    """
    try:
        token = request.state.kubiya_token
        auth_type = getattr(request.state, "kubiya_auth_type", "Bearer")
        org_id = organization["id"]

        # Prepare headers for Kubiya API
        headers = {
            "Authorization": f"{auth_type} {token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
            "X-Kubiya-Client": "agent-control-plane",
            "X-Organization-ID": org_id,
        }

        # Call Kubiya Integrations API
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{KUBIYA_API_BASE}/api/v2/integrations/{integration_id}",
                headers=headers,
            )

        if response.status_code != 200:
            logger.error(
                "kubiya_api_error",
                status=response.status_code,
                integration_id=integration_id[:8],
                response=response.text[:500],
            )
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Failed to fetch integration from Kubiya API: {response.text[:200]}",
            )

        integration = response.json()
        logger.info(
            "integration_fetched",
            org_id=org_id,
            integration_id=integration_id[:8],
        )
        return integration

    except HTTPException:
        # Propagate the upstream status (e.g. 404) untouched; without this
        # clause the generic handler below would rewrite it into a 500.
        raise
    except httpx.TimeoutException as e:
        logger.error("kubiya_api_timeout", endpoint=f"integrations/{integration_id[:8]}")
        raise HTTPException(status_code=504, detail="Kubiya API request timed out") from e
    except httpx.RequestError as e:
        logger.error("kubiya_api_request_error", error=str(e))
        raise HTTPException(status_code=502, detail=f"Failed to connect to Kubiya API: {str(e)}") from e
    except Exception as e:
        logger.error("unexpected_error", error=str(e), error_type=type(e).__name__)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@router.get("/{integration_type}/{integration_id}/token")
async def get_integration_token(
    integration_type: str,
    integration_id: str,
    request: Request,
    organization: dict = Depends(get_current_organization),
) -> Dict[str, Any]:
    """
    Get delegated credentials/token for a specific integration.

    This endpoint is used by workers at runtime to get integration credentials.
    Should be called securely from backend only, not exposed to frontend.

    The upstream response is returned as parsed JSON when it is valid JSON;
    otherwise the raw response text is wrapped as ``{"token": <text>}``.

    Args:
        integration_type: Type of integration (github, github_app, jira);
            matched case-insensitively.
        integration_id: Integration UUID or installation ID
        request: Incoming request; ``request.state.kubiya_token`` (and the
            optional ``request.state.kubiya_auth_type``, default ``"Bearer"``)
            are used to build the upstream ``Authorization`` header.
        organization: Current organization; its ``"id"`` is sent upstream as
            ``X-Organization-ID``.

    Returns:
        Integration credentials/token

    Raises:
        HTTPException: 400 for an unsupported integration type; the upstream
            status code when the Kubiya API answers with anything other than
            200; 504 on timeout; 502 on a connection-level failure; 500 on
            any other unexpected error.

    Examples:
        - /api/v1/integrations/github/uuid-here/token
        - /api/v1/integrations/github_app/installation-id/token
        - /api/v1/integrations/jira/uuid-here/token
    """
    try:
        token = request.state.kubiya_token
        auth_type = getattr(request.state, "kubiya_auth_type", "Bearer")
        org_id = organization["id"]

        # Prepare headers for Kubiya API
        headers = {
            "Authorization": f"{auth_type} {token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
            "X-Kubiya-Client": "agent-control-plane",
            "X-Organization-ID": org_id,
        }

        # Build token URL based on integration type. All supported types share
        # the same upstream path layout, differing only in the type segment.
        integration_type_lower = integration_type.lower()
        if integration_type_lower not in ("github", "github_app", "jira"):
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported integration type: {integration_type}. Supported types: github, github_app, jira",
            )
        token_url = (
            f"{KUBIYA_API_BASE}/api/v1/integration/"
            f"{integration_type_lower}/token/{integration_id}"
        )

        # Only a short prefix of the identifier is ever logged.
        short_id = integration_id[:8]

        # Get token
        async with httpx.AsyncClient(timeout=30.0) as client:
            token_response = await client.get(token_url, headers=headers)

        if token_response.status_code != 200:
            logger.error(
                "kubiya_api_error",
                status=token_response.status_code,
                integration_id=short_id,
                response=token_response.text[:500],
            )
            raise HTTPException(
                status_code=token_response.status_code,
                detail=f"Failed to fetch integration token: {token_response.text[:200]}",
            )

        # Try to parse as JSON first; only a decoding failure triggers the
        # plain-text fallback (JSON decode errors are ValueError subclasses).
        try:
            token_data = token_response.json()
        except ValueError:
            # If not JSON, return as plain text value
            token_data = {"token": token_response.text}

        logger.info(
            "integration_token_fetched",
            org_id=org_id,
            integration_id=short_id,
            integration_type=integration_type,
        )
        return token_data

    except httpx.TimeoutException as e:
        logger.error("kubiya_api_timeout", endpoint=f"integrations/{integration_id[:8]}/token")
        raise HTTPException(status_code=504, detail="Kubiya API request timed out") from e
    except httpx.RequestError as e:
        logger.error("kubiya_api_request_error", error=str(e))
        raise HTTPException(status_code=502, detail=f"Failed to connect to Kubiya API: {str(e)}") from e
    except HTTPException:
        # Let deliberate HTTP errors (400 / upstream status) pass through
        # instead of being rewritten into a 500 by the generic handler.
        raise
    except Exception as e:
        logger.error("unexpected_error", error=str(e), error_type=type(e).__name__)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
|