kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Health check endpoints"""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Request, HTTPException, status
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
logger = structlog.get_logger()
|
|
8
|
+
|
|
9
|
+
router = APIRouter()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/health")
async def health_check(request: Request):
    """
    Health check endpoint (no authentication required).

    Reports the control plane and its external services as healthy using
    fixed values. This handler does not contact any dependency itself, so
    it never blocks on external I/O.

    Args:
        request: Incoming request; part of the route signature but not
            read by this handler.

    Returns:
        dict with "status", "service", an ISO-8601 "timestamp" taken from
        the naive UTC clock (no offset suffix), and a "services" mapping
        of per-service status strings.
    """
    # Statuses are hard-coded: nothing here probes the listed services.
    services_status = {
        "kubiya_api": "healthy",
        "context_graph": "healthy",
        "cognitive_memory": "healthy"
    }

    return {
        "status": "healthy",
        "service": "agent-control-plane",
        "timestamp": datetime.utcnow().isoformat(),
        "services": services_status
    }
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.get("/ready")
async def readiness_check():
    """
    Readiness check endpoint (no authentication required).

    Returns:
        dict with a fixed "status" of "ready" and an ISO-8601 "timestamp"
        taken from the naive UTC clock.
    """
    now_iso = datetime.utcnow().isoformat()
    return {"status": "ready", "timestamp": now_iso}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@router.get("/health/detailed")
async def detailed_health_check(request: Request):
    """
    Report the status of the API and its backing dependencies.

    No authentication is required. Three checks run, each isolated in its own
    ``try`` so that one failing dependency cannot hide the others:

    - database: result of ``health_check_db()`` from ``control_plane_api.app.database``
    - redis: a ``PING`` against ``settings.redis_url``
    - temporal: configuration presence only (host and namespace); no
      connection is attempted

    Args:
        request: Incoming request; not read by this handler.

    Returns:
        dict with keys ``api``, ``timestamp`` (naive UTC, ISO 8601),
        ``database``, ``redis``, ``temporal`` and ``status``. ``status`` is
        ``"healthy"`` only when every check reports ``"healthy"`` or
        ``"configured"``; otherwise it is ``"degraded"``.
    """
    checks = {
        "api": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
    }

    # Database check (delegated to health_check_db)
    try:
        from control_plane_api.app.database import health_check_db

        checks["database"] = "healthy" if health_check_db() else "unhealthy"
    except Exception as e:
        logger.error("database_health_check_failed", error=str(e))
        checks["database"] = "unhealthy"

    # Check Redis
    redis_conn = None
    try:
        import redis
        from control_plane_api.app.config import settings

        redis_conn = redis.from_url(settings.redis_url)
        redis_conn.ping()
        checks["redis"] = "healthy"
    except Exception as e:
        logger.error("redis_health_check_failed", error=str(e))
        checks["redis"] = f"unhealthy: {str(e)}"
    finally:
        # A fresh client (with its own connection pool) is created on every
        # request, so release it explicitly instead of waiting for GC.
        if redis_conn is not None:
            try:
                redis_conn.close()
            except Exception as e:
                logger.warning("redis_health_check_close_failed", error=str(e))

    # Check Temporal (just configuration check, not actual connection)
    try:
        from control_plane_api.app.config import settings

        if settings.temporal_host and settings.temporal_namespace:
            checks["temporal"] = "configured"
        else:
            checks["temporal"] = "not configured"
    except Exception as e:
        logger.error("temporal_health_check_failed", error=str(e))
        checks["temporal"] = f"error: {str(e)}"

    # Overall status: anything other than "healthy"/"configured" degrades it.
    # "timestamp" is metadata, not a check, so it is excluded.
    acceptable = ("healthy", "configured")
    degraded = any(
        value not in acceptable
        for name, value in checks.items()
        if name not in ("timestamp", "status")
    )
    checks["status"] = "degraded" if degraded else "healthy"

    return checks
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@router.get("/health/event-bus")
async def event_bus_health_check():
    """
    Report event bus health as seen by ``EventBusManager``.

    No authentication is required. Anticipated failures are not raised; each
    outcome is encoded in the ``status`` field instead:

    - ``not_configured``: ``settings.event_bus`` is missing or falsy
    - ``dependencies_missing``: importing the settings or the event bus
      manager raised ``ImportError``
    - ``healthy`` / ``degraded``: taken from ``_overall.healthy`` in the
      report returned by ``manager.health_check()``
    - ``error``: building, initializing or querying the manager raised

    NOTE(review): a new manager is built and initialized on every request and
    nothing in this handler shuts it down - confirm whether
    ``EventBusManager`` needs explicit cleanup.

    Returns:
        dict that always carries ``status`` and ``timestamp`` (naive UTC,
        ISO 8601), plus ``providers``, ``message`` and/or ``error`` depending
        on the outcome.
    """

    def _utc_stamp():
        return datetime.utcnow().isoformat()

    # Stage 1: configuration lookup and optional imports. Only ImportError is
    # translated here; anything else propagates to the framework.
    try:
        from control_plane_api.app.config import settings

        bus_settings = getattr(settings, "event_bus", None)
        if not bus_settings:
            return {
                "status": "not_configured",
                "message": "Event bus not configured - using default HTTP event publishing",
                "timestamp": _utc_stamp(),
            }

        from control_plane_api.app.lib.event_bus.manager import (
            EventBusManager,
            EventBusManagerConfig,
        )
    except ImportError as e:
        # Event bus dependencies not installed
        return {
            "status": "dependencies_missing",
            "message": "Event bus dependencies not installed",
            "error": str(e),
            "timestamp": _utc_stamp(),
        }

    # Stage 2: build the manager from settings and ask it for provider health.
    try:
        manager = EventBusManager(EventBusManagerConfig(**settings.event_bus))
        await manager.initialize()
        provider_health = await manager.health_check()

        overall = provider_health.get("_overall", {})
        status_label = "healthy" if overall.get("healthy", False) else "degraded"
        return {
            "status": status_label,
            "providers": provider_health,
            "timestamp": _utc_stamp(),
        }
    except Exception as e:
        logger.error("event_bus_health_check_failed", error=str(e))
        return {
            "status": "error",
            "error": str(e),
            "timestamp": _utc_stamp(),
        }
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced health check endpoints for production monitoring.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- Basic health check (/health)
|
|
6
|
+
- Readiness check with dependency validation (/health/ready)
|
|
7
|
+
- Liveness check (/health/live)
|
|
8
|
+
- Detailed health status (/health/detailed)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, Depends, HTTPException, status
|
|
12
|
+
from fastapi.responses import JSONResponse
|
|
13
|
+
from typing import Dict, Any, Optional
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
|
+
from sqlalchemy import text
|
|
17
|
+
import structlog
|
|
18
|
+
import httpx
|
|
19
|
+
import asyncio
|
|
20
|
+
import time
|
|
21
|
+
import os
|
|
22
|
+
import psutil
|
|
23
|
+
|
|
24
|
+
from control_plane_api.app.database import get_session
|
|
25
|
+
from control_plane_api.app.lib.redis_client import get_redis_client
|
|
26
|
+
from control_plane_api.app.lib.temporal_client import get_temporal_client
|
|
27
|
+
from control_plane_api.app.config import settings
|
|
28
|
+
|
|
29
|
+
# Module-level structlog logger used by the health handlers in this module.
logger = structlog.get_logger()
|
|
30
|
+
|
|
31
|
+
# Router on which the @router.get(...) health endpoints in this module are registered.
router = APIRouter()
|
|
32
|
+
|
|
33
|
+
# Track application start time
|
|
34
|
+
# Wall-clock epoch seconds captured when this module is imported; the handlers
# subtract it from time.time() to report uptime.
APP_START_TIME = time.time()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@router.get("/health", tags=["Health"])
async def health_check() -> Dict[str, str]:
    """
    Minimal "is the process up" endpoint.

    No dependency is contacted; the handler only echoes static identity
    fields, the configured API version and the current UTC time.

    Returns:
        dict with ``status``, ``service``, ``version`` (from
        ``settings.api_version``) and ``timestamp`` (timezone-aware UTC,
        ISO 8601).
    """
    body = {"status": "healthy", "service": "agent-control-plane"}
    body["version"] = settings.api_version
    body["timestamp"] = datetime.now(timezone.utc).isoformat()
    return body
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@router.get("/health/live", tags=["Health"])
async def liveness_check() -> Dict[str, Any]:
    """
    Liveness probe.

    Performs a small allocation and computes the uptime since module import.
    If either step raises, the error is logged and translated to HTTP 503.

    Returns:
        dict with ``status`` (``"alive"``), ``uptime_seconds`` (rounded to two
        decimals) and ``timestamp`` (timezone-aware UTC, ISO 8601).

    Raises:
        HTTPException: 503 with detail ``"Liveness check failed"`` when the
            probe body raises.
    """
    try:
        # Allocation smoke test; the list itself is discarded.
        _ = [*range(1000)]

        elapsed = time.time() - APP_START_TIME
        body = {"status": "alive", "uptime_seconds": round(elapsed, 2)}
        body["timestamp"] = datetime.now(timezone.utc).isoformat()
    except Exception as e:
        logger.error("liveness_check_failed", error=str(e))
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Liveness check failed",
        )
    return body
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@router.get("/health/ready", tags=["Health"])
async def readiness_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Readiness probe: decide whether this instance should receive traffic.

    Three dependencies are probed independently; a failure in one is logged
    as a warning and recorded in ``errors`` without stopping the others:

    - database: ``SELECT 1`` through the injected session (required)
    - redis: ``PING`` through ``get_redis_client()`` (informational)
    - temporal: ``describe_namespace`` for ``settings.temporal_namespace``
      through ``get_temporal_client()`` (informational)

    Only the database result decides readiness.

    NOTE(review): in the temporalio SDK, namespace description is exposed via
    ``service_client.workflow_service`` with a request object - confirm that
    ``service_client.describe_namespace(<str>)`` matches the client returned
    by ``get_temporal_client()``.

    Args:
        db_session: Session supplied by ``get_session``; a falsy value is
            recorded as "No session available".

    Returns:
        dict with ``status``, ``timestamp``, ``checks`` and, when any probe
        failed, ``errors``. When the database check did not pass, the same
        payload is returned as a ``JSONResponse`` with HTTP 503.
    """
    checks = dict.fromkeys(("database", "redis", "temporal"), False)
    errors = []

    # Database (the only mandatory dependency)
    if not db_session:
        errors.append("Database: No session available")
    else:
        try:
            probe = await db_session.execute(text("SELECT 1"))
            checks["database"] = probe.scalar() == 1
        except Exception as e:
            logger.warning("database_health_check_failed", error=str(e))
            errors.append(f"Database: {str(e)}")

    # Redis: a falsy client leaves the check at False without recording an error
    try:
        cache = get_redis_client()
        if cache:
            await cache.ping()
            checks["redis"] = True
    except Exception as e:
        logger.warning("redis_health_check_failed", error=str(e))
        errors.append(f"Redis: {str(e)}")

    # Temporal: same convention as Redis for a falsy client
    try:
        workflow_client = await get_temporal_client()
        if workflow_client:
            await workflow_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            checks["temporal"] = True
    except Exception as e:
        logger.warning("temporal_health_check_failed", error=str(e))
        errors.append(f"Temporal: {str(e)}")

    # Database is required, Redis and Temporal are optional
    is_ready = checks["database"]

    response = {
        "status": "not_ready" if not is_ready else "ready",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "checks": checks,
    }
    if errors:
        response["errors"] = errors

    if is_ready:
        return response

    return JSONResponse(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        content=response,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
async def _probe_http_health(base_url: str, timeout: float = 5.0) -> Dict[str, Any]:
    """GET ``{base_url}/health`` and summarise the outcome for the detailed report.

    Returns ``healthy`` (True only for HTTP 200), ``latency_ms`` and
    ``status_code`` on a completed request, or ``healthy=False`` plus
    ``error`` when the request raised.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            started = time.perf_counter()
            response = await client.get(f"{base_url}/health")
            latency_ms = (time.perf_counter() - started) * 1000
    except Exception as e:
        return {"healthy": False, "error": str(e)}
    return {
        "healthy": response.status_code == 200,
        "latency_ms": round(latency_ms, 2),
        "status_code": response.status_code,
    }


@router.get("/health/detailed", tags=["Health"])
async def detailed_health_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Detailed health report for debugging and dashboards.

    Collects, in order:

    - uptime since module import
    - system metrics from psutil (CPU, memory, disk usage of ``/``)
    - dependency probes with latency: database (``SELECT 1``), Redis
      (``PING``), Temporal (``describe_namespace``)
    - external HTTP probes (``GET <base>/health``) for the Kubiya API and the
      LiteLLM proxy, each only when its base URL is set in ``settings``

    Overall ``status`` is ``"healthy"`` when every recorded entry in
    ``dependencies`` is healthy, otherwise ``"degraded"``. External services
    do not influence ``status``.

    NOTE(review): in the temporalio SDK, namespace description is exposed via
    ``service_client.workflow_service`` with a request object - confirm that
    ``service_client.describe_namespace(<str>)`` matches the client returned
    by ``get_temporal_client()``.

    Args:
        db_session: Session supplied by ``get_session``. A falsy value is
            reported as an unhealthy database, so a missing session cannot
            yield a "healthy" status.

    Returns:
        dict with ``status``, ``timestamp``, ``version``, ``environment``,
        ``uptime``, ``system``, ``dependencies`` and ``external_services``
        (``None`` when no external service is configured).
    """
    uptime = time.time() - APP_START_TIME

    # cpu_percent(interval=0.1) sleeps for the sampling interval; run it in
    # the default executor so the event loop keeps serving other requests.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(
        None, lambda: psutil.cpu_percent(interval=0.1)
    )
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    # Dependency checks. Latencies use perf_counter (monotonic), so a
    # wall-clock adjustment cannot produce negative or inflated readings.
    dependencies = {}

    # Database check with latency
    if db_session:
        try:
            start = time.perf_counter()
            result = await db_session.execute(text("SELECT 1"))
            db_latency = (time.perf_counter() - start) * 1000  # Convert to ms
            dependencies["database"] = {
                "healthy": result.scalar() == 1,
                "latency_ms": round(db_latency, 2),
            }
        except Exception as e:
            dependencies["database"] = {
                "healthy": False,
                "error": str(e),
            }
    else:
        # Same wording as the readiness probe uses for this condition.
        dependencies["database"] = {
            "healthy": False,
            "error": "No session available",
        }

    # Redis check with latency (a falsy client leaves the entry absent)
    try:
        redis_client = get_redis_client()
        if redis_client:
            start = time.perf_counter()
            await redis_client.ping()
            redis_latency = (time.perf_counter() - start) * 1000
            dependencies["redis"] = {
                "healthy": True,
                "latency_ms": round(redis_latency, 2),
            }
    except Exception as e:
        dependencies["redis"] = {
            "healthy": False,
            "error": str(e),
        }

    # Temporal check (a falsy client leaves the entry absent)
    try:
        temporal_client = await get_temporal_client()
        if temporal_client:
            start = time.perf_counter()
            await temporal_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            temporal_latency = (time.perf_counter() - start) * 1000
            dependencies["temporal"] = {
                "healthy": True,
                "latency_ms": round(temporal_latency, 2),
                "namespace": settings.temporal_namespace,
            }
    except Exception as e:
        dependencies["temporal"] = {
            "healthy": False,
            "error": str(e),
        }

    # External services check (only for base URLs that are configured)
    external_services = {}
    if settings.kubiya_api_base:
        external_services["kubiya_api"] = await _probe_http_health(
            settings.kubiya_api_base
        )
    if settings.litellm_api_base:
        external_services["litellm_proxy"] = await _probe_http_health(
            settings.litellm_api_base
        )

    # Determine overall health from the dependency entries only
    all_healthy = all(
        dep.get("healthy", False) for dep in dependencies.values()
    )

    gib = 1024**3
    return {
        "status": "healthy" if all_healthy else "degraded",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "version": settings.api_version,
        "environment": settings.environment,
        "uptime": {
            "seconds": round(uptime, 2),
            "human_readable": _format_uptime(uptime),
        },
        "system": {
            "cpu": {
                "percent": cpu_percent,
                "cores": psutil.cpu_count(),
            },
            "memory": {
                "percent": memory.percent,
                "used_gb": round(memory.used / gib, 2),
                "total_gb": round(memory.total / gib, 2),
            },
            "disk": {
                "percent": disk.percent,
                "used_gb": round(disk.used / gib, 2),
                "total_gb": round(disk.total / gib, 2),
            },
        },
        "dependencies": dependencies,
        "external_services": external_services or None,
    }
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@router.get("/health/temporal-credentials", tags=["Health"])
async def check_temporal_credentials(
    request: Any = Depends(lambda: None),
    organization: dict = Depends(lambda: {"id": "test-org"}),
) -> Dict[str, Any]:
    """
    Diagnostic endpoint for an organization's Temporal credentials.

    Two paths exist:

    - ``request.state.kubiya_token`` is present: credentials are fetched with
      ``get_temporal_credentials_for_org(..., use_fallback=False)`` and
      summarised. The API key itself is never returned, only ``has_api_key``.
    - otherwise: a static answer describing local vs. cloud mode, based on
      ``is_local_temporal()``.

    Any exception raised inside the ``try`` is logged and returned as a
    ``status: "error"`` payload rather than propagated.

    NOTE(review): both dependencies are lambdas returning constants (``None``
    and ``{"id": "test-org"}``), so as written ``request`` has no ``state``
    and the token path looks unreachable unless the dependencies are
    overridden - confirm the intended wiring (``get_current_organization`` is
    imported below but not used).

    Args:
        request: Value supplied by the dependency; only inspected for
            ``state.kubiya_token``.
        organization: Mapping whose ``"id"`` is used as the organization id
            (falls back to ``"unknown"``).

    Returns:
        dict describing the credential status, the local/cloud mode, or the
        error.
    """
    from fastapi import Request  # noqa: F401
    from control_plane_api.app.middleware.auth import get_current_organization  # noqa: F401
    from control_plane_api.app.lib.temporal_credentials_service import get_temporal_credentials_for_org

    # This is a diagnostic endpoint, so auth is optional for testing.
    # In production, you may want to require authentication.
    try:
        from starlette.requests import Request as StarletteRequest  # noqa: F401

        has_token = hasattr(request, 'state') and hasattr(request.state, 'kubiya_token')

        if not has_token:
            # Unauthenticated call: only report which Temporal mode is in use.
            from control_plane_api.app.lib.temporal_credentials_service import is_local_temporal

            if not is_local_temporal():
                return {
                    "status": "info",
                    "message": "Authentication required to check cloud Temporal credentials",
                    "mode": "cloud",
                }
            return {
                "status": "ok",
                "mode": "local",
                "message": "Using local Temporal server",
                "namespace": os.getenv("TEMPORAL_NAMESPACE", "default"),
                "host": os.getenv("TEMPORAL_HOST", "localhost:7233"),
            }

        kubiya_token = request.state.kubiya_token
        organization_id = organization.get("id", "unknown")

        credentials = await get_temporal_credentials_for_org(
            org_id=organization_id,
            token=kubiya_token,
            use_fallback=False  # Don't fallback for health check
        )

        summary = {"status": "ok"}
        for field in ("namespace", "org", "ttl"):
            summary[field] = credentials.get(field)
        # Expose only whether a key exists, never the key.
        summary["has_api_key"] = bool(credentials.get("api_key"))
        summary["host"] = credentials.get("host")
        return summary
    except Exception as e:
        logger.error("temporal_credentials_health_check_failed", error=str(e))
        return {
            "status": "error",
            "error": str(e),
            "message": "Failed to fetch Temporal credentials",
        }
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _format_uptime(seconds: float) -> str:
|
|
380
|
+
"""Format uptime in human-readable format."""
|
|
381
|
+
days, remainder = divmod(int(seconds), 86400)
|
|
382
|
+
hours, remainder = divmod(remainder, 3600)
|
|
383
|
+
minutes, seconds = divmod(remainder, 60)
|
|
384
|
+
|
|
385
|
+
parts = []
|
|
386
|
+
if days > 0:
|
|
387
|
+
parts.append(f"{days}d")
|
|
388
|
+
if hours > 0:
|
|
389
|
+
parts.append(f"{hours}h")
|
|
390
|
+
if minutes > 0:
|
|
391
|
+
parts.append(f"{minutes}m")
|
|
392
|
+
parts.append(f"{seconds}s")
|
|
393
|
+
|
|
394
|
+
return " ".join(parts)
|