kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prometheus metrics for Kubiya Control Plane.
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive metrics for monitoring the AI orchestration layer:
|
|
5
|
+
- HTTP request metrics (latency, error rates, request counts)
|
|
6
|
+
- Execution metrics (active count, failures, duration, wait time)
|
|
7
|
+
- Worker and queue metrics
|
|
8
|
+
- LLM/AI metrics (requests, latency, tokens)
|
|
9
|
+
- Tool execution metrics
|
|
10
|
+
- Business metrics (agents, jobs, organizations)
|
|
11
|
+
|
|
12
|
+
Metrics naming convention follows Prometheus best practices:
|
|
13
|
+
- Prefix: kubiya_control_plane_ for HTTP metrics
|
|
14
|
+
- Prefix: kubiya_ for business metrics
|
|
15
|
+
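- Suffix: _total for counters, _seconds for durations, _count for gauges (not every gauge follows this, e.g. kubiya_task_failure_total and kubiya_worker_queue_depth are gauges)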
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import re
|
|
19
|
+
import os
|
|
20
|
+
import structlog
|
|
21
|
+
from typing import Tuple, Optional
|
|
22
|
+
from functools import lru_cache
|
|
23
|
+
|
|
24
|
+
from prometheus_client import (
|
|
25
|
+
Counter,
|
|
26
|
+
Histogram,
|
|
27
|
+
Gauge,
|
|
28
|
+
REGISTRY,
|
|
29
|
+
generate_latest,
|
|
30
|
+
CONTENT_TYPE_LATEST,
|
|
31
|
+
CollectorRegistry,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
logger = structlog.get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
PROMETHEUS_MULTIPROC_DIR = os.environ.get('PROMETHEUS_MULTIPROC_DIR')
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
# HTTP metrics (recorded per request by record_http_request)
# ---------------------------------------------------------------------------

HTTP_REQUESTS_TOTAL = Counter(
    'kubiya_control_plane_http_requests_total',
    'Total number of HTTP requests received by the Control Plane API',
    labelnames=['method', 'endpoint', 'status_code'],
)

HTTP_REQUEST_DURATION_SECONDS = Histogram(
    'kubiya_control_plane_http_request_duration_seconds',
    'HTTP request duration in seconds (latency)',
    labelnames=['method', 'endpoint'],
    buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0),
)

# ---------------------------------------------------------------------------
# Gauges. Every metric below is a Gauge, including the ones whose name ends
# in "_total": the update_* helpers in this module overwrite them with .set().
# ---------------------------------------------------------------------------

# Active executions, by type and status
ACTIVE_TASKS_COUNT = Gauge(
    'kubiya_active_tasks_count',
    'Number of currently active tasks/executions by type and status',
    labelnames=['execution_type', 'status'],
)

# Failed executions, by type
TASK_FAILURE_TOTAL = Gauge(
    'kubiya_task_failure_total',
    'Total number of failed task/executions by type',
    labelnames=['execution_type'],
)

# Execution duration - how long executions take
EXECUTION_DURATION_SECONDS = Gauge(
    'kubiya_execution_duration_seconds',
    'Average execution duration in seconds by type and status (from completed executions)',
    labelnames=['execution_type', 'status'],
)

# Worker queue depth - pending executions per queue
WORKER_QUEUE_DEPTH = Gauge(
    'kubiya_worker_queue_depth',
    'Number of pending executions waiting in queue',
    labelnames=['queue_id'],
)

# LLM requests total
LLM_REQUESTS_TOTAL = Gauge(
    'kubiya_llm_requests_total',
    'Total number of LLM API requests by model and status',
    labelnames=['model', 'status'],
)

# LLM latency
LLM_LATENCY_SECONDS = Gauge(
    'kubiya_llm_latency_seconds',
    'Average LLM request latency in seconds by model',
    labelnames=['model'],
)

# LLM tokens total
LLM_TOKENS_TOTAL = Gauge(
    'kubiya_llm_tokens_total',
    'Total tokens used in LLM requests by model and type',
    labelnames=['model', 'token_type'],
)

# Active streaming connections (unlabelled)
STREAMING_CONNECTIONS_ACTIVE = Gauge(
    'kubiya_streaming_connections_active',
    'Number of active SSE streaming connections',
)

# Tool execution duration
TOOL_EXECUTION_DURATION_SECONDS = Gauge(
    'kubiya_tool_execution_duration_seconds',
    'Average tool execution duration in seconds by tool name',
    labelnames=['tool_name'],
)

# Tool executions total
TOOL_EXECUTIONS_TOTAL = Gauge(
    'kubiya_tool_executions_total',
    'Total number of tool executions by tool name and status',
    labelnames=['tool_name', 'status'],
)

# Execution wait time (time from created to running)
EXECUTION_WAIT_TIME_SECONDS = Gauge(
    'kubiya_execution_wait_time_seconds',
    'Average wait time from creation to running in seconds by type',
    labelnames=['execution_type'],
)

# Webhook requests total
WEBHOOK_REQUESTS_TOTAL = Gauge(
    'kubiya_webhook_requests_total',
    'Total number of webhook trigger requests by status',
    labelnames=['status'],
)

# Executions by organization
EXECUTIONS_BY_ORG_TOTAL = Gauge(
    'kubiya_executions_by_org_total',
    'Total number of executions by organization',
    labelnames=['organization_id'],
)

# Active agents
AGENTS_ACTIVE = Gauge(
    'kubiya_agents_active',
    'Number of active agents by organization',
    labelnames=['organization_id'],
)

# Scheduled jobs total
SCHEDULED_JOBS_TOTAL = Gauge(
    'kubiya_scheduled_jobs_total',
    'Total number of scheduled jobs by status',
    labelnames=['status'],
)

# Conversation turns total
CONVERSATION_TURNS_TOTAL = Gauge(
    'kubiya_conversation_turns_total',
    'Total conversation turns by execution type',
    labelnames=['execution_type'],
)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Matches a canonical 8-4-4-4-12 hexadecimal UUID anywhere in a path (any case).
UUID_PATTERN = re.compile(
    r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
    flags=re.IGNORECASE,
)
# Matches a path segment made only of digits ("/123" followed by "/" or end of string).
NUMERIC_ID_PATTERN = re.compile(r'/\d+(?=/|$)')


@lru_cache(maxsize=1000)
def normalize_endpoint(path: str) -> str:
    """Collapse identifiers in ``path`` so it is usable as a low-cardinality label.

    UUIDs are replaced with ``{id}`` first, then all-digit path segments are
    replaced with ``/{id}``. Results are memoised for up to 1000 distinct paths.

    Args:
        path: Raw request path, e.g. ``/api/v1/agents/123``.

    Returns:
        The path with identifiers replaced by the ``{id}`` placeholder.
    """
    without_uuids = UUID_PATTERN.sub('{id}', path)
    return NUMERIC_ID_PATTERN.sub('/{id}', without_uuids)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def get_metrics_response() -> Tuple[bytes, str]:
    """Render the current metrics in the Prometheus exposition format.

    When ``PROMETHEUS_MULTIPROC_DIR`` is set, samples are collected through a
    fresh registry backed by ``MultiProcessCollector``; otherwise the default
    ``REGISTRY`` is rendered directly.

    Returns:
        A ``(payload, content_type)`` tuple. If rendering fails, the error is
        logged and a one-line comment payload is returned instead of raising.
    """
    try:
        if not PROMETHEUS_MULTIPROC_DIR:
            return generate_latest(REGISTRY), CONTENT_TYPE_LATEST

        # Imported lazily: only needed in multiprocess mode.
        from prometheus_client.multiprocess import MultiProcessCollector

        aggregate_registry = CollectorRegistry()
        MultiProcessCollector(aggregate_registry)
        return generate_latest(aggregate_registry), CONTENT_TYPE_LATEST
    except Exception as e:
        logger.error("metrics_generation_failed", error=str(e))
        return b"# Error generating metrics\n", CONTENT_TYPE_LATEST
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def record_http_request(
    method: str,
    endpoint: str,
    status_code: int,
    duration_seconds: float,
    trace_id: Optional[str] = None,
):
    """Count one HTTP request, observe its latency, and log it if it was a 5xx.

    Args:
        method: HTTP method of the request.
        endpoint: Endpoint label value (used as given).
        status_code: Response status code; stored as a string label.
        duration_seconds: Request duration fed to the latency histogram.
        trace_id: Optional trace identifier included in the 5xx log entry
            (logged as ``"unknown"`` when missing).
    """
    request_counter = HTTP_REQUESTS_TOTAL.labels(
        method=method,
        endpoint=endpoint,
        status_code=str(status_code),
    )
    request_counter.inc()

    latency_histogram = HTTP_REQUEST_DURATION_SECONDS.labels(
        method=method,
        endpoint=endpoint,
    )
    latency_histogram.observe(duration_seconds)

    # Only server errors get an extra log line.
    if status_code < 500:
        return

    logger.error(
        "prometheus_5xx_recorded",
        method=method,
        endpoint=endpoint,
        status_code=status_code,
        duration_seconds=round(duration_seconds, 4),
        trace_id=trace_id or "unknown",
    )
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def update_active_tasks(execution_type: str, status: str, count: int):
    """Set the active-tasks gauge for one (execution_type, status) pair to ``count``."""
    gauge = ACTIVE_TASKS_COUNT.labels(execution_type=execution_type, status=status)
    gauge.set(count)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def update_task_failures(execution_type: str, count: int):
    """Set the task-failure gauge for ``execution_type`` to ``count``."""
    gauge = TASK_FAILURE_TOTAL.labels(execution_type=execution_type)
    gauge.set(count)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def update_execution_duration(execution_type: str, status: str, avg_seconds: float):
    """Set the average execution duration for one (execution_type, status) pair."""
    gauge = EXECUTION_DURATION_SECONDS.labels(execution_type=execution_type, status=status)
    gauge.set(avg_seconds)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def update_worker_queue_depth(queue_id: str, count: int):
    """Set the queue-depth gauge for ``queue_id`` to ``count``."""
    gauge = WORKER_QUEUE_DEPTH.labels(queue_id=queue_id)
    gauge.set(count)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def update_llm_requests(model: str, status: str, count: int):
    """Set the LLM request gauge for one (model, status) pair to ``count``."""
    gauge = LLM_REQUESTS_TOTAL.labels(model=model, status=status)
    gauge.set(count)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def update_llm_latency(model: str, avg_seconds: float):
    """Set the average LLM latency gauge for ``model``."""
    gauge = LLM_LATENCY_SECONDS.labels(model=model)
    gauge.set(avg_seconds)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def update_llm_tokens(model: str, token_type: str, count: int):
    """Set the LLM token gauge for one (model, token_type) pair to ``count``."""
    gauge = LLM_TOKENS_TOTAL.labels(model=model, token_type=token_type)
    gauge.set(count)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def update_streaming_connections(count: int):
    """Set the (unlabelled) active streaming connections gauge to ``count``."""
    gauge = STREAMING_CONNECTIONS_ACTIVE
    gauge.set(count)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def update_tool_execution_duration(tool_name: str, avg_seconds: float):
    """Set the average tool execution duration gauge for ``tool_name``."""
    gauge = TOOL_EXECUTION_DURATION_SECONDS.labels(tool_name=tool_name)
    gauge.set(avg_seconds)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def update_tool_executions(tool_name: str, status: str, count: int):
    """Set the tool execution gauge for one (tool_name, status) pair to ``count``."""
    gauge = TOOL_EXECUTIONS_TOTAL.labels(tool_name=tool_name, status=status)
    gauge.set(count)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def update_execution_wait_time(execution_type: str, avg_seconds: float):
    """Set the average execution wait-time gauge for ``execution_type``."""
    gauge = EXECUTION_WAIT_TIME_SECONDS.labels(execution_type=execution_type)
    gauge.set(avg_seconds)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def update_webhook_requests(status: str, count: int):
    """Set the webhook request gauge for ``status`` to ``count``."""
    gauge = WEBHOOK_REQUESTS_TOTAL.labels(status=status)
    gauge.set(count)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def update_executions_by_org(organization_id: str, count: int):
    """Set the executions gauge for ``organization_id`` to ``count``."""
    gauge = EXECUTIONS_BY_ORG_TOTAL.labels(organization_id=organization_id)
    gauge.set(count)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def update_agents_active(organization_id: str, count: int):
    """Set the active agents gauge for ``organization_id`` to ``count``."""
    gauge = AGENTS_ACTIVE.labels(organization_id=organization_id)
    gauge.set(count)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def update_scheduled_jobs(status: str, count: int):
    """Set the scheduled jobs gauge for ``status`` to ``count``."""
    gauge = SCHEDULED_JOBS_TOTAL.labels(status=status)
    gauge.set(count)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def update_conversation_turns(execution_type: str, count: int):
    """Set the conversation turns gauge for ``execution_type`` to ``count``."""
    gauge = CONVERSATION_TURNS_TOTAL.labels(execution_type=execution_type)
    gauge.set(count)
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenTelemetry middleware for FastAPI.
|
|
3
|
+
|
|
4
|
+
This module provides middleware to:
|
|
5
|
+
- Add trace ID to response headers (X-Trace-ID)
|
|
6
|
+
- Enrich spans with organizational and user context from request.state
|
|
7
|
+
- Set span status based on HTTP status codes
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import structlog
|
|
11
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
12
|
+
from starlette.requests import Request
|
|
13
|
+
from starlette.responses import Response
|
|
14
|
+
from opentelemetry import trace
|
|
15
|
+
from opentelemetry.trace import Status, StatusCode
|
|
16
|
+
|
|
17
|
+
logger = structlog.get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TraceContextMiddleware(BaseHTTPMiddleware):
    """
    Middleware to enrich spans with organizational context and add trace ID to responses.

    This middleware should be added AFTER the OpenTelemetry FastAPI instrumentation,
    so that it can enrich the automatically created spans.
    """

    # Paths to exclude from tracing (health checks, metrics, etc.)
    EXCLUDED_PATHS = {
        "/api/health",
        "/health",
        "/health/live",
        "/health/ready",
        "/health/detailed",
        "/health/event-bus",
        "/health/temporal-credentials",
        "/ready",
        "/metrics",
        "/favicon.ico"
    }

    # Request bodies of this many bytes or more are never attached to the span.
    MAX_CAPTURED_BODY_BYTES = 10000

    # At most this many characters of a (non-sensitive) body are recorded.
    MAX_BODY_ATTRIBUTE_CHARS = 500

    # Markers are matched against the lower-cased body, so they must be
    # lower-case themselves ("apikey" covers "apiKey", "APIKEY", ...).
    SENSITIVE_BODY_MARKERS = ("password", "token", "secret", "api_key", "apikey")

    @classmethod
    def _sanitize_body(cls, body: str) -> str:
        """Return the value to record for ``body`` on the span.

        A body containing any sensitive marker is replaced entirely by
        ``"[REDACTED]"``; no prefix is kept, because the secret itself may be
        located in the first characters. Other bodies are truncated to
        ``MAX_BODY_ATTRIBUTE_CHARS`` characters.
        """
        lowered = body.lower()
        if any(marker in lowered for marker in cls.SENSITIVE_BODY_MARKERS):
            return "[REDACTED]"
        return body[:cls.MAX_BODY_ATTRIBUTE_CHARS]

    @staticmethod
    def _set_user_attributes(span, org: dict) -> None:
        """Copy the user fields that are present (truthy) in ``org`` onto ``span``."""
        for attribute, key in (
            ("user.id", "user_id"),
            ("user.email", "user_email"),
            ("user.name", "user_name"),
            ("user.avatar", "user_avatar"),
        ):
            value = org.get(key)
            if value:
                span.set_attribute(attribute, value)

    async def dispatch(self, request: Request, call_next):
        """
        Process request and enrich span with organizational context.

        Args:
            request: Incoming HTTP request
            call_next: Next middleware in chain

        Returns:
            HTTP response; ``X-Trace-ID`` and ``X-Span-ID`` headers are added
            when the current span is recording.
        """
        # Skip tracing for health checks and other excluded paths
        if request.url.path in self.EXCLUDED_PATHS:
            return await call_next(request)

        # Get current span (created by FastAPI instrumentation)
        span = trace.get_current_span()

        # Add span event for request received
        if span and span.is_recording():
            from control_plane_api.app.observability import add_span_event
            add_span_event(
                f"HTTP request received: {request.method} {request.url.path}",
                {
                    "http.method": request.method,
                    "http.path": request.url.path,
                    "http.query": request.url.query if request.url.query else "",
                    "client.host": request.client.host if request.client else "unknown",
                }
            )

        # Capture request body for non-GET requests (for debugging)
        request_body = None
        if request.method in ("POST", "PUT", "PATCH"):
            try:
                body_bytes = await request.body()

                # Re-create the receive channel so downstream handlers can
                # still read the body this middleware just consumed. It is
                # installed before decoding so that a decode failure cannot
                # leave downstream without a body.
                # NOTE(review): whether the pinned Starlette version still
                # needs this replay inside BaseHTTPMiddleware should be
                # confirmed.
                async def receive():
                    return {"type": "http.request", "body": body_bytes}
                request._receive = receive

                # Only capture small bodies (< 10KB) on the span
                if body_bytes and len(body_bytes) < self.MAX_CAPTURED_BODY_BYTES:
                    request_body = body_bytes.decode('utf-8')
            except Exception as e:
                logger.warning("failed_to_capture_request_body", error=str(e))

        # Enrich span with organizational context from request.state
        # These are set by the auth dependency (get_current_organization)
        if span and span.is_recording():
            try:
                # Debug: Check if organization exists
                has_org = hasattr(request.state, "organization")
                logger.debug(
                    "trace_context_check",
                    has_organization=has_org,
                    path=request.url.path
                )

                # Add organization context
                if has_org:
                    org = request.state.organization
                    if isinstance(org, dict):
                        span.set_attribute("organization.id", org.get("id", ""))
                        span.set_attribute("organization.name", org.get("name", ""))

                        from control_plane_api.app.observability import add_span_event
                        add_span_event(
                            "Organizational context added to span",
                            {
                                "organization.id": org.get("id", ""),
                                "organization.name": org.get("name", ""),
                                "user.email": org.get("user_email", ""),
                            }
                        )

                        logger.info(
                            "span_enriched_with_org",
                            org_id=org.get("id"),
                            path=request.url.path
                        )

                        # Add user context
                        self._set_user_attributes(span, org)

                # Add request ID
                if hasattr(request.state, "request_id"):
                    span.set_attribute("request.id", request.state.request_id)

                # Add request path and method
                span.set_attribute("http.route", request.url.path)
                span.set_attribute("http.method", request.method)

                # Add query parameters
                if request.url.query:
                    span.set_attribute("http.query", request.url.query)

                # Add request body for debugging (sensitive bodies are redacted)
                if request_body:
                    span.set_attribute("http.request.body", self._sanitize_body(request_body))

            except Exception as e:
                logger.warning(
                    "span_enrichment_failed",
                    error=str(e),
                    exc_info=True
                )

        # Get trace ID before processing request for logging correlation
        trace_id = None
        span_id = None
        if span and span.is_recording():
            span_context = span.get_span_context()
            trace_id = format(span_context.trace_id, '032x')
            span_id = format(span_context.span_id, '016x')

        # Process request
        response = await call_next(request)

        # Add trace ID to response headers and capture response
        if span and span.is_recording():
            try:
                response.headers["X-Trace-ID"] = trace_id
                response.headers["X-Span-ID"] = span_id

                # Set span status based on HTTP status code
                from control_plane_api.app.observability import add_span_event
                if response.status_code >= 500:
                    span.set_status(Status(StatusCode.ERROR, f"HTTP {response.status_code}"))
                    span.set_attribute("error", True)
                    add_span_event(
                        f"HTTP response: Server error {response.status_code}",
                        {
                            "http.status_code": response.status_code,
                            "status": "error",
                        }
                    )
                elif response.status_code >= 400:
                    # Client errors are not span errors (they're expected)
                    span.set_attribute("http.client_error", True)
                    span.set_status(Status(StatusCode.OK))
                    add_span_event(
                        f"HTTP response: Client error {response.status_code}",
                        {
                            "http.status_code": response.status_code,
                            "status": "client_error",
                        }
                    )
                else:
                    span.set_status(Status(StatusCode.OK))
                    add_span_event(
                        f"HTTP response: Success {response.status_code}",
                        {
                            "http.status_code": response.status_code,
                            "status": "success",
                        }
                    )

                # Add HTTP status code attribute
                span.set_attribute("http.status_code", response.status_code)

                # NOW enrich span with user context (AFTER route handler ran, so organization is set)
                # This is the correct place because auth dependency sets request.state.organization
                org = getattr(request.state, "organization", None)
                if isinstance(org, dict):
                    span.set_attribute("organization.id", org.get("id", ""))
                    self._set_user_attributes(span, org)

                # Log request completion with trace correlation. A non-dict
                # organization yields None instead of raising and losing the
                # log line.
                logger.info(
                    "http_request_completed",
                    method=request.method,
                    path=request.url.path,
                    status_code=response.status_code,
                    trace_id=trace_id,
                    span_id=span_id,
                    organization_id=org.get("id") if isinstance(org, dict) else None
                )

            except Exception as e:
                logger.warning(
                    "trace_id_header_failed",
                    error=str(e),
                    exc_info=True
                )

        return response
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Optional OpenTelemetry imports for environments with size constraints.
|
|
3
|
+
|
|
4
|
+
This module provides safe imports for OpenTelemetry that gracefully degrade
|
|
5
|
+
when the packages are not available (e.g., in Vercel serverless deployments
|
|
6
|
+
where package size matters).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Optional, Any
|
|
11
|
+
from contextlib import nullcontext
|
|
12
|
+
|
|
13
|
+
# Check if we should disable tracing entirely
|
|
14
|
+
# Tracing is on unless TRACING_ENABLED is set to something other than
# "true" / "1" / "yes" (case-insensitive).
TRACING_ENABLED = os.getenv("TRACING_ENABLED", "true").lower() in ("true", "1", "yes")

# Try to import OpenTelemetry, but fall back gracefully
try:
    if not TRACING_ENABLED:
        # Reuse the ImportError path so that "disabled" and "not installed"
        # end up with the same no-op implementations.
        raise ImportError("Tracing disabled via TRACING_ENABLED env var")

    from opentelemetry import trace as _trace
    from opentelemetry.trace import Status, StatusCode, Span, Tracer

    HAS_OPENTELEMETRY = True
    trace = _trace

except ImportError:
    HAS_OPENTELEMETRY = False

    # Create no-op implementations
    class NoOpSpan:
        """No-op span that does nothing.

        Accepts the calls instrumented code commonly makes on a span, so that
        code does not need to special-case disabled tracing.
        """

        def set_attribute(self, key: str, value: Any) -> None:
            pass

        def set_attributes(self, attributes: dict) -> None:
            pass

        def add_event(
            self,
            name: str,
            attributes: Optional[dict] = None,
            timestamp: Optional[int] = None,
        ) -> None:
            pass

        def update_name(self, name: str) -> None:
            pass

        def set_status(self, status: Any, description: Optional[str] = None) -> None:
            pass

        def record_exception(
            self,
            exception: BaseException,
            attributes: Optional[dict] = None,
            timestamp: Optional[int] = None,
            escaped: bool = False,
        ) -> None:
            pass

        def end(self, end_time: Optional[int] = None) -> None:
            pass

        def is_recording(self) -> bool:
            return False

        def get_span_context(self) -> Any:
            # There is no span context without OpenTelemetry; callers are
            # expected to check is_recording() before using it.
            return None

        def __enter__(self):
            return self

        def __exit__(self, *args):
            pass

    class NoOpTracer:
        """No-op tracer that does nothing."""

        def start_as_current_span(self, name: str, *args, **kwargs):
            return nullcontext(NoOpSpan())

        def start_span(self, name: str, *args, **kwargs):
            return NoOpSpan()

    class NoOpTracerProvider:
        """No-op tracer provider."""

        def get_tracer(self, *args, **kwargs) -> NoOpTracer:
            return NoOpTracer()

    class NoOpTrace:
        """No-op trace module replacement."""

        @staticmethod
        def get_tracer(name: str, version: str = "", *args, **kwargs) -> NoOpTracer:
            return NoOpTracer()

        @staticmethod
        def get_current_span() -> NoOpSpan:
            return NoOpSpan()

        @staticmethod
        def get_tracer_provider() -> NoOpTracerProvider:
            return NoOpTracerProvider()

        @staticmethod
        def set_tracer_provider(provider: Any) -> None:
            pass

    # Status and StatusCode placeholders
    class Status:
        """No-op Status."""

        def __init__(self, status_code: Any, description: str = ""):
            pass

    class StatusCode:
        """No-op StatusCode."""
        OK = "OK"
        ERROR = "ERROR"
        UNSET = "UNSET"

    # Type hints
    Span = NoOpSpan
    Tracer = NoOpTracer

    # Create the no-op trace module
    trace = NoOpTrace()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_tracer(name: str, version: str = "") -> Tracer:
    """Return a tracer for ``name``/``version`` from the module-level ``trace`` object.

    ``trace`` is the real OpenTelemetry module when it was imported, and the
    no-op replacement otherwise.
    """
    tracer = trace.get_tracer(name, version)
    return tracer
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_current_span() -> Span:
    """Return the current span from the module-level ``trace`` object.

    ``trace`` is the real OpenTelemetry module when it was imported, and the
    no-op replacement otherwise.
    """
    current = trace.get_current_span()
    return current
|