kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Trace Retention Service.
|
|
3
|
+
|
|
4
|
+
Handles automatic cleanup of old traces based on configurable retention period.
|
|
5
|
+
Also provides storage statistics per organization.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Configurable retention period (default: 30 days)
|
|
9
|
+
- Per-organization storage stats
|
|
10
|
+
- Batch deletion for performance
|
|
11
|
+
- Scheduled cleanup job support
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import structlog
|
|
15
|
+
from datetime import datetime, timezone, timedelta
|
|
16
|
+
from typing import Optional, Dict, Any
|
|
17
|
+
|
|
18
|
+
from sqlalchemy.orm import Session
|
|
19
|
+
from sqlalchemy import func, delete
|
|
20
|
+
|
|
21
|
+
from control_plane_api.app.config import settings
|
|
22
|
+
from control_plane_api.app.database import get_session_local
|
|
23
|
+
from control_plane_api.app.models.trace import Trace, Span
|
|
24
|
+
|
|
25
|
+
logger = structlog.get_logger()
|
|
26
|
+
|
|
27
|
+
# Configuration
|
|
28
|
+
RETENTION_DAYS = getattr(settings, 'OTEL_LOCAL_STORAGE_RETENTION_DAYS', 30)  # in days; 30 is used when settings has no such attribute
|
|
29
|
+
BATCH_SIZE = 1000 # Number of traces to delete per batch
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TraceRetentionService:
    """Service for managing trace retention and cleanup.

    Deletes traces older than a configurable number of days, reports storage
    statistics, and keeps in-memory counters describing its own cleanup runs.
    """

    def __init__(self, retention_days: Optional[int] = None):
        """
        Initialize the retention service.

        Args:
            retention_days: Number of days to retain traces. ``None`` (or any
                other falsy value, including ``0``) selects the module-level
                ``RETENTION_DAYS`` default.
        """
        self.retention_days = retention_days or RETENTION_DAYS
        # Cumulative, in-memory counters for this instance (see get_service_stats).
        self._stats = {
            "last_cleanup": None,
            "traces_deleted": 0,
            "spans_deleted": 0,
            "errors": 0,
        }

    def get_cutoff_date(self) -> datetime:
        """Get the cutoff date for trace retention (current UTC time minus retention_days)."""
        return datetime.now(timezone.utc) - timedelta(days=self.retention_days)

    async def cleanup_old_traces(
        self,
        organization_id: Optional[str] = None,
        batch_size: int = BATCH_SIZE,
    ) -> Dict[str, Any]:
        """
        Delete traces older than the retention period.

        Traces are deleted in batches of ``batch_size``; each batch is committed
        on its own, so batches committed before a failure stay deleted.

        NOTE(review): the body uses the synchronous Session API and contains no
        await points, so it appears to run to completion without yielding to the
        event loop - confirm that is acceptable for the callers.

        Args:
            organization_id: Optional org to limit cleanup to
            batch_size: Number of traces to delete per batch

        Returns:
            Dict with ``traces_deleted`` and ``spans_deleted`` (ints) and
            ``cutoff_date`` (ISO-8601 string)

        Raises:
            Exception: Any error raised while opening the session or deleting
                a batch is logged, counted once in the service stats and
                re-raised.
        """
        cutoff_date = self.get_cutoff_date()
        total_traces_deleted = 0
        total_spans_deleted = 0

        logger.info(
            "trace_cleanup_starting",
            retention_days=self.retention_days,
            cutoff_date=cutoff_date.isoformat(),
            organization_id=organization_id,
        )

        try:
            SessionLocal = get_session_local()
            session = SessionLocal()

            try:
                while True:
                    # Find old traces to delete
                    query = session.query(Trace).filter(
                        Trace.started_at < cutoff_date
                    )

                    if organization_id:
                        query = query.filter(Trace.organization_id == organization_id)

                    # Get one batch of expired traces
                    traces_to_delete = query.limit(batch_size).all()

                    if not traces_to_delete:
                        break

                    trace_ids = [t.trace_id for t in traces_to_delete]

                    # Count spans being deleted (for stats)
                    span_count = session.query(func.count(Span.id)).filter(
                        Span.trace_id.in_(trace_ids)
                    ).scalar() or 0

                    # Delete traces; spans are expected to be removed through
                    # the FK cascade rather than by an explicit delete here.
                    for trace in traces_to_delete:
                        session.delete(trace)

                    session.commit()

                    batch_traces = len(trace_ids)
                    total_traces_deleted += batch_traces
                    total_spans_deleted += span_count

                    # Record committed work right away so the service stats stay
                    # accurate even if a later batch fails.
                    self._stats["traces_deleted"] += batch_traces
                    self._stats["spans_deleted"] += span_count

                    logger.info(
                        "trace_cleanup_batch_completed",
                        traces_deleted=batch_traces,
                        spans_deleted=span_count,
                        total_traces_deleted=total_traces_deleted,
                    )

            except Exception as e:
                # The error counter is bumped by the outer handler only, so a
                # failed batch is counted once rather than twice.
                session.rollback()
                logger.error("trace_cleanup_batch_failed", error=str(e), exc_info=True)
                raise

            finally:
                session.close()

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("trace_cleanup_failed", error=str(e), exc_info=True)
            raise

        # Only a fully successful run updates the last-cleanup timestamp.
        self._stats["last_cleanup"] = datetime.now(timezone.utc).isoformat()

        logger.info(
            "trace_cleanup_completed",
            total_traces_deleted=total_traces_deleted,
            total_spans_deleted=total_spans_deleted,
            retention_days=self.retention_days,
        )

        return {
            "traces_deleted": total_traces_deleted,
            "spans_deleted": total_spans_deleted,
            "cutoff_date": cutoff_date.isoformat(),
        }

    async def get_storage_stats(
        self,
        organization_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Get storage statistics for traces.

        Args:
            organization_id: Optional org to get stats for

        Returns:
            Dict with total counts, a per-status breakdown, the oldest/newest
            ``started_at`` values, average duration and span count, and
            retention information (including how many traces are already past
            the cutoff).

        Raises:
            Exception: Any error is logged as ``get_storage_stats_failed`` and
                re-raised.
        """
        try:
            SessionLocal = get_session_local()
            session = SessionLocal()

            try:
                # Base queries
                trace_query = session.query(Trace)
                span_query = session.query(Span)

                if organization_id:
                    trace_query = trace_query.filter(Trace.organization_id == organization_id)
                    span_query = span_query.filter(Span.organization_id == organization_id)

                # Total counts
                total_traces = trace_query.count()
                total_spans = span_query.count()

                # Counts by status
                success_count = trace_query.filter(Trace.status == "success").count()
                error_count = trace_query.filter(Trace.status == "error").count()
                running_count = trace_query.filter(Trace.status == "running").count()

                # Date range
                oldest_trace = trace_query.order_by(Trace.started_at.asc()).first()
                newest_trace = trace_query.order_by(Trace.started_at.desc()).first()
                oldest_started = oldest_trace.started_at if oldest_trace else None
                newest_started = newest_trace.started_at if newest_trace else None

                # Average metrics
                avg_duration = session.query(func.avg(Trace.duration_ms)).filter(
                    Trace.duration_ms.isnot(None)
                )
                avg_span_count = session.query(func.avg(Trace.span_count))

                if organization_id:
                    avg_duration = avg_duration.filter(Trace.organization_id == organization_id)
                    avg_span_count = avg_span_count.filter(Trace.organization_id == organization_id)

                avg_duration_val = avg_duration.scalar()
                avg_span_count_val = avg_span_count.scalar()

                # Retention info
                cutoff_date = self.get_cutoff_date()
                traces_to_expire = trace_query.filter(Trace.started_at < cutoff_date).count()

                return {
                    "total_traces": total_traces,
                    "total_spans": total_spans,
                    "status_breakdown": {
                        "success": success_count,
                        "error": error_count,
                        "running": running_count,
                    },
                    "date_range": {
                        "oldest": oldest_started.isoformat() if oldest_started is not None else None,
                        "newest": newest_started.isoformat() if newest_started is not None else None,
                    },
                    "averages": {
                        # Compare against None explicitly: an average of 0 is a
                        # real value and must not be reported as "no data".
                        "duration_ms": (
                            round(avg_duration_val, 2) if avg_duration_val is not None else None
                        ),
                        "span_count": (
                            round(avg_span_count_val, 2) if avg_span_count_val is not None else None
                        ),
                    },
                    "retention": {
                        "retention_days": self.retention_days,
                        "cutoff_date": cutoff_date.isoformat(),
                        "traces_to_expire": traces_to_expire,
                    },
                    "organization_id": organization_id,
                    "retrieved_at": datetime.now(timezone.utc).isoformat(),
                }

            finally:
                session.close()

        except Exception as e:
            logger.error("get_storage_stats_failed", error=str(e), exc_info=True)
            raise

    def get_service_stats(self) -> Dict[str, Any]:
        """Get retention service statistics (a copy of the counters plus retention_days)."""
        return {
            **self._stats,
            "retention_days": self.retention_days,
        }
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# Singleton instance
|
|
252
|
+
_retention_service: Optional[TraceRetentionService] = None
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def get_retention_service() -> TraceRetentionService:
    """Return the shared TraceRetentionService, creating it on first use."""
    global _retention_service
    service = _retention_service
    if service is None:
        # First call: build the instance and cache it at module level.
        service = TraceRetentionService()
        _retention_service = service
    return service
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
async def run_cleanup_job():
    """
    Run one trace cleanup pass with the shared retention service.

    Intended as the entry point for a scheduled job (e.g., Temporal workflow,
    cron job, or APScheduler).

    Returns:
        The statistics dict produced by ``cleanup_old_traces()``.
    """
    summary = await get_retention_service().cleanup_old_traces()
    # The summary keys become structured fields of the log event.
    logger.info("scheduled_trace_cleanup_completed", **summary)
    return summary
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Scheduler configuration
|
|
282
|
+
CLEANUP_INTERVAL_HOURS = 6  # Hours the scheduler loop sleeps before each cleanup run
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
async def _retention_scheduler_loop():
    """
    Background loop that runs retention cleanup periodically.

    Sleeps CLEANUP_INTERVAL_HOURS, runs one cleanup, and repeats. A failed
    cleanup is logged and followed by a 60 second pause, after which the loop
    goes back to waiting for the next interval. Uses asyncio.sleep for
    non-blocking scheduling.

    Cancellation is handled in one place around the whole loop, so it is logged
    and ends the coroutine normally wherever it arrives - including during the
    post-error pause.
    """
    import asyncio

    logger.info(
        "retention_scheduler_started",
        interval_hours=CLEANUP_INTERVAL_HOURS,
        retention_days=RETENTION_DAYS,
    )

    try:
        while True:
            try:
                # Wait for the interval
                await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)

                # Run cleanup
                logger.info("retention_cleanup_starting")
                result = await run_cleanup_job()
                logger.info(
                    "retention_cleanup_completed",
                    traces_deleted=result.get("traces_deleted", 0),
                    spans_deleted=result.get("spans_deleted", 0),
                )

            except asyncio.CancelledError:
                # Listed before ``except Exception`` so cancellation is never
                # mistaken for a cleanup failure on interpreters where
                # CancelledError derives from Exception.
                raise
            except Exception as e:
                logger.error(
                    "retention_scheduler_error",
                    error=str(e),
                    exc_info=True,
                )
                # Continue running despite errors
                await asyncio.sleep(60)  # Brief pause before the next cycle

    except asyncio.CancelledError:
        logger.info("retention_scheduler_cancelled")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
async def start_retention_scheduler():
    """
    Launch the retention scheduler loop as a background asyncio task.

    Returns:
        asyncio.Task: Handle for the running loop; keep it so the scheduler can
        be cancelled on shutdown.
    """
    import asyncio

    return asyncio.create_task(_retention_scheduler_loop())
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
async def stop_retention_scheduler(task):
    """
    Stop the retention cleanup scheduler.

    Cancels the task if it is still running and waits for it to finish.

    Args:
        task: The scheduler task returned by start_retention_scheduler().
            ``None`` or an already finished task is accepted and left alone.
    """
    import asyncio

    if task and not task.done():
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected outcome of cancel(). CancelledError derives from
            # BaseException on Python 3.8+, so ``except Exception`` would not
            # catch it.
            pass
        except Exception as e:
            # Shutdown stays best-effort, but the failure is no longer hidden.
            logger.warning(
                "retention_scheduler_stop_error",
                error=str(e),
                exc_info=True,
            )

    logger.info("retention_scheduler_stopped")
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Worker Queue Metrics Service.
|
|
3
|
+
|
|
4
|
+
This service provides business logic for calculating worker queue metrics
|
|
5
|
+
including worker health, task statistics, and performance metrics.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from typing import Optional, Dict
|
|
11
|
+
from sqlalchemy.orm import Session
|
|
12
|
+
from sqlalchemy import func
|
|
13
|
+
|
|
14
|
+
from control_plane_api.app.models.worker import WorkerQueue, WorkerHeartbeat
|
|
15
|
+
from control_plane_api.app.models.execution import Execution
|
|
16
|
+
from control_plane_api.app.schemas.worker_queue_observability_schemas import WorkerQueueMetricsResponse
|
|
17
|
+
|
|
18
|
+
logger = structlog.get_logger()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class WorkerQueueMetricsService:
    """Service for calculating worker queue metrics"""

    def __init__(self, db: Session):
        """
        Args:
            db: SQLAlchemy session used for every query issued by this service
        """
        self.db = db

    async def get_queue_metrics(
        self,
        queue_id: str,
        organization_id: str
    ) -> WorkerQueueMetricsResponse:
        """
        Calculate comprehensive metrics for a worker queue.

        Args:
            queue_id: Worker queue UUID
            organization_id: Organization ID

        Returns:
            WorkerQueueMetricsResponse with calculated metrics

        Raises:
            ValueError: If queue not found or doesn't belong to organization
        """
        # Verify queue exists and belongs to organization
        queue = self.db.query(WorkerQueue).filter(
            WorkerQueue.id == queue_id,
            WorkerQueue.organization_id == organization_id
        ).first()

        if not queue:
            raise ValueError("Worker queue not found")

        # NOTE(review): utcnow() yields a naive datetime and is deprecated since
        # Python 3.12; kept because the timezone-awareness of the compared
        # columns is not visible here - confirm before switching.
        now = datetime.utcnow()

        # Calculate worker status counts
        worker_stats = self._get_worker_status_counts(queue_id, now)

        # Calculate 24h task metrics
        task_metrics = self._get_task_metrics_24h(queue_id, now)

        # Get last activity timestamp
        last_activity = self._get_last_activity(queue_id)

        # Build response
        return WorkerQueueMetricsResponse(
            queue_id=queue_id,
            active_workers=worker_stats["active"],
            idle_workers=worker_stats["idle"],
            busy_workers=worker_stats["busy"],
            total_workers=worker_stats["total"],
            tasks_processed_24h=task_metrics["processed"],
            tasks_failed_24h=task_metrics["failed"],
            tasks_pending=task_metrics["pending"],
            avg_task_duration_ms=task_metrics["avg_duration_ms"],
            error_rate_percent=task_metrics["error_rate"],
            last_error_at=task_metrics["last_error_at"],
            task_queue_backlog=0,  # TODO: Implement Temporal queue metrics
            task_queue_pollers=0,  # TODO: Implement Temporal queue metrics
            last_activity_at=last_activity,
            updated_at=now
        )

    def _get_worker_status_counts(self, queue_id: str, now: datetime) -> Dict[str, int]:
        """
        Get worker status counts from WorkerHeartbeat table.

        Only heartbeats newer than 90 seconds before ``now`` are considered;
        older ones are treated as stale and ignored. ``total`` counts every
        non-stale heartbeat regardless of its status.
        """
        stale_threshold = now - timedelta(seconds=90)

        # Query recent heartbeats
        heartbeats = self.db.query(WorkerHeartbeat).filter(
            WorkerHeartbeat.worker_queue_id == queue_id,
            WorkerHeartbeat.last_heartbeat > stale_threshold
        ).all()

        active = sum(1 for hb in heartbeats if hb.status == "active")
        idle = sum(1 for hb in heartbeats if hb.status == "idle")
        busy = sum(1 for hb in heartbeats if hb.status == "busy")
        total = len(heartbeats)

        logger.info(
            "worker_status_calculated",
            queue_id=queue_id,
            active=active,
            idle=idle,
            busy=busy,
            total=total
        )

        return {
            "active": active,
            "idle": idle,
            "busy": busy,
            "total": total
        }

    def _get_task_metrics_24h(self, queue_id: str, now: datetime) -> Dict:
        """
        Calculate task metrics for the last 24 hours.

        ``processed``, ``failed``, ``avg_duration_ms`` and ``error_rate`` cover
        executions created within the 24 hours before ``now``. ``pending`` and
        ``last_error_at`` are not limited to that window.
        """
        twenty_four_hours_ago = now - timedelta(hours=24)

        # Get executions in last 24h
        executions_24h = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.created_at >= twenty_four_hours_ago
        ).all()

        # Count processed and failed tasks
        processed = sum(1 for e in executions_24h if e.status in ("completed", "failed"))
        failed = sum(1 for e in executions_24h if e.status == "failed")

        # Get pending tasks count
        pending = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.status == "pending"
        ).count()

        # Calculate average duration for completed tasks that have both timestamps
        durations_ms = [
            (e.completed_at - e.started_at).total_seconds() * 1000
            for e in executions_24h
            if e.status == "completed" and e.started_at and e.completed_at
        ]
        avg_duration_ms = sum(durations_ms) / len(durations_ms) if durations_ms else 0

        # Calculate error rate
        error_rate = (failed / processed * 100) if processed > 0 else 0

        # Get last error timestamp. Rows without completed_at are excluded:
        # databases that sort NULLs first in descending order would otherwise
        # return such a row and report "no last error".
        last_error = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.status == "failed",
            Execution.completed_at.isnot(None)
        ).order_by(Execution.completed_at.desc()).first()

        last_error_at = last_error.completed_at if last_error else None

        logger.info(
            "task_metrics_calculated",
            queue_id=queue_id,
            processed=processed,
            failed=failed,
            pending=pending,
            avg_duration_ms=avg_duration_ms,
            error_rate=error_rate
        )

        return {
            "processed": processed,
            "failed": failed,
            "pending": pending,
            "avg_duration_ms": avg_duration_ms,
            "error_rate": error_rate,
            "last_error_at": last_error_at
        }

    def _get_last_activity(self, queue_id: str) -> Optional[datetime]:
        """Get timestamp of the most recent heartbeat for the queue, or None if there is none"""
        last_activity = self.db.query(WorkerHeartbeat).filter(
            WorkerHeartbeat.worker_queue_id == queue_id
        ).order_by(WorkerHeartbeat.last_heartbeat.desc()).first()

        return last_activity.last_heartbeat if last_activity else None
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Workflow Cancellation Manager - handles workflow-specific cancellation without affecting the agent execution.
|
|
3
|
+
|
|
4
|
+
This allows cancelling individual workflow tool calls while the agent continues to run.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Set, Any, Optional
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
import structlog
|
|
10
|
+
import threading
|
|
11
|
+
|
|
12
|
+
logger = structlog.get_logger()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class WorkflowCancellationManager:
    """
    Manages cancellation flags for active workflow executions.

    Workflow tool calls check these flags periodically to see if they should stop,
    without affecting the parent agent/team execution.

    State is held in two in-memory dicts guarded by a single lock.
    """

    def __init__(self):
        # Key: workflow_execution_key (execution_id + workflow_message_id), Value: cancellation time
        self._cancelled_workflows: Dict[str, str] = {}
        # Key: workflow_execution_key, Value: threading.Event for immediate cancellation
        self._cancellation_events: Dict[str, threading.Event] = {}
        self._lock = threading.Lock()

    def _make_key(self, execution_id: str, workflow_message_id: str) -> str:
        """Create a unique key for a workflow execution."""
        return f"{execution_id}:{workflow_message_id}"

    def register_workflow(self, execution_id: str, workflow_message_id: str) -> threading.Event:
        """
        Register a workflow execution and get a cancellation event.

        If cancellation was already requested for this workflow before it was
        registered, the returned event is already set, so waiting on the event
        and polling is_cancelled() give the same answer.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            threading.Event that will be set when cancellation is requested
        """
        key = self._make_key(execution_id, workflow_message_id)
        event = threading.Event()

        with self._lock:
            self._cancellation_events[key] = event
            # Honour a cancellation that arrived before registration.
            if key in self._cancelled_workflows:
                event.set()

        logger.info(
            "workflow_registered",
            execution_id=execution_id[:8],
            workflow_message_id=workflow_message_id[-12:],
            key=key
        )

        return event

    def request_cancellation(self, execution_id: str, workflow_message_id: str) -> bool:
        """
        Request cancellation of a specific workflow.

        The cancellation flag is recorded whether or not the workflow has been
        registered; the workflow's event is set only if it is registered.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            Always True: the request is recorded even when no workflow is
            currently registered under the key.
        """
        key = self._make_key(execution_id, workflow_message_id)

        with self._lock:
            self._cancelled_workflows[key] = datetime.now(timezone.utc).isoformat()
            event = self._cancellation_events.get(key)
            # Signal the cancellation event immediately
            if event is not None:
                event.set()

        if event is not None:
            logger.info(
                "workflow_cancellation_event_signaled",
                execution_id=execution_id[:8],
                workflow_message_id=workflow_message_id[-12:],
                key=key
            )

        logger.info(
            "workflow_cancellation_requested",
            execution_id=execution_id[:8],
            workflow_message_id=workflow_message_id[-12:],
            key=key
        )

        return True

    def is_cancelled(self, execution_id: str, workflow_message_id: str) -> bool:
        """
        Check if a workflow has been cancelled.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            True if the workflow has been cancelled
        """
        key = self._make_key(execution_id, workflow_message_id)
        with self._lock:
            return key in self._cancelled_workflows

    def clear_cancellation(self, execution_id: str, workflow_message_id: str) -> None:
        """
        Clear the cancellation flag for a workflow (called when workflow completes/fails).

        Removes both the cancellation flag and the registered event, if present,
        and logs ``workflow_cancellation_cleared`` when something was removed.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID
        """
        key = self._make_key(execution_id, workflow_message_id)

        with self._lock:
            had_flag = self._cancelled_workflows.pop(key, None) is not None
            had_event = self._cancellation_events.pop(key, None) is not None

        if had_flag or had_event:
            logger.info(
                "workflow_cancellation_cleared",
                execution_id=execution_id[:8],
                workflow_message_id=workflow_message_id[-12:]
            )

    def get_active_count(self) -> int:
        """Get number of workflows with pending cancellation."""
        with self._lock:
            return len(self._cancelled_workflows)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Global singleton instance
|
|
135
|
+
# Created once when this module is imported; every importer shares this instance.
workflow_cancellation_manager = WorkflowCancellationManager()
|