kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,1491 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Control Plane Client - Clean API for worker to communicate with Control Plane.
|
|
3
|
+
|
|
4
|
+
This centralizes all HTTP and WebSocket communication between worker and Control Plane,
|
|
5
|
+
providing a clean interface for:
|
|
6
|
+
- Event streaming (real-time UI updates via WebSocket or HTTP fallback)
|
|
7
|
+
- Session persistence (history storage)
|
|
8
|
+
- Metadata caching (execution types)
|
|
9
|
+
- Skill resolution
|
|
10
|
+
- Bi-directional control messages
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
from control_plane_client import get_control_plane_client
|
|
14
|
+
|
|
15
|
+
client = get_control_plane_client()
|
|
16
|
+
await client.start_websocket() # If WebSocket enabled
|
|
17
|
+
await client.publish_event_async(execution_id, "message_chunk", {...})
|
|
18
|
+
client.persist_session(execution_id, session_id, user_id, messages)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import httpx
|
|
23
|
+
import asyncio
|
|
24
|
+
import threading
|
|
25
|
+
from datetime import datetime, timezone
|
|
26
|
+
from typing import Optional, Dict, List, Any
|
|
27
|
+
import structlog
|
|
28
|
+
|
|
29
|
+
# Module-level structlog logger, created once at import time.
logger = structlog.get_logger()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ControlPlaneClient:
|
|
33
|
+
"""Client for communicating with the Control Plane API from workers."""
|
|
34
|
+
|
|
35
|
+
def __init__(
    self,
    base_url: str,
    api_key: str,
    websocket_enabled: bool = False,
    websocket_url: Optional[str] = None,
    worker_id: Optional[str] = None,
    event_bus_config: Optional[Dict[str, Any]] = None
):
    """
    Build a client bound to a single Control Plane endpoint.

    Construction wires up, in order: the optional multi-provider event bus
    manager, thread-local storage for a reusable event loop, one sync and
    one async httpx client, the optional WebSocket client, and the
    bookkeeping used to track SSE stream completion.

    Args:
        base_url: Control Plane URL (e.g., http://localhost:8000);
            trailing slashes are stripped
        api_key: Kubiya API key, sent as a ``UserKey`` Authorization header
        websocket_enabled: Whether WebSocket is enabled
        websocket_url: WebSocket URL if enabled
        worker_id: Worker ID for WebSocket connection
        event_bus_config: Optional event bus configuration dict keyed by
            provider name ("http", "redis", "websocket", "nats")
    """
    self.base_url = base_url.rstrip("/")
    self.api_key = api_key
    self.headers = {"Authorization": f"UserKey {api_key}"}
    self.worker_id = worker_id

    # Event bus manager for multi-provider support; stays None when no
    # config is supplied or when construction fails for any reason.
    self.event_bus_manager = None
    if event_bus_config:
        try:
            from control_plane_api.app.lib.event_bus.manager import (
                EventBusManager,
                EventBusManagerConfig,
            )
            from control_plane_api.app.lib.event_bus.providers.http_provider import HTTPConfig
            from control_plane_api.app.lib.event_bus.providers.redis_provider import RedisConfig
            from control_plane_api.app.lib.event_bus.providers.websocket_provider import WebSocketConfig

            # Providers whose config classes are always importable,
            # processed in a fixed order.
            builtin_providers = (
                ("http", HTTPConfig),
                ("redis", RedisConfig),
                ("websocket", WebSocketConfig),
            )

            # Turn each non-empty config dict into its config object
            parsed_config = {}
            for provider_key, config_cls in builtin_providers:
                if provider_key in event_bus_config and event_bus_config[provider_key]:
                    parsed_config[provider_key] = config_cls(**event_bus_config[provider_key])

            # NATS is an optional extra: a missing package only skips it
            if "nats" in event_bus_config and event_bus_config["nats"]:
                try:
                    from control_plane_api.app.lib.event_bus.providers.nats_provider import NATSConfig
                    parsed_config["nats"] = NATSConfig(**event_bus_config["nats"])
                except ImportError:
                    logger.warning("nats_provider_not_installed", message="Install with: pip install kubiya-control-plane-api[nats]")

            self.event_bus_manager = EventBusManager(EventBusManagerConfig(**parsed_config))
            logger.info(
                "worker_event_bus_initialized",
                worker_id=worker_id[:8] if worker_id else "unknown",
                providers=list(parsed_config),
            )
        except ImportError as e:
            logger.warning(
                "event_bus_dependencies_missing",
                error=str(e),
                message="Install event bus dependencies with: pip install kubiya-control-plane-api[event-bus]"
            )
        except Exception as e:
            logger.error(
                "worker_event_bus_init_failed",
                error=str(e),
                worker_id=worker_id[:8] if worker_id else "unknown"
            )

    # Thread-local slot holding one reusable event loop per thread, so the
    # sync publish_event() does not create a new loop on every call.
    self._thread_local = threading.local()

    # Sync client for non-async callers
    sync_timeout = httpx.Timeout(30.0, connect=5.0, read=30.0, write=10.0)
    sync_limits = httpx.Limits(max_connections=10, max_keepalive_connections=5)
    self._client = httpx.Client(timeout=sync_timeout, limits=sync_limits)

    # Async client for streaming/real-time operations, with a longer read
    # timeout than the sync client
    async_timeout = httpx.Timeout(60.0, connect=5.0, read=60.0, write=10.0)
    async_limits = httpx.Limits(max_connections=20, max_keepalive_connections=10)
    self._async_client = httpx.AsyncClient(timeout=async_timeout, limits=async_limits)

    # WebSocket client for a persistent connection (None unless enabled
    # and the environment check allows it)
    self.websocket_client: Optional[Any] = None

    if websocket_enabled and websocket_url and worker_id:
        from control_plane_api.worker.utils.environment import should_use_websocket

        if not should_use_websocket():
            logger.info("websocket_skipped_serverless_environment")
        else:
            from control_plane_api.worker.websocket_client import WorkerWebSocketClient

            self.websocket_client = WorkerWebSocketClient(
                worker_id=worker_id,
                websocket_url=websocket_url,
                api_key=api_key,
                on_control_message=self._handle_control_message
            )
            logger.info("websocket_client_initialized", worker_id=worker_id[:8])

    # SSE stream completion tracking: execution_id -> Event that is set
    # once streaming has finished, so a worker can wait before shutdown.
    self._sse_stream_completed: Dict[str, asyncio.Event] = {}
    self._sse_completion_lock = asyncio.Lock()
|
|
153
|
+
|
|
154
|
+
def __del__(self):
|
|
155
|
+
"""Close the HTTP clients on cleanup."""
|
|
156
|
+
try:
|
|
157
|
+
self._client.close()
|
|
158
|
+
except:
|
|
159
|
+
pass
|
|
160
|
+
# Async client cleanup happens via context manager or explicit close
|
|
161
|
+
|
|
162
|
+
def _get_thread_event_loop(self) -> asyncio.AbstractEventLoop:
|
|
163
|
+
"""
|
|
164
|
+
Get or create a persistent event loop for the current thread.
|
|
165
|
+
|
|
166
|
+
This reuses the same event loop for all publish_event() calls within
|
|
167
|
+
a thread, preventing resource leaks and "await wasn't used with future"
|
|
168
|
+
errors that occur when creating a new loop per call.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
The thread-local event loop
|
|
172
|
+
"""
|
|
173
|
+
if not hasattr(self._thread_local, 'loop') or self._thread_local.loop is None or self._thread_local.loop.is_closed():
|
|
174
|
+
self._thread_local.loop = asyncio.new_event_loop()
|
|
175
|
+
asyncio.set_event_loop(self._thread_local.loop)
|
|
176
|
+
logger.debug(
|
|
177
|
+
"created_thread_local_event_loop",
|
|
178
|
+
thread_id=threading.current_thread().ident,
|
|
179
|
+
thread_name=threading.current_thread().name,
|
|
180
|
+
)
|
|
181
|
+
return self._thread_local.loop
|
|
182
|
+
|
|
183
|
+
def close_thread_event_loop(self):
|
|
184
|
+
"""
|
|
185
|
+
Close the thread-local event loop if it exists.
|
|
186
|
+
|
|
187
|
+
Call this when the thread is done publishing events (e.g., at end of
|
|
188
|
+
Agno streaming execution) to properly clean up resources.
|
|
189
|
+
"""
|
|
190
|
+
if hasattr(self._thread_local, 'loop') and self._thread_local.loop is not None:
|
|
191
|
+
loop = self._thread_local.loop
|
|
192
|
+
if not loop.is_closed():
|
|
193
|
+
try:
|
|
194
|
+
# Cancel any pending tasks
|
|
195
|
+
pending = asyncio.all_tasks(loop)
|
|
196
|
+
for task in pending:
|
|
197
|
+
task.cancel()
|
|
198
|
+
|
|
199
|
+
# Run loop until all tasks are cancelled
|
|
200
|
+
if pending:
|
|
201
|
+
loop.run_until_complete(
|
|
202
|
+
asyncio.gather(*pending, return_exceptions=True)
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
loop.close()
|
|
206
|
+
logger.debug(
|
|
207
|
+
"closed_thread_local_event_loop",
|
|
208
|
+
thread_id=threading.current_thread().ident,
|
|
209
|
+
pending_tasks_cancelled=len(pending) if pending else 0,
|
|
210
|
+
)
|
|
211
|
+
except Exception as e:
|
|
212
|
+
logger.warning(
|
|
213
|
+
"thread_event_loop_close_error",
|
|
214
|
+
error=str(e),
|
|
215
|
+
thread_id=threading.current_thread().ident,
|
|
216
|
+
)
|
|
217
|
+
self._thread_local.loop = None
|
|
218
|
+
|
|
219
|
+
# =========================================================================
|
|
220
|
+
# SSE Stream Completion Tracking (for single execution mode)
|
|
221
|
+
# =========================================================================
|
|
222
|
+
|
|
223
|
+
def register_execution_for_sse_tracking(self, execution_id: str):
    """
    Start tracking SSE completion for an execution.

    Creates an unset completion event for ``execution_id`` unless one is
    already registered, in which case the call is a no-op. The worker can
    later wait on that event before shutting down.

    Args:
        execution_id: The execution ID to track
    """
    if execution_id in self._sse_stream_completed:
        return

    self._sse_stream_completed[execution_id] = asyncio.Event()
    logger.debug(
        "sse_tracking_registered",
        execution_id=execution_id[:8] if execution_id else None
    )
|
|
239
|
+
|
|
240
|
+
def mark_sse_stream_completed(self, execution_id: str):
    """
    Flag the SSE stream of an execution as finished.

    Sets the completion event for ``execution_id``. If the execution was
    never registered, an event is created for it and set immediately, so
    a later waiter returns right away.

    Call this from the SSE streamer after sending the 'done' event.

    Args:
        execution_id: The execution ID whose SSE stream completed
    """
    completion = self._sse_stream_completed.get(execution_id)

    if completion is not None:
        completion.set()
        logger.info(
            "sse_stream_marked_completed",
            execution_id=execution_id[:8] if execution_id else None
        )
        return

    # Not pre-registered: register and mark complete in one step
    completion = asyncio.Event()
    self._sse_stream_completed[execution_id] = completion
    completion.set()
    logger.debug(
        "sse_stream_marked_completed_auto_registered",
        execution_id=execution_id[:8] if execution_id else None
    )
|
|
263
|
+
|
|
264
|
+
async def wait_for_sse_stream_completion(
    self,
    execution_id: str,
    timeout: float = 30.0
) -> bool:
    """
    Block until the execution's SSE stream is marked complete, or time out.

    An execution that is not tracked yet is registered on the fly, so the
    call is safe whether or not registration happened earlier.

    Args:
        execution_id: The execution ID to wait for
        timeout: Maximum seconds to wait (default: 30s)

    Returns:
        True if SSE stream completed, False if timeout reached
    """
    completion = self._sse_stream_completed.get(execution_id)
    if completion is None:
        # Auto-register so there is always an event to wait on
        completion = asyncio.Event()
        self._sse_stream_completed[execution_id] = completion

    try:
        await asyncio.wait_for(completion.wait(), timeout=timeout)
    except asyncio.TimeoutError:
        logger.warning(
            "sse_stream_wait_timeout",
            execution_id=execution_id[:8] if execution_id else None,
            timeout_seconds=timeout
        )
        return False

    logger.info(
        "sse_stream_wait_completed",
        execution_id=execution_id[:8] if execution_id else None
    )
    return True
|
|
303
|
+
|
|
304
|
+
def cleanup_sse_tracking(self, execution_id: str):
    """
    Drop the SSE completion event kept for an execution.

    Removes the tracking entry for ``execution_id`` if one exists; an
    unknown execution ID is ignored. Call this once the execution is
    fully complete and SSE streaming is confirmed done.

    Args:
        execution_id: The execution ID to clean up
    """
    if execution_id not in self._sse_stream_completed:
        return

    self._sse_stream_completed.pop(execution_id)
    logger.debug(
        "sse_tracking_cleaned_up",
        execution_id=execution_id[:8] if execution_id else None
    )
|
|
320
|
+
|
|
321
|
+
async def initialize_event_bus(self):
|
|
322
|
+
"""Initialize event bus manager asynchronously with connection testing."""
|
|
323
|
+
if self.event_bus_manager and not self.event_bus_manager.is_initialized():
|
|
324
|
+
try:
|
|
325
|
+
await self.event_bus_manager.initialize()
|
|
326
|
+
|
|
327
|
+
# Test provider connectivity (especially Redis)
|
|
328
|
+
provider_health = {}
|
|
329
|
+
for provider_name, provider in self.event_bus_manager.providers.items():
|
|
330
|
+
try:
|
|
331
|
+
health = await provider.health_check()
|
|
332
|
+
provider_health[provider_name] = health.get("healthy", False)
|
|
333
|
+
except Exception as e:
|
|
334
|
+
logger.warning(
|
|
335
|
+
"provider_health_check_failed",
|
|
336
|
+
provider=provider_name,
|
|
337
|
+
error=str(e),
|
|
338
|
+
worker_id=self.worker_id[:8] if self.worker_id else "unknown"
|
|
339
|
+
)
|
|
340
|
+
provider_health[provider_name] = False
|
|
341
|
+
|
|
342
|
+
# Log provider status
|
|
343
|
+
healthy_providers = [name for name, healthy in provider_health.items() if healthy]
|
|
344
|
+
unhealthy_providers = [name for name, healthy in provider_health.items() if not healthy]
|
|
345
|
+
|
|
346
|
+
if healthy_providers:
|
|
347
|
+
logger.info(
|
|
348
|
+
"worker_event_bus_ready",
|
|
349
|
+
worker_id=self.worker_id[:8] if self.worker_id else "unknown",
|
|
350
|
+
providers=self.event_bus_manager.get_provider_names(),
|
|
351
|
+
healthy_providers=healthy_providers,
|
|
352
|
+
unhealthy_providers=unhealthy_providers if unhealthy_providers else None
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
# If Redis failed but was configured, log warning about falling back to HTTP
|
|
356
|
+
if "redis" in unhealthy_providers:
|
|
357
|
+
logger.warning(
|
|
358
|
+
"redis_connection_failed_will_fallback",
|
|
359
|
+
worker_id=self.worker_id[:8] if self.worker_id else "unknown",
|
|
360
|
+
message="Redis unavailable, will fallback to HTTP endpoint for event streaming"
|
|
361
|
+
)
|
|
362
|
+
else:
|
|
363
|
+
logger.warning(
|
|
364
|
+
"all_event_bus_providers_unhealthy",
|
|
365
|
+
worker_id=self.worker_id[:8] if self.worker_id else "unknown",
|
|
366
|
+
providers=list(provider_health.keys()),
|
|
367
|
+
message="Will fallback to HTTP endpoint for event streaming"
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
except Exception as e:
|
|
371
|
+
logger.error(
|
|
372
|
+
"worker_event_bus_init_failed",
|
|
373
|
+
error=str(e),
|
|
374
|
+
worker_id=self.worker_id[:8] if self.worker_id else "unknown"
|
|
375
|
+
)
|
|
376
|
+
# Don't fail initialization - just won't use event bus
|
|
377
|
+
self.event_bus_manager = None
|
|
378
|
+
|
|
379
|
+
async def aclose(self):
|
|
380
|
+
"""Async cleanup for async client and event bus."""
|
|
381
|
+
try:
|
|
382
|
+
# Shutdown event bus first
|
|
383
|
+
if self.event_bus_manager:
|
|
384
|
+
await self.event_bus_manager.shutdown()
|
|
385
|
+
logger.info("worker_event_bus_shutdown", worker_id=self.worker_id[:8] if self.worker_id else "unknown")
|
|
386
|
+
|
|
387
|
+
# Then close async client
|
|
388
|
+
await self._async_client.aclose()
|
|
389
|
+
except:
|
|
390
|
+
pass
|
|
391
|
+
|
|
392
|
+
async def start_websocket(self):
|
|
393
|
+
"""Start WebSocket client if enabled."""
|
|
394
|
+
if self.websocket_client:
|
|
395
|
+
await self.websocket_client.start()
|
|
396
|
+
logger.info("websocket_started")
|
|
397
|
+
|
|
398
|
+
async def stop_websocket(self):
|
|
399
|
+
"""Stop WebSocket client if running."""
|
|
400
|
+
if self.websocket_client:
|
|
401
|
+
await self.websocket_client.stop()
|
|
402
|
+
logger.info("websocket_stopped")
|
|
403
|
+
|
|
404
|
+
def _get_running_loop_safe(self) -> Optional[asyncio.AbstractEventLoop]:
|
|
405
|
+
"""
|
|
406
|
+
Safely get the running event loop if one exists.
|
|
407
|
+
|
|
408
|
+
Returns:
|
|
409
|
+
The running event loop, or None if not in an async context
|
|
410
|
+
"""
|
|
411
|
+
try:
|
|
412
|
+
return asyncio.get_running_loop()
|
|
413
|
+
except RuntimeError:
|
|
414
|
+
# No running loop in this thread
|
|
415
|
+
return None
|
|
416
|
+
|
|
417
|
+
def publish_event(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    Publish a streaming event for real-time UI updates (SYNC version).

    NOTE: This is the BLOCKING version. For real-time streaming,
    use publish_event_async() instead to avoid blocking the event loop.

    The method detects whether it is called from a thread with a running
    event loop. If so, event bus and WebSocket sends are scheduled as
    background tasks on that loop (fire-and-forget: True means
    "scheduled", and failures are only logged). Otherwise they are run to
    completion on a reusable thread-local loop.

    Strategy (in order):
    1. Try Event Bus (multi-provider) if configured and initialized
    2. Try WebSocket if connected
    3. Fallback to HTTP endpoint using the sync HTTP client. This call
       blocks, including when reached from inside a running event loop.

    Args:
        execution_id: Execution ID
        event_type: Event type (message_chunk, tool_started, etc.)
        data: Event payload

    Returns:
        True if the event was published (or scheduled, in an async
        context), False otherwise
    """
    # The module-level `asyncio` import must stay visible in this function
    # and its nested callbacks. A function-local `import asyncio` would make
    # the name local to publish_event and leave the callbacks below reading
    # an unbound closure cell whenever the async path returns early.

    def _log_task_failure(log_event):
        """Build a done-callback that logs a background task's exception."""
        def _on_done(finished):
            try:
                if finished.cancelled():
                    return
                exc = finished.exception()
                if exc:
                    logger.warning(
                        log_event,
                        error=str(exc),
                        execution_id=execution_id[:8],
                        event_type=event_type,
                    )
            except Exception:
                # Best-effort logging only: a callback failure must not
                # surface through the event loop's exception handler.
                pass
        return _on_done

    # Check if we're in an async context
    running_loop = self._get_running_loop_safe()
    in_async_context = running_loop is not None

    # Strategy 1: Try Event Bus first
    if self.event_bus_manager and self.event_bus_manager.is_initialized():
        metadata = {}
        if self.worker_id:
            metadata["worker_id"] = self.worker_id

        if in_async_context:
            # A loop is already running in this thread: schedule, don't block
            coro = None
            try:
                coro = self.event_bus_manager.publish_event(
                    execution_id=execution_id,
                    event_type=event_type,
                    data=data,
                    metadata=metadata
                )
                task = running_loop.create_task(coro)
                coro = None  # ownership passed to the task
                task.add_done_callback(_log_task_failure("background_event_bus_task_error"))
                logger.debug(
                    "worker_event_scheduled_via_event_bus_async",
                    execution_id=execution_id[:8],
                    event_type=event_type,
                    note="Task scheduled in running event loop"
                )
                return True
            except Exception as e:
                if coro is not None and hasattr(coro, "close"):
                    # Scheduling failed: close the coroutine to avoid a
                    # "coroutine was never awaited" warning
                    coro.close()
                logger.warning(
                    "failed_to_schedule_event_bus_task",
                    error=str(e),
                    execution_id=execution_id[:8],
                    event_type=event_type,
                )
                # Fall through to fallback strategies
        else:
            # Not in async context - use blocking execution
            try:
                loop = self._get_thread_event_loop()
                coro = self.event_bus_manager.publish_event(
                    execution_id=execution_id,
                    event_type=event_type,
                    data=data,
                    metadata=metadata
                )

                try:
                    results = loop.run_until_complete(coro)
                except RuntimeError as loop_err:
                    coro.close()
                    # Only the nested event loop case is tolerated
                    if "Cannot run the event loop while another loop is running" not in str(loop_err):
                        raise
                    logger.warning(
                        "nested_event_loop_detected",
                        execution_id=execution_id[:8],
                        event_type=event_type,
                        note="Skipping event bus publish due to nested loop"
                    )
                    # Fall through to fallback strategies
                    results = None

                if results is not None:
                    # Success if any provider succeeded
                    success_count = sum(1 for success in results.values() if success)
                    if success_count > 0:
                        logger.debug(
                            "worker_event_published_via_event_bus_sync",
                            execution_id=execution_id[:8],
                            event_type=event_type,
                            success_count=success_count,
                            total_providers=len(results)
                        )
                        return True

                    logger.warning(
                        "worker_event_bus_all_providers_failed_fallback_sync",
                        execution_id=execution_id[:8],
                        event_type=event_type,
                        results=results
                    )
                    # Fall through to WebSocket/HTTP fallback
            except Exception as e:
                logger.error(
                    "worker_event_bus_publish_error_sync",
                    error=str(e),
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                # Fall through to WebSocket/HTTP fallback

    # Strategy 2: Try WebSocket if available and connected
    if self.websocket_client and self.websocket_client.is_connected():
        if in_async_context:
            # Schedule WebSocket send as a task
            coro = None
            try:
                coro = self.websocket_client.send_event(execution_id, event_type, data)
                task = running_loop.create_task(coro)
                coro = None  # ownership passed to the task
                # Retrieve the task's exception so a failed send is logged
                # rather than reported as "Task exception was never retrieved"
                task.add_done_callback(_log_task_failure("background_websocket_task_error"))
                logger.debug(
                    "worker_event_scheduled_via_websocket_async",
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                return True
            except Exception as e:
                if coro is not None and hasattr(coro, "close"):
                    coro.close()
                logger.warning(
                    "failed_to_schedule_websocket_task",
                    error=str(e),
                    execution_id=execution_id[:8],
                )
                # Fall through to HTTP fallback
        else:
            # WebSocket send_event is async, need to run it in event loop
            try:
                loop = self._get_thread_event_loop()
                coro = self.websocket_client.send_event(execution_id, event_type, data)

                try:
                    success = loop.run_until_complete(coro)
                except RuntimeError as loop_err:
                    coro.close()
                    if "Cannot run the event loop while another loop is running" not in str(loop_err):
                        raise
                    # None marks "send was not attempted", as opposed to
                    # a falsy result from send_event
                    success = None

                if success:
                    logger.debug(
                        "worker_event_published_via_websocket_sync",
                        execution_id=execution_id[:8],
                        event_type=event_type
                    )
                    return True

                # Queue full - fallback to HTTP immediately
                if success is not None:
                    logger.warning("websocket_queue_full_fallback_http_sync", execution_id=execution_id[:8])
            except Exception as e:
                logger.error(
                    "websocket_publish_error_sync",
                    error=str(e),
                    execution_id=execution_id[:8]
                )
                # Fall through to HTTP fallback

    # Strategy 3: Fallback to HTTP
    logger.debug(
        "worker_event_publishing_via_http_fallback_sync",
        execution_id=execution_id[:8],
        event_type=event_type
    )

    try:
        import json

        def sanitize_value(val):
            """Replace values that cannot be JSON-serialized with placeholder strings."""
            try:
                # Fast path for JSON primitives
                if val is None or isinstance(val, (bool, int, float, str)):
                    return val

                # Decide from type metadata only, without touching the
                # value itself, to avoid event loop issues
                type_name = type(val).__name__
                type_module = str(type(val).__module__)

                # Drop asyncio objects and sync primitives by module / type name
                if 'asyncio' in type_module or any(x in type_name for x in ['Event', 'Lock', 'Queue', 'Semaphore', 'Condition']):
                    return f"<{type_name}>"
                if isinstance(val, dict):
                    return {k: sanitize_value(v) for k, v in val.items()}
                if isinstance(val, (list, tuple)):
                    return [sanitize_value(v) for v in val]

                try:
                    json.dumps(val)
                    return val
                except (TypeError, ValueError, RuntimeError):
                    # RuntimeError catches "bound to different event loop" errors
                    return f"<non-serializable: {type_name}>"
            except Exception:
                # Catch-all for ANY errors during sanitization itself.
                # Do NOT inspect the value here - it may cause event loop errors
                return "<sanitization-error>"

        sanitized_data = sanitize_value(data)

        url = f"{self.base_url}/api/v1/executions/{execution_id}/events"
        payload = {
            "event_type": event_type,
            "data": sanitized_data,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Double-check: Try to serialize the payload before sending
        try:
            json.dumps(payload)
        except Exception as serialize_err:
            logger.error(
                "payload_serialization_test_failed",
                execution_id=execution_id[:8],
                event_type=event_type,
                error=str(serialize_err)[:200],
            )
            # If we can't serialize it, don't even try to send
            return False

        response = self._client.post(url, json=payload, headers=self.headers)

        if response.status_code not in (200, 202):
            logger.warning(
                "event_publish_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
                event_type=event_type,
            )
            return False

        return True

    except Exception as e:
        # Sanitize error message to avoid serialization issues
        import re
        error_str = str(e) or "(empty)"
        error_type = type(e).__name__
        # Remove asyncio object references that cause serialization errors
        error_str = re.sub(r'<asyncio\.\w+\.\w+ object at 0x[0-9a-f]+ \[[\w\s]+\]>', '[asyncio-object]', error_str)

        logger.warning(
            "event_publish_error",
            error=error_str[:500],  # Truncate to prevent huge error messages
            error_type=error_type,
            execution_id=execution_id[:8],
            event_type=event_type,
        )
        return False
|
|
707
|
+
|
|
708
|
+
async def publish_event_async(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    Publish a streaming event for real-time UI updates (ASYNC version).

    Strategy (in order):
    1. Try Event Bus (multi-provider) if configured and initialized
    2. Try WebSocket if connected
    3. Fallback to HTTP endpoint

    A failure in step 1 or 2, including an exception raised by the
    transport, is logged and the next strategy is tried, so a broken
    event bus or WebSocket never prevents the HTTP fallback. This matches
    the behaviour of the sync ``publish_event``.

    Args:
        execution_id: Execution ID
        event_type: Event type (message_chunk, tool_started, etc.)
        data: Event payload

    Returns:
        True if at least one provider succeeded, False otherwise
    """
    # Strategy 1: Try Event Bus first
    if self.event_bus_manager and self.event_bus_manager.is_initialized():
        try:
            metadata = {}
            if self.worker_id:
                metadata["worker_id"] = self.worker_id

            results = await self.event_bus_manager.publish_event(
                execution_id=execution_id,
                event_type=event_type,
                data=data,
                metadata=metadata
            )

            # Success if any provider succeeded
            success_count = sum(1 for success in results.values() if success)
            if success_count > 0:
                logger.debug(
                    "worker_event_published_via_event_bus",
                    execution_id=execution_id[:8],
                    event_type=event_type,
                    success_count=success_count,
                    total_providers=len(results)
                )
                return True

            logger.warning(
                "worker_event_bus_all_providers_failed_fallback",
                execution_id=execution_id[:8],
                event_type=event_type,
                results=results
            )
            # Fall through to WebSocket/HTTP fallback
        except Exception as e:
            logger.error(
                "worker_event_bus_publish_error",
                error=str(e),
                execution_id=execution_id[:8],
                event_type=event_type
            )
            # Fall through to WebSocket/HTTP fallback

    # Strategy 2: Try WebSocket if available and connected
    if self.websocket_client and self.websocket_client.is_connected():
        try:
            success = await self.websocket_client.send_event(execution_id, event_type, data)
            if success:
                logger.debug(
                    "worker_event_published_via_websocket",
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                return True

            # Queue full - fallback to HTTP immediately
            logger.warning("websocket_queue_full_fallback_http", execution_id=execution_id[:8])
        except Exception as e:
            # A raising WebSocket send must not skip the HTTP fallback
            logger.error(
                "websocket_publish_error",
                error=str(e),
                execution_id=execution_id[:8]
            )
            # Fall through to HTTP fallback

    # Strategy 3: Fallback to HTTP
    logger.debug(
        "worker_event_publishing_via_http_fallback",
        execution_id=execution_id[:8],
        event_type=event_type
    )
    return await self._publish_event_http(execution_id, event_type, data)
|
|
797
|
+
|
|
798
|
+
async def _publish_event_http(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    Publish a single event to the Control Plane over HTTP (fallback transport).

    The payload is sanitized first so that values which cannot be encoded as
    JSON are replaced by placeholder strings instead of making the whole
    request fail. The sanitized payload is then test-serialized once before
    it is handed to the HTTP client.

    Args:
        execution_id: Execution ID; used in the URL path and, truncated to
            8 characters, in log records
        event_type: Event type, sent as ``event_type`` in the request body
        data: Event payload; sanitized recursively before sending

    Returns:
        True if the server answered 200 or 202. False for any other status,
        if the sanitized payload still cannot be serialized, or if any
        exception is raised while building or sending the request.
    """
    try:
        # Sanitize data to remove non-JSON-serializable objects
        import json

        # Substrings of type names that are treated as synchronization
        # primitives and replaced by a placeholder without being inspected.
        primitive_markers = ('Event', 'Lock', 'Queue', 'Semaphore', 'Condition')

        def sanitize_key(key):
            """Return a dict key that json.dumps accepts."""
            # json.dumps only allows str, int, float, bool and None as keys;
            # any other key type raises TypeError, which previously made the
            # pre-send serialization check fail and dropped the whole event.
            if key is None or isinstance(key, (bool, int, float, str)):
                return key
            return str(key)

        def sanitize_value(val):
            """Replace non-JSON-serializable objects with placeholder strings"""
            try:
                # Fast path for JSON primitives
                if val is None or isinstance(val, (bool, int, float, str)):
                    return val

                # Check type name to avoid event loop issues
                type_name = type(val).__name__
                type_module = str(type(val).__module__)

                # Remove asyncio objects by checking module and type name
                if 'asyncio' in type_module or any(x in type_name for x in primitive_markers):
                    return f"<{type_name}>"
                elif isinstance(val, dict):
                    return {sanitize_key(k): sanitize_value(v) for k, v in val.items()}
                elif isinstance(val, (list, tuple)):
                    return [sanitize_value(v) for v in val]
                else:
                    try:
                        json.dumps(val)
                        return val
                    except (TypeError, ValueError, RuntimeError):
                        # RuntimeError catches "bound to different event loop" errors
                        return f"<non-serializable: {type_name}>"
            except Exception:
                # Catch-all for ANY errors during sanitization itself
                # Do NOT attempt to inspect the value here - it may cause event loop errors
                return "<sanitization-error>"

        sanitized_data = sanitize_value(data)

        url = f"{self.base_url}/api/v1/executions/{execution_id}/events"
        payload = {
            "event_type": event_type,
            "data": sanitized_data,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Double-check: Try to serialize the payload before sending
        try:
            json.dumps(payload)
        except Exception as serialize_err:
            logger.error(
                "payload_serialization_test_failed",
                execution_id=execution_id[:8],
                event_type=event_type,
                error=str(serialize_err)[:200],
            )
            # If we can't serialize it, don't even try to send
            return False

        response = await self._async_client.post(url, json=payload, headers=self.headers)

        if response.status_code not in (200, 202):
            logger.warning(
                "event_publish_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
                event_type=event_type,
            )
            return False

        return True

    except Exception as e:
        # Sanitize error message to avoid serialization issues
        import re
        error_str = str(e) or "(empty)"
        error_type = type(e).__name__
        # Remove asyncio object references that cause serialization errors
        error_str = re.sub(r'<asyncio\.\w+\.\w+ object at 0x[0-9a-f]+ \[[\w\s]+\]>', '[asyncio-object]', error_str)

        logger.warning(
            "event_publish_error",
            error=error_str[:500],  # Truncate to prevent huge error messages
            error_type=error_type,
            execution_id=execution_id[:8],
            event_type=event_type,
        )
        return False
|
|
901
|
+
|
|
902
|
+
def cache_metadata(
    self,
    execution_id: str,
    execution_type: str,
) -> bool:
    """
    Publish the execution type as a "metadata" event for an execution.

    This method only builds the payload and delegates to ``publish_event``.
    The original documentation describes the purpose as caching execution
    metadata in Redis so SSE connections do not need a database query; that
    caching is not performed in this method itself.

    NOTE(review): ``publish_event`` is defined outside this block. If it is a
    coroutine function, the value returned here is an un-awaited coroutine
    rather than a bool - confirm against its definition.

    Args:
        execution_id: Execution ID
        execution_type: "AGENT" or "TEAM"

    Returns:
        Whatever ``publish_event`` returns (documented as True on success,
        False otherwise)
    """
    metadata_payload = {"execution_type": execution_type}
    return self.publish_event(
        execution_id=execution_id,
        event_type="metadata",
        data=metadata_payload,
    )
|
|
924
|
+
|
|
925
|
+
def get_session(
    self,
    execution_id: str,
    session_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Retrieve session history for an execution from the Control Plane.

    Issues a GET to ``/api/v1/executions/{execution_id}/session`` so a worker
    can restore conversation context across execution turns.

    Args:
        execution_id: Execution ID; selects the session in the URL path
        session_id: Accepted for interface compatibility. The request is
            keyed by ``execution_id`` only, so this value is not sent.

    Returns:
        The decoded JSON body on HTTP 200. None on 404, on any other status,
        or if the request or JSON decoding raises.
    """
    try:
        url = f"{self.base_url}/api/v1/executions/{execution_id}/session"

        response = self._client.get(url, headers=self.headers)

        if response.status_code == 200:
            session_data = response.json()
            # "messages" may be absent or explicitly null; neither should
            # turn a successful load into an error while counting for the log.
            messages = session_data.get("messages") or []
            logger.info(
                "session_loaded",
                execution_id=execution_id[:8],
                message_count=len(messages),
            )
            return session_data
        elif response.status_code == 404:
            logger.info(
                "session_not_found",
                execution_id=execution_id[:8],
            )
            return None
        else:
            logger.warning(
                "session_load_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
            )
            return None

    except Exception as e:
        logger.warning(
            "session_load_error",
            error=str(e),
            execution_id=execution_id[:8],
        )
        return None
|
|
978
|
+
|
|
979
|
+
def persist_session(
    self,
    execution_id: str,
    session_id: str,
    user_id: Optional[str],
    messages: List[Dict[str, Any]],
    metadata: Optional[Dict[str, Any]] = None,
) -> bool:
    """
    Store session history for an execution in the Control Plane.

    POSTs the session to ``/api/v1/executions/{execution_id}/session``. The
    original documentation states the goal is to keep history available even
    when the worker is offline.

    Args:
        execution_id: Execution ID (URL path)
        session_id: Session ID, sent in the body
        user_id: User ID, sent in the body (may be None)
        messages: List of session messages
        metadata: Optional metadata; an empty dict is sent when falsy

    Returns:
        True when the server answers 200 or 201; False for any other status
        or when the request raises.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/executions/{execution_id}/session"
        body = {
            "session_id": session_id,
            "user_id": user_id,
            "messages": messages,
            "metadata": metadata or {},
        }

        response = self._client.post(endpoint, json=body, headers=self.headers)

        # Guard clause: anything but 200/201 counts as a failed persist.
        if response.status_code not in (200, 201):
            logger.warning(
                "session_persistence_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
            )
            return False

        logger.info(
            "session_persisted",
            execution_id=execution_id[:8],
            message_count=len(messages),
        )
        return True

    except Exception as e:
        logger.warning(
            "session_persistence_error",
            error=str(e),
            execution_id=execution_id[:8],
        )
        return False
|
|
1035
|
+
|
|
1036
|
+
def get_skills(
    self,
    agent_id: str,
) -> List[Dict[str, Any]]:
    """
    Fetch the resolved skills of an agent from the Control Plane.

    Calls ``/api/v1/skills/associations/agents/{agent_id}/skills/resolved``.
    Per the original documentation the endpoint returns skills already merged
    from the agent's environments, its team (if any), the team's
    environments, and the agent's own skills; the merge happens server-side
    and is not visible in this method.

    Args:
        agent_id: Agent ID

    Returns:
        The decoded JSON body on HTTP 200; an empty list for any other
        status or when the request raises.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/skills/associations/agents/{agent_id}/skills/resolved"
        response = self._client.get(endpoint, headers=self.headers)

        # Guard clause: non-200 responses yield no skills.
        if response.status_code != 200:
            logger.warning(
                "skills_fetch_failed",
                status=response.status_code,
                agent_id=agent_id[:8],
            )
            return []

        resolved = response.json()
        logger.info(
            "skills_fetched",
            agent_id=agent_id[:8],
            skill_count=len(resolved),
        )
        return resolved

    except Exception as e:
        logger.warning(
            "skills_fetch_error",
            error=str(e),
            agent_id=agent_id[:8],
        )
        return []
|
|
1082
|
+
|
|
1083
|
+
def get_team_skills(
    self,
    team_id: str,
) -> List[Dict[str, Any]]:
    """
    Fetch the resolved skills of a team from the Control Plane.

    Calls ``/api/v1/skills/associations/teams/{team_id}/skills/resolved``.
    Per the original documentation the endpoint returns skills merged from
    the team's environments and the team's own skills; the merge happens
    server-side and is not visible in this method.

    Args:
        team_id: Team ID

    Returns:
        The decoded JSON body on HTTP 200; an empty list for any other
        status or when the request raises.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/skills/associations/teams/{team_id}/skills/resolved"
        response = self._client.get(endpoint, headers=self.headers)

        # Guard clause: non-200 responses yield no skills.
        if response.status_code != 200:
            logger.warning(
                "team_skills_fetch_failed",
                status=response.status_code,
                team_id=team_id[:8],
            )
            return []

        resolved = response.json()
        logger.info(
            "team_skills_fetched",
            team_id=team_id[:8],
            skill_count=len(resolved),
        )
        return resolved

    except Exception as e:
        logger.warning(
            "team_skills_fetch_error",
            error=str(e),
            team_id=team_id[:8],
        )
        return []
|
|
1127
|
+
|
|
1128
|
+
def get_agent_execution_environment(
    self,
    agent_id: str,
) -> Dict[str, str]:
    """
    Fetch the resolved execution environment of an agent.

    Calls ``/api/v1/execution-environment/agents/{agent_id}/resolved``. Per
    the original documentation the response is a flat environment-variable
    mapping combining custom env vars, resolved secrets and integration
    tokens (e.g. GH_TOKEN, JIRA_TOKEN); that resolution is done server-side.

    Only the variable names are logged, never their values.

    Args:
        agent_id: Agent ID

    Returns:
        The decoded JSON body on HTTP 200; an empty dict for any other
        status or when the request raises.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/execution-environment/agents/{agent_id}/resolved"
        response = self._client.get(endpoint, headers=self.headers)

        # Guard clause: non-200 responses yield an empty environment.
        if response.status_code != 200:
            logger.warning(
                "agent_execution_environment_fetch_failed",
                status=response.status_code,
                agent_id=agent_id[:8],
            )
            return {}

        resolved_env = response.json()
        logger.info(
            "agent_execution_environment_fetched",
            agent_id=agent_id[:8],
            env_var_count=len(resolved_env),
            env_var_keys=list(resolved_env.keys()),
        )
        return resolved_env

    except Exception as e:
        logger.warning(
            "agent_execution_environment_fetch_error",
            error=str(e),
            agent_id=agent_id[:8],
        )
        return {}
|
|
1174
|
+
|
|
1175
|
+
def get_team_execution_environment(
    self,
    team_id: str,
) -> Dict[str, str]:
    """
    Fetch the resolved execution environment of a team.

    Calls ``/api/v1/execution-environment/teams/{team_id}/resolved``. Per the
    original documentation the response is a flat environment-variable
    mapping combining custom env vars, resolved secrets and integration
    tokens (e.g. GH_TOKEN, JIRA_TOKEN); that resolution is done server-side.

    Only the variable names are logged, never their values.

    Args:
        team_id: Team ID

    Returns:
        The decoded JSON body on HTTP 200; an empty dict for any other
        status or when the request raises.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/execution-environment/teams/{team_id}/resolved"
        response = self._client.get(endpoint, headers=self.headers)

        # Guard clause: non-200 responses yield an empty environment.
        if response.status_code != 200:
            logger.warning(
                "team_execution_environment_fetch_failed",
                status=response.status_code,
                team_id=team_id[:8],
            )
            return {}

        resolved_env = response.json()
        logger.info(
            "team_execution_environment_fetched",
            team_id=team_id[:8],
            env_var_count=len(resolved_env),
            env_var_keys=list(resolved_env.keys()),
        )
        return resolved_env

    except Exception as e:
        logger.warning(
            "team_execution_environment_fetch_error",
            error=str(e),
            team_id=team_id[:8],
        )
        return {}
|
|
1221
|
+
|
|
1222
|
+
async def create_job_execution_record(
    self,
    execution_id: str,
    job_id: Optional[str],
    organization_id: str,
    entity_type: str,
    entity_id: Optional[str],
    prompt: str,
    trigger_type: str,
    trigger_metadata: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Ask the Control Plane to create the execution records for a job run.

    POSTs to ``/api/v1/executions/create`` rather than writing to the
    database directly.

    Args:
        execution_id: Execution ID
        job_id: Job ID (optional)
        organization_id: Organization ID
        entity_type: "agent" or "team"
        entity_id: Agent or team ID
        prompt: Prompt text
        trigger_type: "cron", "webhook", or "manual"
        trigger_metadata: Additional trigger metadata

    Returns:
        The decoded JSON body of the 201 response (documented as containing
        execution_id, status, and created_at)

    Raises:
        Exception: If the server answers with anything other than 201, or
            if the request itself fails. Every failure is logged as
            ``job_execution_record_creation_error`` before being re-raised.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/executions/create"
        request_body = {
            "execution_id": execution_id,
            "job_id": job_id,
            "organization_id": organization_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "prompt": prompt,
            "trigger_type": trigger_type,
            "trigger_metadata": trigger_metadata,
        }

        response = await self._async_client.post(endpoint, json=request_body, headers=self.headers)

        # Only 201 Created counts as success. The exception raised here is
        # caught by the handler below, which logs once more and re-raises.
        if response.status_code != 201:
            logger.error(
                "job_execution_record_creation_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
                response=response.text,
            )
            raise Exception(f"Failed to create execution record: HTTP {response.status_code}")

        created = response.json()
        logger.info(
            "job_execution_record_created",
            execution_id=execution_id[:8],
            job_id=job_id[:8] if job_id else None,
        )
        return created

    except Exception as e:
        logger.error(
            "job_execution_record_creation_error",
            error=str(e),
            execution_id=execution_id[:8],
        )
        raise
|
|
1291
|
+
|
|
1292
|
+
async def update_job_execution_status(
    self,
    execution_id: str,
    job_id: str,
    status: str,
    duration_ms: Optional[int] = None,
    error_message: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Report the outcome of a job execution to the Control Plane.

    POSTs to ``/api/v1/executions/{execution_id}/job/{job_id}/status`` rather
    than writing to the database directly.

    Args:
        execution_id: Execution ID
        job_id: Job ID
        status: Final status ("completed" or "failed")
        duration_ms: Execution duration in milliseconds
        error_message: Error message if failed

    Returns:
        The decoded JSON body of the 200 response (documented as containing
        job_id, execution_id, and status)

    Raises:
        Exception: If the server answers with anything other than 200, or
            if the request itself fails. Every failure is logged as
            ``job_execution_status_update_error`` before being re-raised.
    """
    try:
        endpoint = f"{self.base_url}/api/v1/executions/{execution_id}/job/{job_id}/status"
        request_body = {
            "status": status,
            "duration_ms": duration_ms,
            "error_message": error_message,
        }

        response = await self._async_client.post(endpoint, json=request_body, headers=self.headers)

        # Only 200 counts as success. The exception raised here is caught by
        # the handler below, which logs once more and re-raises.
        if response.status_code != 200:
            logger.error(
                "job_execution_status_update_failed",
                status_code=response.status_code,
                execution_id=execution_id[:8],
                job_id=job_id[:8],
                response=response.text,
            )
            raise Exception(f"Failed to update job execution status: HTTP {response.status_code}")

        updated = response.json()
        logger.info(
            "job_execution_status_updated",
            execution_id=execution_id[:8],
            job_id=job_id[:8],
            status=status,
        )
        return updated

    except Exception as e:
        logger.error(
            "job_execution_status_update_error",
            error=str(e),
            execution_id=execution_id[:8],
            job_id=job_id[:8],
        )
        raise
|
|
1353
|
+
|
|
1354
|
+
async def _handle_control_message(self, message: Dict[str, Any]):
    """
    Handle a control message received from the control plane via WebSocket.

    Supported commands are ``pause`` and ``resume`` (sent as Temporal
    signals to the workflow), ``cancel`` (workflow cancellation) and
    ``reload_config`` (logged as not implemented). Unknown commands are
    logged as a warning. Errors are logged and never propagated.

    Args:
        message: Control message; ``command`` and ``execution_id`` are read
            from it with ``dict.get``, so either may be missing
    """
    command = message.get("command")
    execution_id = message.get("execution_id")
    # Shortened ID used in every log record. Computed once and None-safe, so
    # a message without an execution_id cannot raise while being logged.
    short_id = str(execution_id)[:8] if execution_id else None

    try:
        # Import Temporal client here to avoid circular import
        from control_plane_api.app.lib.temporal_client import get_temporal_client

        temporal_client = get_temporal_client()
        workflow_handle = temporal_client.get_workflow_handle(execution_id)

        if command == "pause":
            await workflow_handle.signal("pause_execution")
            logger.info("control_command_executed", command="pause", execution_id=short_id)

        elif command == "resume":
            await workflow_handle.signal("resume_execution")
            logger.info("control_command_executed", command="resume", execution_id=short_id)

        elif command == "cancel":
            await workflow_handle.cancel()
            logger.info("control_command_executed", command="cancel", execution_id=short_id)

        elif command == "reload_config":
            # Future: Reload config without restart
            logger.info("control_command_not_implemented", command="reload_config", execution_id=short_id)

        else:
            logger.warning("unknown_control_command", command=command, execution_id=short_id)

    except Exception as e:
        logger.error(
            "control_command_error",
            error=str(e),
            command=command,
            execution_id=short_id
        )
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
# Singleton instance
|
|
1403
|
+
# Stays None until get_control_plane_client() builds the client on first use.
_control_plane_client: Optional[ControlPlaneClient] = None
|
|
1404
|
+
|
|
1405
|
+
|
|
1406
|
+
def get_control_plane_client() -> ControlPlaneClient:
    """
    Get or create the Control Plane client singleton.

    On the first call the client is built from environment variables and
    stored in the module-level ``_control_plane_client``; later calls return
    that same instance without re-reading the environment.

    Environment variables read:
    - CONTROL_PLANE_URL: Control Plane URL (required)
    - KUBIYA_API_KEY: API key for authentication (required)
    - WEBSOCKET_ENABLED: "true" (case-insensitive) enables WebSocket
    - WEBSOCKET_URL: WebSocket URL
    - WORKER_ID: Worker ID
    - EVENT_BUS_CONFIG: JSON object with event bus configuration (optional).
      A value that is not valid JSON, or is not a JSON object, is logged as
      ``event_bus_config_parse_failed`` and ignored.
    - REDIS_ENABLED / REDIS_URL: when Redis is enabled, a URL is present and
      no usable EVENT_BUS_CONFIG was given, a Redis provider entry is
      generated automatically

    REDIS_PASSWORD is not consumed by this function.

    Returns:
        ControlPlaneClient instance

    Raises:
        ValueError: If CONTROL_PLANE_URL or KUBIYA_API_KEY is not set
    """
    global _control_plane_client

    if _control_plane_client is None:
        base_url = os.environ.get("CONTROL_PLANE_URL")
        api_key = os.environ.get("KUBIYA_API_KEY")

        # WebSocket config from environment (set by worker.py after registration)
        websocket_enabled = os.environ.get("WEBSOCKET_ENABLED", "false").lower() == "true"
        websocket_url = os.environ.get("WEBSOCKET_URL")
        worker_id = os.environ.get("WORKER_ID")

        # Redis config from environment (set by worker.py after registration)
        # This is the DEFAULT fast path for event streaming
        redis_url = os.environ.get("REDIS_URL")
        redis_enabled = os.environ.get("REDIS_ENABLED", "false").lower() == "true"

        # Event bus config from environment (set by worker.py after registration)
        event_bus_config = None
        event_bus_config_str = os.environ.get("EVENT_BUS_CONFIG")
        if event_bus_config_str:
            try:
                import json
                parsed_config = json.loads(event_bus_config_str)
                # Only a JSON object is a usable provider mapping. Validate
                # before assigning so a rejected value is never passed on to
                # the client or allowed to block Redis auto-configuration.
                if not isinstance(parsed_config, dict):
                    raise ValueError(
                        f"EVENT_BUS_CONFIG must be a JSON object, got {type(parsed_config).__name__}"
                    )
                event_bus_config = parsed_config
                logger.info("event_bus_config_loaded_from_env", providers=list(event_bus_config.keys()))
            except Exception as e:
                logger.warning("event_bus_config_parse_failed", error=str(e))

        # AUTO-CONFIGURE: If Redis credentials provided, auto-enable Redis provider
        # This makes Redis the default fast path without explicit event_bus_config
        if redis_enabled and redis_url and not event_bus_config:
            event_bus_config = {
                "redis": {
                    "enabled": True,
                    "redis_url": redis_url,
                }
            }
            logger.info(
                "redis_auto_configured_as_default",
                worker_id=worker_id[:8] if worker_id else "unknown",
                # Log only the part after "@" so embedded credentials are not written out
                redis_url=redis_url.split("@")[-1] if "@" in redis_url else redis_url,
            )

        if not base_url:
            raise ValueError("CONTROL_PLANE_URL environment variable not set")
        if not api_key:
            raise ValueError("KUBIYA_API_KEY environment variable not set")

        _control_plane_client = ControlPlaneClient(
            base_url=base_url,
            api_key=api_key,
            websocket_enabled=websocket_enabled,
            websocket_url=websocket_url,
            worker_id=worker_id,
            event_bus_config=event_bus_config
        )

        logger.info(
            "control_plane_client_initialized",
            base_url=base_url,
            websocket_enabled=websocket_enabled,
            event_bus_configured=event_bus_config is not None
        )

    return _control_plane_client
|