kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,1297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal worker for Agent Control Plane - Decoupled Architecture.
|
|
3
|
+
|
|
4
|
+
This worker:
|
|
5
|
+
1. Registers with Control Plane API on startup using KUBIYA_API_KEY
|
|
6
|
+
2. Gets dynamic configuration (Temporal credentials, task queue name, etc.)
|
|
7
|
+
3. Connects to Temporal Cloud with provided credentials
|
|
8
|
+
4. Sends periodic heartbeats to Control Plane
|
|
9
|
+
5. Has NO direct database access - all state is managed via the Control Plane API
|
|
10
|
+
|
|
11
|
+
Environment variables REQUIRED (unless a default is noted):
|
|
12
|
+
- KUBIYA_API_KEY: Kubiya API key for authentication (required)
|
|
13
|
+
- CONTROL_PLANE_URL: Control Plane API URL (e.g., https://control-plane.kubiya.ai)
|
|
14
|
+
- ENVIRONMENT_NAME: Environment/task queue name to join (default: "default")
|
|
15
|
+
|
|
16
|
+
Environment variables OPTIONAL:
|
|
17
|
+
- WORKER_HOSTNAME: Custom hostname for worker (default: auto-detected)
|
|
18
|
+
- HEARTBEAT_INTERVAL: Seconds between heartbeats (default: 60, lightweight mode)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import asyncio
|
|
22
|
+
import os
|
|
23
|
+
import sys
|
|
24
|
+
import structlog
|
|
25
|
+
import httpx
|
|
26
|
+
import socket
|
|
27
|
+
import platform
|
|
28
|
+
import psutil
|
|
29
|
+
import time
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from typing import Optional, List
|
|
32
|
+
from temporalio.worker import Worker
|
|
33
|
+
from temporalio.worker.workflow_sandbox import SandboxedWorkflowRunner, SandboxRestrictions
|
|
34
|
+
from temporalio.client import Client, TLSConfig
|
|
35
|
+
from collections import deque
|
|
36
|
+
|
|
37
|
+
from control_plane_api.app.utils.helpers import is_local_temporal
|
|
38
|
+
# Import workflows and activities from local package
|
|
39
|
+
from control_plane_api.worker.workflows.agent_execution import AgentExecutionWorkflow
|
|
40
|
+
from control_plane_api.worker.workflows.team_execution import TeamExecutionWorkflow
|
|
41
|
+
from control_plane_api.worker.workflows.scheduled_job_wrapper import ScheduledJobWrapperWorkflow
|
|
42
|
+
from control_plane_api.worker.activities.agent_activities import (
|
|
43
|
+
execute_agent_llm,
|
|
44
|
+
update_execution_status,
|
|
45
|
+
update_agent_status,
|
|
46
|
+
get_execution_details,
|
|
47
|
+
persist_conversation_history,
|
|
48
|
+
submit_runtime_analytics_activity,
|
|
49
|
+
)
|
|
50
|
+
from control_plane_api.worker.activities.team_activities import (
|
|
51
|
+
get_team_agents,
|
|
52
|
+
execute_team_coordination,
|
|
53
|
+
)
|
|
54
|
+
from control_plane_api.worker.activities.runtime_activities import (
|
|
55
|
+
execute_with_runtime,
|
|
56
|
+
publish_user_message,
|
|
57
|
+
)
|
|
58
|
+
from control_plane_api.worker.activities.job_activities import (
|
|
59
|
+
create_job_execution_record,
|
|
60
|
+
update_job_execution_status,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Configure structured logging
import logging
from control_plane_api.worker.utils.logging_config import configure_logging

# Configure logging with dynamic settings from environment variables.
# This runs at import time, before the module-level logger below is created.
configure_logging()

# Module-wide structlog logger used by every function in this file.
logger = structlog.get_logger()

# Global log buffer to collect logs since last heartbeat.
# Bounded deque: once 500 entries are held, appending discards the oldest.
log_buffer = deque(maxlen=500)  # Keep last 500 log lines
# Wall-clock time at module import; collect_system_info() subtracts it from
# time.time() to report "uptime_seconds".
worker_start_time = time.time()

# Global state for differential heartbeats (optimization)
# Timestamp (time.time()) recorded by send_heartbeat() for full heartbeats;
# 0 means no full heartbeat has been recorded yet.
_last_full_heartbeat_time: float = 0
# Most recent system-info dict collected by send_heartbeat() for a full heartbeat.
_cached_system_info: Optional[dict] = None
# NOTE(review): only referenced in send_heartbeat()'s `global` statement in the
# visible code; it is never read or assigned there - confirm whether it is still needed.
_last_log_index_sent: int = 0
# Seconds between full heartbeats; compared against a time.time() difference.
_full_heartbeat_interval: int = 300  # Full heartbeat every 5 minutes (vs lightweight every 60s)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ProgressUI:
    """Structured-log helpers used to report worker startup progress.

    Each helper emits exactly one event through the module-level ``logger``;
    nothing is written to stdout by this class itself.
    """

    @staticmethod
    def step(emoji: str, message: str, status: str = ""):
        """Emit a ``worker_progress`` event; ``status`` is attached only when non-empty."""
        fields = {"emoji": emoji, "message": message}
        if status:
            fields["status"] = status
        logger.info("worker_progress", **fields)

    @staticmethod
    def success(emoji: str, message: str):
        """Emit a ``worker_success`` event at info level."""
        logger.info("worker_success", emoji=emoji, message=message)

    @staticmethod
    def error(emoji: str, message: str):
        """Emit a ``worker_error`` event at error level."""
        logger.error("worker_error", emoji=emoji, message=message)

    @staticmethod
    def warning(emoji: str, message: str):
        """Emit a ``worker_warning`` event at warning level."""
        logger.warning("worker_warning", emoji=emoji, message=message)

    @staticmethod
    def header(text: str):
        """Emit a ``worker_header`` event carrying the section title."""
        logger.info("worker_header", text=text)

    @staticmethod
    def banner():
        """Emit the ``worker_banner`` startup event."""
        logger.info("worker_banner", title="Kubiya Agent Worker")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _detect_docker() -> tuple:
    """
    Probe for a usable Docker CLI.

    Looks for ``docker`` on PATH, then in a few common install locations, and
    runs ``docker --version`` (3 second timeout) on the first match.

    Returns:
        ``(docker_available, docker_version)``: a bool and the parsed version
        string, or ``(False, None)`` when Docker is missing or the probe fails.
        Never raises; failures are logged and reported as "not available".
    """
    import shutil
    import subprocess

    docker_available = False
    docker_version = None
    try:
        # First try to find docker in PATH using shutil.which
        docker_path = shutil.which('docker')
        logger.debug("docker_which_result", path=docker_path)

        # Fallback to common locations if not in PATH
        if not docker_path:
            docker_paths = [
                '/usr/local/bin/docker',
                '/usr/bin/docker',
                '/opt/homebrew/bin/docker',
            ]
            for path in docker_paths:
                logger.debug("docker_checking_path", path=path, exists=os.path.exists(path))
                if os.path.exists(path):
                    docker_path = path
                    break

        if docker_path:
            logger.debug("docker_running_version_check", path=docker_path)
            result = subprocess.run(
                [docker_path, '--version'],
                capture_output=True,
                text=True,
                timeout=3,
                shell=False
            )
            logger.debug(
                "docker_version_output",
                returncode=result.returncode,
                stdout=result.stdout[:200],
                stderr=result.stderr[:200] if result.stderr else None
            )
            if result.returncode == 0:
                docker_available = True
                # Parse "Docker version 28.1.1, build 4eba377"
                output = result.stdout.strip()
                if ',' in output:
                    docker_version = output.split(',')[0].replace('Docker version', '').strip()
                else:
                    docker_version = output.replace('Docker version', '').strip()
                logger.debug("docker_detected", version=docker_version, path=docker_path)
            else:
                logger.warning("docker_version_check_failed", returncode=result.returncode)
        else:
            logger.warning("docker_not_found_in_path_or_common_locations")
    except Exception as e:
        # Log for debugging but don't fail: Docker is optional information
        logger.warning("docker_detection_failed", error=str(e), error_type=type(e).__name__)
        import traceback
        logger.debug("docker_detection_traceback", traceback=traceback.format_exc())

    return docker_available, docker_version


def collect_system_info() -> dict:
    """
    Collect current system metrics and information.

    Note: this call blocks. ``psutil.cpu_percent(interval=0.1)`` sleeps for
    0.1 s and the Docker probe may run a subprocess for up to 3 s.

    Returns:
        A dict with host, OS, Python/SDK version, process, Docker, CPU, memory,
        disk and uptime fields. If collection fails, a reduced dict containing
        only ``hostname`` and ``platform`` is returned instead.
    """
    try:
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        # Get Kubiya CLI version from environment variable (set by CLI) - skipped for now
        cli_version = None

        # Get SDK version
        from control_plane_api.version import get_sdk_version
        sdk_version = get_sdk_version()

        # Check Docker availability (best effort, never raises)
        docker_available, docker_version = _detect_docker()

        return {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "os_name": platform.system(),  # Darwin, Linux, Windows
            "os_version": platform.release(),
            "python_version": platform.python_version(),
            "cli_version": cli_version,
            "sdk_version": sdk_version,
            "pid": os.getpid(),
            "cwd": os.getcwd(),
            # Both runtimes are always reported as available
            "supported_runtimes": ["agno", "claude_code"],
            "docker_available": docker_available,
            "docker_version": docker_version,
            "cpu_count": psutil.cpu_count(),
            "cpu_percent": cpu_percent,
            "memory_total": memory.total,
            "memory_used": memory.used,
            "memory_percent": memory.percent,
            "disk_total": disk.total,
            "disk_used": disk.used,
            "disk_percent": disk.percent,
            "uptime_seconds": time.time() - worker_start_time,
        }
    except Exception as e:
        logger.warning("failed_to_collect_system_info", error=str(e))
        return {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
        }
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def get_recent_logs() -> List[str]:
    """
    Drain the module-level log buffer.

    Returns:
        The buffered log lines, oldest first. The buffer is empty afterwards.
    """
    drained: List[str] = [*log_buffer]
    log_buffer.clear()
    return drained
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def log_to_buffer(message: str) -> None:
    """
    Queue one log line in the module-level buffer.

    The line stays buffered until ``get_recent_logs()`` drains it.
    """
    log_buffer.append(message)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@dataclass
class WorkerConfig:
    """Configuration received from Control Plane registration.

    The first seven fields are mandatory; the rest have defaults.
    """
    worker_id: str
    environment_name: str  # Task queue name (org_id.environment)
    temporal_namespace: str
    temporal_host: str
    temporal_api_key: str
    organization_id: str
    control_plane_url: str
    # LiteLLM proxy endpoint and key
    litellm_api_url: str = "https://llm-proxy.kubiya.ai"
    litellm_api_key: str = ""
    # Redis configuration for direct event streaming
    redis_url: str = ""
    redis_password: str = ""
    redis_enabled: bool = False
    # WebSocket configuration
    websocket_enabled: bool = True
    websocket_url: str = ""
    # Defaults to None (not an empty list), so the annotation must be Optional
    websocket_features: Optional[list] = None
    # Queue configuration for cleanup
    queue_id: str = ""
    queue_ephemeral: bool = False
    queue_single_execution: bool = False
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
async def start_worker_for_queue(
    control_plane_url: str,
    kubiya_api_key: str,
    queue_id: str,
) -> WorkerConfig:
    """
    Start a worker for a specific queue ID.

    POSTs the worker SDK version and a system-info snapshot to
    ``/api/v1/worker-queues/{queue_id}/start`` and builds a ``WorkerConfig``
    from the JSON response.

    Args:
        control_plane_url: Control Plane API URL
        kubiya_api_key: Kubiya API key for authentication
        queue_id: Worker queue ID (UUID)

    Returns:
        WorkerConfig with all necessary configuration

    Raises:
        SystemExit: with exit code 1 when the control plane answers with a
            non-200 status or the HTTP request itself fails.
        KeyError: when a 200 response lacks one of the mandatory fields.
    """
    # Get worker SDK version for compatibility check
    from control_plane_api.version import get_sdk_version
    worker_sdk_version = get_sdk_version()

    # Collect system info to send during registration
    system_info = collect_system_info()

    logger.info(
        "starting_worker_for_queue",
        queue_id=queue_id,
        control_plane_url=control_plane_url,
        sdk_version=worker_sdk_version,
        pid=system_info.get("pid"),
        cwd=system_info.get("cwd"),
    )

    # Normalize the base URL so a trailing slash does not produce "//api/..."
    # (same normalization send_heartbeat applies).
    base_url = control_plane_url.rstrip("/")

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{base_url}/api/v1/worker-queues/{queue_id}/start",
                headers={"Authorization": f"Bearer {kubiya_api_key}"},
                json={
                    "worker_sdk_version": worker_sdk_version,
                    "system_info": system_info,
                    "control_plane_url": control_plane_url
                }
            )

            # Success case
            if response.status_code == 200:
                data = response.json()

                ProgressUI.success("✓", "Registered with control plane")
                logger.info(
                    "worker_registered",
                    # Tolerate a missing id here; the WorkerConfig construction
                    # below raises a clear KeyError for it instead.
                    worker_id=(data.get("worker_id") or "")[:8],
                    queue_name=data.get("queue_name"),
                )

                # Check SDK version compatibility
                control_plane_sdk_version = data.get("control_plane_sdk_version")
                if control_plane_sdk_version and control_plane_sdk_version != worker_sdk_version:
                    ProgressUI.warning("⚠", "SDK version mismatch detected")
                    print(f"\n Worker SDK version: {worker_sdk_version}")
                    print(f" Control Plane SDK version: {control_plane_sdk_version}")
                    print("\n Consider updating your worker to match the control plane version.\n")

                    logger.warning(
                        "sdk_version_mismatch",
                        worker_version=worker_sdk_version,
                        control_plane_version=control_plane_sdk_version,
                    )
                elif control_plane_sdk_version:
                    logger.info(
                        "sdk_version_match",
                        version=worker_sdk_version,
                    )

                # The task_queue_name is now just the queue UUID
                # Priority for LiteLLM API URL:
                # 1. LITELLM_API_BASE environment variable (from local proxy via CLI)
                # 2. Control plane litellm_api_url
                # 3. Default (https://llm-proxy.kubiya.ai)
                # Chained `or` so an empty or null value from the control plane
                # also falls through to the next source.
                env_litellm_url = os.getenv("LITELLM_API_BASE")
                cp_litellm_url = data.get("litellm_api_url")
                litellm_api_url = env_litellm_url or cp_litellm_url or "https://llm-proxy.kubiya.ai"
                litellm_api_key = os.getenv("LITELLM_API_KEY") or data.get("litellm_api_key") or ""

                # Log which LiteLLM endpoint is being used
                if env_litellm_url:
                    logger.info(
                        "using_local_litellm_proxy",
                        litellm_api_url=litellm_api_url,
                        source="environment_variable"
                    )
                elif cp_litellm_url:
                    logger.info(
                        "using_control_plane_litellm_proxy",
                        litellm_api_url=litellm_api_url,
                        source="control_plane"
                    )

                return WorkerConfig(
                    worker_id=data["worker_id"],
                    environment_name=data["task_queue_name"],  # This is now the queue UUID
                    temporal_namespace=data["temporal_namespace"],
                    temporal_host=data["temporal_host"],
                    temporal_api_key=data["temporal_api_key"],
                    organization_id=data["organization_id"],
                    control_plane_url=data["control_plane_url"],
                    litellm_api_url=litellm_api_url,
                    litellm_api_key=litellm_api_key,
                    # Redis configuration from control plane (for direct event streaming)
                    redis_url=data.get("redis_url", ""),
                    redis_password=data.get("redis_password", ""),
                    redis_enabled=data.get("redis_enabled", False),
                    # WebSocket configuration from control plane
                    websocket_enabled=data.get("websocket_enabled", True),
                    websocket_url=data.get("websocket_url", ""),
                    websocket_features=data.get("websocket_features", []),
                )

            # Handle errors
            else:
                # Try to extract error detail from response; fall back to the raw body
                error_message = response.text
                try:
                    error_data = response.json()
                    if isinstance(error_data, dict):
                        error_message = error_data.get("detail", response.text)
                except ValueError:
                    # Body is not valid JSON - keep the raw text
                    pass

                ProgressUI.error("✗", "Worker registration failed")
                print(f" {error_message}\n")

                logger.error(
                    "worker_start_failed",
                    status_code=response.status_code,
                    queue_id=queue_id,
                )
                sys.exit(1)

    except httpx.RequestError as e:
        ProgressUI.error("✗", f"Connection failed: {control_plane_url}")
        print(f" {str(e)}\n")
        logger.error("control_plane_connection_failed", error=str(e))
        sys.exit(1)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
async def send_heartbeat(
    config: WorkerConfig,
    kubiya_api_key: str,
    status: str = "active",
    tasks_processed: int = 0,
    current_task_id: Optional[str] = None,
    force_full: bool = False
) -> bool:
    """
    Send heartbeat to Control Plane with differential data.

    Lightweight heartbeats carry status only. A full heartbeat additionally
    carries system info and the buffered logs. A full heartbeat is sent on the
    first call, when ``force_full`` is set, or once ``_full_heartbeat_interval``
    seconds have passed since the last *successfully delivered* full heartbeat.

    If a full heartbeat is not delivered, its drained logs are put back into
    the buffer and the next call attempts a full heartbeat again.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        status: Worker status (active, idle, busy)
        tasks_processed: Number of tasks processed
        current_task_id: Currently executing task ID
        force_full: Force a full heartbeat (ignores timing logic)

    Returns:
        True if the control plane answered 200/204, False otherwise
        (HTTP errors and exceptions are logged, never raised).
    """
    global _last_full_heartbeat_time, _cached_system_info

    current_time = time.time()
    time_since_last_full = current_time - _last_full_heartbeat_time

    # Determine if this should be a full heartbeat
    # Full heartbeat: every 5 minutes, or on first run, or if forced
    is_full_heartbeat = (
        force_full or
        _last_full_heartbeat_time == 0 or
        time_since_last_full >= _full_heartbeat_interval
    )

    # Build base heartbeat data (always included)
    heartbeat_data = {
        "status": status,
        "tasks_processed": tasks_processed,
        "current_task_id": current_task_id,
        "worker_metadata": {},
    }

    # Add system info and logs only for full heartbeats
    logs: List[str] = []
    if is_full_heartbeat:
        # Collect fresh system info (expensive operation)
        system_info = collect_system_info()
        _cached_system_info = system_info
        heartbeat_data["system_info"] = system_info

        # Get logs since last full heartbeat (only new logs)
        logs = get_recent_logs()
        if logs:
            heartbeat_data["logs"] = logs

        heartbeat_type = "full"
    else:
        # Lightweight heartbeat - no system info or logs
        # Server will use cached system info from Redis
        heartbeat_type = "lightweight"

    delivered = False
    try:
        # Normalize URL to prevent double-slash issues
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/workers/{config.worker_id}/heartbeat"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(
                url,
                json=heartbeat_data,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

            if response.status_code in [200, 204]:
                delivered = True
                logger.debug(
                    "heartbeat_sent",
                    worker_id=config.worker_id,
                    type=heartbeat_type,
                    payload_size=len(str(heartbeat_data))
                )
                log_to_buffer(
                    f"[{time.strftime('%H:%M:%S')}] Heartbeat sent ({heartbeat_type})"
                )
            else:
                logger.warning(
                    "heartbeat_failed",
                    status_code=response.status_code,
                    response=response.text[:200],
                    type=heartbeat_type
                )
                log_to_buffer(
                    f"[{time.strftime('%H:%M:%S')}] Heartbeat failed: HTTP {response.status_code}"
                )

    except Exception as e:
        error_msg = f"{type(e).__name__}: {str(e)}" if str(e) else f"{type(e).__name__} (no message)"
        logger.warning(
            "heartbeat_error",
            error=error_msg,
            error_type=type(e).__name__,
            worker_id=config.worker_id[:8] if config.worker_id else "unknown",
            type=heartbeat_type
        )
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Heartbeat error: {error_msg[:150]}")

    if is_full_heartbeat:
        if delivered:
            # Only a delivered full heartbeat resets the 5-minute timer, so a
            # failed one is retried on the next call.
            _last_full_heartbeat_time = current_time
        elif logs:
            # Put the undelivered logs back in front of anything buffered since
            # the drain. extend() on the bounded deque drops the oldest lines
            # if the total exceeds its maxlen.
            pending = logs + list(log_buffer)
            log_buffer.clear()
            log_buffer.extend(pending)

    return delivered
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
async def create_temporal_client(config: WorkerConfig) -> Client:
    """
    Open a Temporal client connection described by ``config``.

    When ``is_local_temporal()`` is true the connection is made without TLS or
    credentials. Otherwise TLS is enabled and the API key is sent as a Bearer
    ``authorization`` RPC metadata header.

    Args:
        config: Worker configuration from Control Plane registration

    Returns:
        Connected Temporal client instance

    Raises:
        Exception: any connection error is logged and re-raised unchanged.
    """
    host = config.temporal_host
    namespace = config.temporal_namespace

    try:
        if is_local_temporal():
            # Local Temporal: plain connection, no TLS or API key
            logger.info("connecting_to_local_temporal", host=host)
            return await Client.connect(host, namespace=namespace)

        # Temporal Cloud: TLS plus Bearer API key in the RPC metadata
        logger.info("connecting_to_temporal_cloud", host=host)
        return await Client.connect(
            host,
            namespace=namespace,
            tls=TLSConfig(),
            rpc_metadata={"authorization": f"Bearer {config.temporal_api_key}"},
        )
    except Exception as exc:
        logger.error("connection_failed", error=str(exc))
        ProgressUI.error("✗", f"Temporal connection failed: {str(exc)}")
        raise
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
async def send_disconnect(
    config: WorkerConfig,
    kubiya_api_key: str,
    reason: str = "shutdown",
    exit_code: Optional[int] = None,
    error_message: Optional[str] = None
) -> bool:
    """
    Notify Control Plane that worker is disconnecting/exiting.

    POSTs to ``/api/v1/workers/{worker_id}/disconnect`` with a 10 second
    timeout. Failures are logged and reported through the return value.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        reason: Disconnect reason (shutdown, error, crash, etc.)
        exit_code: Exit code if applicable
        error_message: Error message if applicable

    Returns:
        True if the control plane answered 200/204, False otherwise
    """
    disconnect_data = {
        "reason": reason,
        "exit_code": exit_code,
        "error_message": error_message
    }

    try:
        # Normalize URL to prevent double-slash issues (as send_heartbeat does)
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/workers/{config.worker_id}/disconnect"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(
                url,
                json=disconnect_data,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

            if response.status_code in [200, 204]:
                logger.info(
                    "worker_disconnected",
                    worker_id=config.worker_id,
                    reason=reason,
                    exit_code=exit_code
                )
                return True
            else:
                logger.warning(
                    "disconnect_notification_failed",
                    status_code=response.status_code,
                    response=response.text[:200]
                )
                return False

    except Exception as e:
        # Best effort: the worker is exiting anyway, so never raise from here
        logger.warning("disconnect_notification_error", error=str(e))
        return False
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
async def delete_ephemeral_queue(
    config: WorkerConfig,
    kubiya_api_key: str,
    queue_id: str,
    timeout: int = 5
) -> bool:
    """
    Delete ephemeral queue during worker shutdown.

    This allows the worker to clean up its ephemeral queue immediately,
    without requiring the CLI to wait for worker unregistration.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        queue_id: Queue UUID to delete
        timeout: Request timeout in seconds (short timeout - if it fails, TTL handles it)

    Returns:
        True if the control plane answered 200/204, False otherwise
        (errors are logged, never raised).
    """
    try:
        # Normalize URL to prevent double-slash issues (as send_heartbeat does)
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/worker-queues/{queue_id}"

        async with httpx.AsyncClient(timeout=float(timeout)) as client:
            response = await client.delete(
                url,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

            if response.status_code in [200, 204]:
                logger.info(
                    "ephemeral_queue_deleted",
                    queue_id=queue_id,
                    worker_id=config.worker_id
                )
                return True
            else:
                logger.warning(
                    "queue_delete_failed",
                    queue_id=queue_id,
                    status_code=response.status_code,
                    response=response.text[:200]
                )
                return False

    except Exception as e:
        # Best effort: report the failure through the return value only
        logger.warning(
            "queue_delete_error",
            queue_id=queue_id,
            error=str(e)
        )
        return False
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
async def heartbeat_loop(config: WorkerConfig, kubiya_api_key: str, interval: int = 60):
    """
    Send an "active" heartbeat every ``interval`` seconds until cancelled.

    Each cycle sleeps first and then calls ``send_heartbeat``. Cancellation
    ends the loop quietly; any other exception is logged and the loop goes on.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        interval: Seconds between heartbeats
    """
    # The counter is never incremented here, so every heartbeat reports 0.
    processed_count = 0

    while True:
        try:
            await asyncio.sleep(interval)
            await send_heartbeat(
                config=config,
                kubiya_api_key=kubiya_api_key,
                status="active",
                tasks_processed=processed_count,
            )
        except asyncio.CancelledError:
            logger.info("heartbeat_loop_cancelled")
            return
        except Exception as exc:
            logger.warning("heartbeat_loop_error", error=str(exc))
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
async def run_worker():
|
|
713
|
+
"""
|
|
714
|
+
Run the Temporal worker with decoupled architecture.
|
|
715
|
+
|
|
716
|
+
The worker:
|
|
717
|
+
1. Registers with Control Plane API
|
|
718
|
+
2. Gets dynamic configuration (Temporal credentials, task queue, etc.)
|
|
719
|
+
3. Connects to Temporal Cloud
|
|
720
|
+
4. Starts heartbeat loop
|
|
721
|
+
5. Registers workflows and activities
|
|
722
|
+
6. Polls for tasks and executes them
|
|
723
|
+
"""
|
|
724
|
+
# Get configuration from environment
|
|
725
|
+
kubiya_api_key = os.environ.get("KUBIYA_API_KEY")
|
|
726
|
+
control_plane_url = os.environ.get("CONTROL_PLANE_URL")
|
|
727
|
+
queue_id = os.environ.get("QUEUE_ID")
|
|
728
|
+
heartbeat_interval = int(os.environ.get("HEARTBEAT_INTERVAL", "60"))
|
|
729
|
+
single_execution_mode = os.environ.get("SINGLE_EXECUTION", "").lower() in ("true", "1", "yes")
|
|
730
|
+
|
|
731
|
+
# Validate required configuration
|
|
732
|
+
if not kubiya_api_key:
|
|
733
|
+
logger.error(
|
|
734
|
+
"configuration_error",
|
|
735
|
+
message="KUBIYA_API_KEY environment variable is required"
|
|
736
|
+
)
|
|
737
|
+
sys.exit(1)
|
|
738
|
+
|
|
739
|
+
if not control_plane_url:
|
|
740
|
+
logger.error(
|
|
741
|
+
"configuration_error",
|
|
742
|
+
message="CONTROL_PLANE_URL environment variable is required"
|
|
743
|
+
)
|
|
744
|
+
sys.exit(1)
|
|
745
|
+
|
|
746
|
+
if not queue_id:
|
|
747
|
+
logger.error(
|
|
748
|
+
"configuration_error",
|
|
749
|
+
message="QUEUE_ID environment variable is required"
|
|
750
|
+
)
|
|
751
|
+
sys.exit(1)
|
|
752
|
+
|
|
753
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker starting for queue {queue_id}")
|
|
754
|
+
|
|
755
|
+
if single_execution_mode:
|
|
756
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Single execution mode: enabled (will exit after one task)")
|
|
757
|
+
logger.info("single_execution_mode_enabled", queue_id=queue_id)
|
|
758
|
+
|
|
759
|
+
# Check if agent-runtime mode is enabled
|
|
760
|
+
use_agent_runtime = os.environ.get("USE_AGENT_RUNTIME", "").lower() in ("true", "1", "yes")
|
|
761
|
+
agent_runtime_server = None
|
|
762
|
+
health_monitor = None
|
|
763
|
+
|
|
764
|
+
try:
|
|
765
|
+
# Print banner
|
|
766
|
+
ProgressUI.banner()
|
|
767
|
+
|
|
768
|
+
# Step 0: Setup agent-runtime if enabled
|
|
769
|
+
if use_agent_runtime:
|
|
770
|
+
from pathlib import Path
|
|
771
|
+
from control_plane_api.worker.binary_manager import BinaryManager
|
|
772
|
+
from control_plane_api.worker.agent_runtime_server import AgentRuntimeServer, ServerConfig
|
|
773
|
+
|
|
774
|
+
ProgressUI.step("⏳", "Setting up agent-runtime...")
|
|
775
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Downloading agent-runtime binary...")
|
|
776
|
+
|
|
777
|
+
config_dir = Path(os.environ.get("AGENT_RUNTIME_CONFIG_DIR", Path.home() / ".kubiya"))
|
|
778
|
+
binary_manager = BinaryManager(config_dir)
|
|
779
|
+
binary_path = await binary_manager.ensure_binary("latest")
|
|
780
|
+
|
|
781
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Starting agent-runtime server...")
|
|
782
|
+
server_config = ServerConfig(
|
|
783
|
+
grpc_port=int(os.environ.get("AGENT_RUNTIME_GRPC_PORT", "50052")),
|
|
784
|
+
http_port=int(os.environ.get("AGENT_RUNTIME_HTTP_PORT", "8082")),
|
|
785
|
+
health_port=int(os.environ.get("AGENT_RUNTIME_HEALTH_PORT", "8083")),
|
|
786
|
+
config_dir=config_dir,
|
|
787
|
+
log_level=os.environ.get("AGENT_RUNTIME_LOG_LEVEL", "info"),
|
|
788
|
+
)
|
|
789
|
+
|
|
790
|
+
agent_runtime_server = AgentRuntimeServer(binary_path, server_config)
|
|
791
|
+
await agent_runtime_server.start(wait_for_health=True, timeout=30)
|
|
792
|
+
|
|
793
|
+
# Set environment variable for runtime to use
|
|
794
|
+
os.environ["AGENT_RUNTIME_ADDRESS"] = agent_runtime_server.grpc_address
|
|
795
|
+
ProgressUI.success("✓", f"Agent runtime ready at {agent_runtime_server.grpc_address}")
|
|
796
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Agent runtime server started on {agent_runtime_server.grpc_address}")
|
|
797
|
+
|
|
798
|
+
# Step 1: Register with control plane
|
|
799
|
+
ProgressUI.step("⏳", "Registering with control plane...")
|
|
800
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Registering with control plane...")
|
|
801
|
+
config = await start_worker_for_queue(
|
|
802
|
+
control_plane_url=control_plane_url,
|
|
803
|
+
kubiya_api_key=kubiya_api_key,
|
|
804
|
+
queue_id=queue_id,
|
|
805
|
+
)
|
|
806
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker registered: {config.worker_id}")
|
|
807
|
+
|
|
808
|
+
# Set environment variables for activities to use
|
|
809
|
+
os.environ["CONTROL_PLANE_URL"] = config.control_plane_url
|
|
810
|
+
|
|
811
|
+
# Set single execution flag so event publisher can disable WebSocket
|
|
812
|
+
if single_execution_mode:
|
|
813
|
+
os.environ["KUBIYA_SINGLE_EXECUTION_MODE"] = "true"
|
|
814
|
+
os.environ["KUBIYA_API_KEY"] = kubiya_api_key
|
|
815
|
+
os.environ["WORKER_ID"] = config.worker_id
|
|
816
|
+
os.environ["LITELLM_API_BASE"] = config.litellm_api_url
|
|
817
|
+
os.environ["LITELLM_API_KEY"] = config.litellm_api_key
|
|
818
|
+
|
|
819
|
+
# Set WebSocket environment variables if enabled
|
|
820
|
+
from control_plane_api.worker.utils.environment import should_use_websocket
|
|
821
|
+
|
|
822
|
+
if config.websocket_enabled and config.websocket_url and should_use_websocket():
|
|
823
|
+
os.environ["WEBSOCKET_ENABLED"] = "true"
|
|
824
|
+
os.environ["WEBSOCKET_URL"] = config.websocket_url
|
|
825
|
+
logger.info(
|
|
826
|
+
"websocket_configured",
|
|
827
|
+
worker_id=config.worker_id[:8],
|
|
828
|
+
websocket_url=config.websocket_url
|
|
829
|
+
)
|
|
830
|
+
else:
|
|
831
|
+
os.environ["WEBSOCKET_ENABLED"] = "false"
|
|
832
|
+
if not should_use_websocket():
|
|
833
|
+
logger.info("websocket_disabled_serverless_environment")
|
|
834
|
+
else:
|
|
835
|
+
logger.info("websocket_disabled_using_http")
|
|
836
|
+
|
|
837
|
+
# Set Redis environment variables if provided (for Redis-first event streaming)
|
|
838
|
+
if config.redis_enabled and config.redis_url:
|
|
839
|
+
os.environ["REDIS_URL"] = config.redis_url
|
|
840
|
+
os.environ["REDIS_ENABLED"] = "true"
|
|
841
|
+
if config.redis_password:
|
|
842
|
+
os.environ["REDIS_PASSWORD"] = config.redis_password
|
|
843
|
+
logger.info(
|
|
844
|
+
"redis_configured_for_direct_streaming",
|
|
845
|
+
worker_id=config.worker_id[:8],
|
|
846
|
+
redis_url=config.redis_url.split("@")[-1] if "@" in config.redis_url else config.redis_url # Log without password
|
|
847
|
+
)
|
|
848
|
+
else:
|
|
849
|
+
os.environ["REDIS_ENABLED"] = "false"
|
|
850
|
+
logger.debug("redis_not_configured_will_use_http_endpoint")
|
|
851
|
+
|
|
852
|
+
# Step 2: Connect to Temporal
|
|
853
|
+
ProgressUI.step("⏳", "Connecting to Temporal...")
|
|
854
|
+
client = await create_temporal_client(config)
|
|
855
|
+
ProgressUI.success("✓", "Connected to Temporal")
|
|
856
|
+
|
|
857
|
+
# Step 3: Send initial heartbeat
|
|
858
|
+
ProgressUI.step("⏳", "Sending heartbeat...")
|
|
859
|
+
await send_heartbeat(
|
|
860
|
+
config=config,
|
|
861
|
+
kubiya_api_key=kubiya_api_key,
|
|
862
|
+
status="active",
|
|
863
|
+
tasks_processed=0
|
|
864
|
+
)
|
|
865
|
+
ProgressUI.success("✓", "Worker visible in UI")
|
|
866
|
+
|
|
867
|
+
# Start heartbeat loop in background
|
|
868
|
+
heartbeat_task = asyncio.create_task(
|
|
869
|
+
heartbeat_loop(config, kubiya_api_key, heartbeat_interval)
|
|
870
|
+
)
|
|
871
|
+
|
|
872
|
+
# Start health monitoring for agent-runtime if enabled
|
|
873
|
+
health_monitor_task = None
|
|
874
|
+
if agent_runtime_server is not None:
|
|
875
|
+
from control_plane_api.worker.health_monitor import HealthMonitor
|
|
876
|
+
# Note: os is already imported at module level (line 22)
|
|
877
|
+
|
|
878
|
+
check_interval = int(os.environ.get("AGENT_RUNTIME_HEALTH_CHECK_INTERVAL", "30"))
|
|
879
|
+
max_failures = int(os.environ.get("AGENT_RUNTIME_MAX_RESTART_ATTEMPTS", "3"))
|
|
880
|
+
restart_enabled = os.environ.get("AGENT_RUNTIME_AUTO_RESTART", "true").lower() in ("true", "1", "yes")
|
|
881
|
+
|
|
882
|
+
health_monitor = HealthMonitor(
|
|
883
|
+
agent_runtime_server=agent_runtime_server,
|
|
884
|
+
check_interval=check_interval,
|
|
885
|
+
max_failures=max_failures,
|
|
886
|
+
restart_enabled=restart_enabled,
|
|
887
|
+
)
|
|
888
|
+
await health_monitor.start()
|
|
889
|
+
ProgressUI.success("✓", "Health monitoring enabled")
|
|
890
|
+
log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Health monitoring started (interval={check_interval}s)")
|
|
891
|
+
|
|
892
|
+
# Step 4: Create worker
|
|
893
|
+
ProgressUI.step("⏳", "Starting worker...")
|
|
894
|
+
|
|
895
|
+
# Configure workflow sandbox with passthrough modules
|
|
896
|
+
# These modules use non-deterministic operations at import time but are safe
|
|
897
|
+
# because they're only used in activities, not workflow logic
|
|
898
|
+
sandbox_restrictions = SandboxRestrictions.default.with_passthrough_modules(
|
|
899
|
+
"structlog",
|
|
900
|
+
"structlog.dev",
|
|
901
|
+
"structlog.processors",
|
|
902
|
+
"structlog.tracebacks",
|
|
903
|
+
"rich",
|
|
904
|
+
"rich.traceback",
|
|
905
|
+
"control_plane_api.version", # Version checking uses filesystem operations
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
worker = Worker(
|
|
909
|
+
client,
|
|
910
|
+
task_queue=config.environment_name,
|
|
911
|
+
workflows=[
|
|
912
|
+
AgentExecutionWorkflow,
|
|
913
|
+
TeamExecutionWorkflow,
|
|
914
|
+
ScheduledJobWrapperWorkflow, # Wrapper for scheduled jobs
|
|
915
|
+
],
|
|
916
|
+
activities=[
|
|
917
|
+
execute_agent_llm,
|
|
918
|
+
update_execution_status,
|
|
919
|
+
update_agent_status,
|
|
920
|
+
get_execution_details, # Get execution details from Control Plane
|
|
921
|
+
persist_conversation_history, # Conversation persistence
|
|
922
|
+
submit_runtime_analytics_activity, # Analytics submission
|
|
923
|
+
get_team_agents,
|
|
924
|
+
execute_team_coordination,
|
|
925
|
+
execute_with_runtime, # RuntimeFactory-based execution
|
|
926
|
+
publish_user_message, # Publish user message to stream
|
|
927
|
+
create_job_execution_record, # Job execution record creation
|
|
928
|
+
update_job_execution_status, # Job execution status updates
|
|
929
|
+
],
|
|
930
|
+
max_concurrent_activities=10,
|
|
931
|
+
max_concurrent_workflow_tasks=10,
|
|
932
|
+
workflow_runner=SandboxedWorkflowRunner(restrictions=sandbox_restrictions),
|
|
933
|
+
)
|
|
934
|
+
|
|
935
|
+
ProgressUI.success("✓", "Worker ready")
|
|
936
|
+
|
|
937
|
+
# Start WebSocket client if enabled
|
|
938
|
+
from control_plane_api.worker.control_plane_client import get_control_plane_client
|
|
939
|
+
|
|
940
|
+
control_plane_client = get_control_plane_client()
|
|
941
|
+
if config.websocket_enabled and should_use_websocket():
|
|
942
|
+
await control_plane_client.start_websocket()
|
|
943
|
+
ProgressUI.step("✓", "WebSocket connected")
|
|
944
|
+
logger.info("websocket_started", worker_id=config.worker_id[:8])
|
|
945
|
+
|
|
946
|
+
if single_execution_mode:
|
|
947
|
+
ProgressUI.header("📡 Listening for one task... (will exit after completion)")
|
|
948
|
+
else:
|
|
949
|
+
ProgressUI.header("📡 Listening for tasks... (Ctrl+C to stop)")
|
|
950
|
+
|
|
951
|
+
logger.info(
|
|
952
|
+
"worker_ready",
|
|
953
|
+
worker_id=config.worker_id[:8],
|
|
954
|
+
single_execution_mode=single_execution_mode,
|
|
955
|
+
)
|
|
956
|
+
|
|
957
|
+
# Run worker (blocks until interrupted)
|
|
958
|
+
try:
|
|
959
|
+
if single_execution_mode:
|
|
960
|
+
# Single execution mode: run worker and monitor for workflow completion
|
|
961
|
+
logger.info("starting_worker_in_single_execution_mode")
|
|
962
|
+
|
|
963
|
+
# Create a task to run the worker
|
|
964
|
+
worker_run_task = asyncio.create_task(worker.run())
|
|
965
|
+
|
|
966
|
+
# Monitor for execution completion via Control Plane API
|
|
967
|
+
async def monitor_and_shutdown():
    """
    Poll the Control Plane until the single execution finishes, then stop the worker.

    The monitor latches onto the first execution listed for this queue whose
    status is running, completed, failed or waiting_for_input. It then waits
    until that same execution reports a terminal status (completed, failed or
    cancelled) on ``required_consecutive_checks`` consecutive polls.

    The loop also ends when the worker task finishes by itself, or once
    ``max_runtime`` seconds of polling have been counted. In every case
    ``worker.shutdown()`` is awaited before returning.

    Robustness notes:
    - Consecutive terminal checks avoid shutting down on a transient status.
    - A non-200 response or any request error resets the consecutive counter.
    - "waiting_for_input" is deliberately NOT terminal in single execution
      mode: the LLM may still be processing (e.g. tool calls).
    - A missing/null status is treated as an empty status instead of raising
      and aborting the whole poll.
    - The shutdown reason distinguishes "completed", "worker_exited" and
      "timeout" so logs and UI output do not claim success on a timeout.
    """

    def short_id(value):
        # Only an 8-character prefix of the execution id is ever logged.
        return value[:8] if value else None

    # Brief wait for worker to start and pick up the execution
    # (kept short for faster ephemeral worker startup).
    await asyncio.sleep(1)

    max_runtime = 1800  # seconds of polling (30 minutes)
    check_interval = 2  # seconds between polls - balanced between speed and API load
    required_consecutive_checks = 2  # with 2s polling this gives ~4s for async work to settle
    detectable_statuses = ("running", "completed", "failed", "waiting_for_input")
    terminal_statuses = ("completed", "failed", "cancelled")

    elapsed = 0
    execution_seen = False
    execution_id = None
    consecutive_completion_checks = 0
    # Set to "completed" or "worker_exited" inside the loop; None means the
    # loop ran out of time.
    shutdown_reason = None

    logger.info("single_execution_monitor_started", queue_id=queue_id)

    # One client for the whole monitoring session so polls reuse connections
    # instead of opening a new one every check_interval seconds.
    async with httpx.AsyncClient(timeout=10.0) as http_client:
        while elapsed < max_runtime and shutdown_reason is None:
            await asyncio.sleep(check_interval)
            elapsed += check_interval

            # Check if worker task completed unexpectedly
            if worker_run_task.done():
                logger.info("single_execution_worker_task_completed", elapsed=elapsed)
                shutdown_reason = "worker_exited"
                break

            # Query Control Plane for recent executions on this queue
            try:
                response = await http_client.get(
                    f"{control_plane_url}/api/v1/worker-queues/{queue_id}/executions",
                    headers={"Authorization": f"Bearer {kubiya_api_key}"},
                    params={"limit": 5, "status": "all"},
                )

                if response.status_code != 200:
                    logger.debug("single_execution_status_check_failed", status_code=response.status_code)
                    # Reset consecutive checks on failed API call to be safe
                    if consecutive_completion_checks > 0:
                        logger.debug("single_execution_resetting_counter_after_failed_check")
                        consecutive_completion_checks = 0
                    continue

                for execution in response.json():
                    # `or ""` covers both a missing key and an explicit null status.
                    exec_status = (execution.get("status") or "").lower()
                    exec_id = execution.get("id")

                    if not execution_seen and exec_status in detectable_statuses:
                        execution_seen = True
                        execution_id = exec_id
                        logger.info(
                            "single_execution_detected",
                            execution_id=short_id(exec_id),
                            status=exec_status,
                        )

                    # Only the execution we latched onto drives the shutdown decision.
                    if not execution_seen or exec_id != execution_id:
                        continue

                    if exec_status not in terminal_statuses:
                        # Execution is back to running state - reset counter
                        if consecutive_completion_checks > 0:
                            logger.info(
                                "single_execution_still_active",
                                execution_id=short_id(exec_id),
                                status=exec_status,
                                resetting_counter=True,
                            )
                            consecutive_completion_checks = 0
                        continue

                    consecutive_completion_checks += 1
                    logger.info(
                        "single_execution_completion_check",
                        execution_id=short_id(exec_id),
                        status=exec_status,
                        consecutive_checks=consecutive_completion_checks,
                        required_checks=required_consecutive_checks,
                        elapsed=elapsed,
                    )

                    # Only shutdown after consecutive checks confirm completion
                    if consecutive_completion_checks >= required_consecutive_checks:
                        logger.info(
                            "single_execution_completed",
                            execution_id=short_id(exec_id),
                            status=exec_status,
                            elapsed=elapsed,
                        )
                        # Give SSE clients time to receive all final events
                        logger.info(
                            "single_execution_grace_period_starting",
                            execution_id=short_id(exec_id),
                            grace_seconds=2,
                        )
                        await asyncio.sleep(2)
                        shutdown_reason = "completed"
                        break

            except Exception as e:
                logger.debug("single_execution_status_check_error", error=str(e))
                # Reset consecutive checks on error to be safe
                if consecutive_completion_checks > 0:
                    logger.debug("single_execution_resetting_counter_after_error")
                    consecutive_completion_checks = 0
                # Continue monitoring even if status check fails

    # The loop ended without an explicit reason: the polling budget ran out.
    if shutdown_reason is None:
        shutdown_reason = "timeout"
        logger.warning("single_execution_timeout_reached", elapsed=elapsed)

    # Shutdown the worker gracefully
    logger.info("single_execution_triggering_shutdown", elapsed_seconds=elapsed, reason=shutdown_reason)
    if shutdown_reason == "completed":
        ProgressUI.step("✓", "Task completed - shutting down worker...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Task completed, shutting down...")
    elif shutdown_reason == "worker_exited":
        ProgressUI.step("⏳", "Worker task ended - shutting down worker...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker task ended, shutting down...")
    else:
        ProgressUI.step("⏳", "Execution monitor timed out - shutting down worker...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Execution monitor timed out, shutting down...")
    await worker.shutdown()
|
|
1089
|
+
|
|
1090
|
+
# Start monitoring task
|
|
1091
|
+
monitor_task = asyncio.create_task(monitor_and_shutdown())
|
|
1092
|
+
|
|
1093
|
+
try:
|
|
1094
|
+
# Wait for worker to complete
|
|
1095
|
+
await worker_run_task
|
|
1096
|
+
logger.info("single_execution_worker_stopped")
|
|
1097
|
+
finally:
|
|
1098
|
+
# Cancel monitor task if still running
|
|
1099
|
+
if not monitor_task.done():
|
|
1100
|
+
monitor_task.cancel()
|
|
1101
|
+
try:
|
|
1102
|
+
await monitor_task
|
|
1103
|
+
except asyncio.CancelledError:
|
|
1104
|
+
pass
|
|
1105
|
+
else:
|
|
1106
|
+
# Normal mode - run indefinitely
|
|
1107
|
+
await worker.run()
|
|
1108
|
+
finally:
|
|
1109
|
+
# Stop WebSocket client
|
|
1110
|
+
await control_plane_client.stop_websocket()
|
|
1111
|
+
|
|
1112
|
+
# Cancel heartbeat task when worker stops
|
|
1113
|
+
heartbeat_task.cancel()
|
|
1114
|
+
try:
|
|
1115
|
+
await heartbeat_task
|
|
1116
|
+
except asyncio.CancelledError:
|
|
1117
|
+
pass
|
|
1118
|
+
|
|
1119
|
+
# Notify control plane of graceful shutdown
|
|
1120
|
+
print()
|
|
1121
|
+
ProgressUI.step("⏳", "Shutting down gracefully...")
|
|
1122
|
+
|
|
1123
|
+
# Delete ephemeral queue if we're the owner (single execution mode)
|
|
1124
|
+
if config.queue_ephemeral and config.queue_single_execution and config.queue_id:
|
|
1125
|
+
try:
|
|
1126
|
+
await delete_ephemeral_queue(
|
|
1127
|
+
config=config,
|
|
1128
|
+
kubiya_api_key=kubiya_api_key,
|
|
1129
|
+
queue_id=config.queue_id
|
|
1130
|
+
)
|
|
1131
|
+
logger.info("ephemeral_queue_cleaned_up", queue_id=config.queue_id)
|
|
1132
|
+
except Exception as e:
|
|
1133
|
+
logger.warning(
|
|
1134
|
+
"ephemeral_queue_cleanup_failed",
|
|
1135
|
+
queue_id=config.queue_id,
|
|
1136
|
+
error=str(e)
|
|
1137
|
+
)
|
|
1138
|
+
# Continue shutdown even if delete fails (TTL will handle it)
|
|
1139
|
+
|
|
1140
|
+
await send_disconnect(
|
|
1141
|
+
config=config,
|
|
1142
|
+
kubiya_api_key=kubiya_api_key,
|
|
1143
|
+
reason="shutdown",
|
|
1144
|
+
exit_code=0
|
|
1145
|
+
)
|
|
1146
|
+
ProgressUI.success("✓", "Worker stopped")
|
|
1147
|
+
print()
|
|
1148
|
+
|
|
1149
|
+
except KeyboardInterrupt:
|
|
1150
|
+
print()
|
|
1151
|
+
ProgressUI.step("⏳", "Shutting down...")
|
|
1152
|
+
|
|
1153
|
+
# Stop health monitor if running
|
|
1154
|
+
if health_monitor is not None:
|
|
1155
|
+
try:
|
|
1156
|
+
await health_monitor.stop()
|
|
1157
|
+
except Exception as e:
|
|
1158
|
+
logger.warning("health_monitor_stop_failed", error=str(e))
|
|
1159
|
+
|
|
1160
|
+
# Stop agent-runtime server if running
|
|
1161
|
+
if agent_runtime_server is not None:
|
|
1162
|
+
try:
|
|
1163
|
+
ProgressUI.step("⏳", "Stopping agent-runtime server...")
|
|
1164
|
+
agent_runtime_server.stop(timeout=10)
|
|
1165
|
+
ProgressUI.success("✓", "Agent runtime stopped")
|
|
1166
|
+
except Exception as e:
|
|
1167
|
+
logger.warning("agent_runtime_stop_failed_on_interrupt", error=str(e))
|
|
1168
|
+
|
|
1169
|
+
# Stop WebSocket client
|
|
1170
|
+
from control_plane_api.worker.control_plane_client import get_control_plane_client
|
|
1171
|
+
try:
|
|
1172
|
+
control_plane_client = get_control_plane_client()
|
|
1173
|
+
await control_plane_client.stop_websocket()
|
|
1174
|
+
except:
|
|
1175
|
+
pass
|
|
1176
|
+
|
|
1177
|
+
# Notify control plane of keyboard interrupt (only if config was successfully obtained)
|
|
1178
|
+
try:
|
|
1179
|
+
if 'config' in locals():
|
|
1180
|
+
# Delete ephemeral queue if we're the owner
|
|
1181
|
+
if config.queue_ephemeral and config.queue_single_execution and config.queue_id:
|
|
1182
|
+
try:
|
|
1183
|
+
await delete_ephemeral_queue(
|
|
1184
|
+
config=config,
|
|
1185
|
+
kubiya_api_key=kubiya_api_key,
|
|
1186
|
+
queue_id=config.queue_id
|
|
1187
|
+
)
|
|
1188
|
+
except Exception as e:
|
|
1189
|
+
logger.warning(
|
|
1190
|
+
"ephemeral_queue_cleanup_on_interrupt_failed",
|
|
1191
|
+
error=str(e)
|
|
1192
|
+
)
|
|
1193
|
+
|
|
1194
|
+
await send_disconnect(
|
|
1195
|
+
config=config,
|
|
1196
|
+
kubiya_api_key=kubiya_api_key,
|
|
1197
|
+
reason="shutdown",
|
|
1198
|
+
exit_code=0
|
|
1199
|
+
)
|
|
1200
|
+
ProgressUI.success("✓", "Worker stopped")
|
|
1201
|
+
else:
|
|
1202
|
+
logger.info("shutdown_before_registration_completed")
|
|
1203
|
+
except Exception as e:
|
|
1204
|
+
logger.warning("disconnect_on_interrupt_failed", error=str(e))
|
|
1205
|
+
except Exception as e:
|
|
1206
|
+
import traceback
|
|
1207
|
+
logger.error("temporal_worker_error", error=str(e), traceback=traceback.format_exc())
|
|
1208
|
+
|
|
1209
|
+
# Stop health monitor if running
|
|
1210
|
+
if health_monitor is not None:
|
|
1211
|
+
try:
|
|
1212
|
+
await health_monitor.stop()
|
|
1213
|
+
except Exception as stop_error:
|
|
1214
|
+
logger.warning("health_monitor_stop_failed_on_error", error=str(stop_error))
|
|
1215
|
+
|
|
1216
|
+
# Stop agent-runtime server if running
|
|
1217
|
+
if agent_runtime_server is not None:
|
|
1218
|
+
try:
|
|
1219
|
+
logger.info("stopping_agent_runtime_on_error")
|
|
1220
|
+
agent_runtime_server.stop(timeout=10)
|
|
1221
|
+
logger.info("agent_runtime_stopped_on_error")
|
|
1222
|
+
except Exception as stop_error:
|
|
1223
|
+
logger.warning("agent_runtime_stop_failed_on_error", error=str(stop_error))
|
|
1224
|
+
|
|
1225
|
+
# Notify control plane of error (only if config was successfully obtained)
|
|
1226
|
+
try:
|
|
1227
|
+
if 'config' in locals():
|
|
1228
|
+
await send_disconnect(
|
|
1229
|
+
config=config,
|
|
1230
|
+
kubiya_api_key=kubiya_api_key,
|
|
1231
|
+
reason="error",
|
|
1232
|
+
exit_code=1,
|
|
1233
|
+
error_message=str(e)[:2000] + (" [truncated]" if len(str(e)) > 2000 else "")
|
|
1234
|
+
)
|
|
1235
|
+
else:
|
|
1236
|
+
logger.warning("disconnect_skipped_no_config", error="Worker failed before registration completed")
|
|
1237
|
+
except Exception as disconnect_error:
|
|
1238
|
+
logger.warning("disconnect_on_error_failed", error=str(disconnect_error))
|
|
1239
|
+
raise
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
def main():
    """Run the worker from the command line.

    Parses the optional CLI flags, copies each supplied flag into its
    environment variable when that variable is not already set, and then
    runs ``run_worker()`` in a fresh event loop. A keyboard interrupt is
    only logged; any other exception is logged and the process exits with
    status 1.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Kubiya Agent Worker - Temporal worker for agent execution"
    )
    cli.add_argument(
        "--queue-id",
        type=str,
        help="Worker queue ID (can also use QUEUE_ID env var)",
    )
    cli.add_argument(
        "--api-key",
        type=str,
        help="Kubiya API key (can also use KUBIYA_API_KEY env var)",
    )
    cli.add_argument(
        "--control-plane-url",
        type=str,
        help="Control plane URL (can also use CONTROL_PLANE_URL env var)",
    )
    cli.add_argument(
        "--heartbeat-interval",
        type=int,
        default=60,
        help="Heartbeat interval in seconds (default: 60, lightweight mode)",
    )
    parsed = cli.parse_args()

    # A flag only fills in a variable that is unset or empty; an existing
    # environment value always wins over the command line.
    env_overrides = (
        ("QUEUE_ID", parsed.queue_id),
        ("KUBIYA_API_KEY", parsed.api_key),
        ("CONTROL_PLANE_URL", parsed.control_plane_url),
        ("HEARTBEAT_INTERVAL", parsed.heartbeat_interval),
    )
    for env_name, cli_value in env_overrides:
        if not cli_value or os.environ.get(env_name):
            continue
        # str() is a no-op for the string flags and converts the integer interval.
        os.environ[env_name] = str(cli_value)

    logger.info("worker_starting")

    try:
        asyncio.run(run_worker())
    except KeyboardInterrupt:
        logger.info("worker_stopped")
    except Exception as exc:
        logger.error("worker_failed", error=str(exc))
        sys.exit(1)
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+
# Script entry point: start the worker CLI only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|