PyPI - kubiya-control-plane-api - Versions diffs - 0.9.15__py3-none-any.whl - Mend

kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (479) hide show

control_plane_api/LICENSE +676 -0
control_plane_api/README.md +350 -0
control_plane_api/__init__.py +4 -0
control_plane_api/__version__.py +8 -0
control_plane_api/alembic/README +1 -0
control_plane_api/alembic/env.py +121 -0
control_plane_api/alembic/script.py.mako +28 -0
control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
control_plane_api/alembic.ini +148 -0
control_plane_api/api/index.py +12 -0
control_plane_api/app/__init__.py +11 -0
control_plane_api/app/activities/__init__.py +20 -0
control_plane_api/app/activities/agent_activities.py +384 -0
control_plane_api/app/activities/plan_generation_activities.py +499 -0
control_plane_api/app/activities/team_activities.py +424 -0
control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
control_plane_api/app/config/__init__.py +35 -0
control_plane_api/app/config/api_config.py +469 -0
control_plane_api/app/config/config_loader.py +224 -0
control_plane_api/app/config/model_pricing.py +323 -0
control_plane_api/app/config/storage_config.py +159 -0
control_plane_api/app/config.py +115 -0
control_plane_api/app/controllers/__init__.py +0 -0
control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
control_plane_api/app/database.py +135 -0
control_plane_api/app/exceptions.py +408 -0
control_plane_api/app/lib/__init__.py +11 -0
control_plane_api/app/lib/environment.py +65 -0
control_plane_api/app/lib/event_bus/__init__.py +17 -0
control_plane_api/app/lib/event_bus/base.py +136 -0
control_plane_api/app/lib/event_bus/manager.py +335 -0
control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
control_plane_api/app/lib/job_executor.py +330 -0
control_plane_api/app/lib/kubiya_client.py +293 -0
control_plane_api/app/lib/litellm_pricing.py +166 -0
control_plane_api/app/lib/mcp_validation.py +163 -0
control_plane_api/app/lib/nats/__init__.py +13 -0
control_plane_api/app/lib/nats/credentials_manager.py +288 -0
control_plane_api/app/lib/nats/listener.py +374 -0
control_plane_api/app/lib/planning_prompt_builder.py +153 -0
control_plane_api/app/lib/planning_tools/__init__.py +41 -0
control_plane_api/app/lib/planning_tools/agents.py +409 -0
control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
control_plane_api/app/lib/planning_tools/base.py +119 -0
control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
control_plane_api/app/lib/planning_tools/environments.py +218 -0
control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
control_plane_api/app/lib/planning_tools/models.py +93 -0
control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
control_plane_api/app/lib/planning_tools/resources.py +242 -0
control_plane_api/app/lib/planning_tools/teams.py +334 -0
control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
control_plane_api/app/lib/redis_client.py +803 -0
control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
control_plane_api/app/lib/storage/__init__.py +20 -0
control_plane_api/app/lib/storage/base_provider.py +274 -0
control_plane_api/app/lib/storage/provider_factory.py +157 -0
control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
control_plane_api/app/lib/supabase.py +71 -0
control_plane_api/app/lib/supabase_utils.py +138 -0
control_plane_api/app/lib/task_planning/__init__.py +138 -0
control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
control_plane_api/app/lib/task_planning/agents.py +389 -0
control_plane_api/app/lib/task_planning/cache.py +218 -0
control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
control_plane_api/app/lib/task_planning/helpers.py +293 -0
control_plane_api/app/lib/task_planning/hooks.py +474 -0
control_plane_api/app/lib/task_planning/models.py +503 -0
control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
control_plane_api/app/lib/task_planning/runner.py +656 -0
control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
control_plane_api/app/lib/task_planning/workflow.py +424 -0
control_plane_api/app/lib/templating/__init__.py +88 -0
control_plane_api/app/lib/templating/compiler.py +278 -0
control_plane_api/app/lib/templating/engine.py +178 -0
control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
control_plane_api/app/lib/templating/parsers/base.py +96 -0
control_plane_api/app/lib/templating/parsers/env.py +85 -0
control_plane_api/app/lib/templating/parsers/graph.py +112 -0
control_plane_api/app/lib/templating/parsers/secret.py +87 -0
control_plane_api/app/lib/templating/parsers/simple.py +81 -0
control_plane_api/app/lib/templating/resolver.py +366 -0
control_plane_api/app/lib/templating/types.py +214 -0
control_plane_api/app/lib/templating/validator.py +201 -0
control_plane_api/app/lib/temporal_client.py +232 -0
control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
control_plane_api/app/lib/temporal_credentials_service.py +203 -0
control_plane_api/app/lib/validation/__init__.py +24 -0
control_plane_api/app/lib/validation/runtime_validation.py +388 -0
control_plane_api/app/main.py +531 -0
control_plane_api/app/middleware/__init__.py +10 -0
control_plane_api/app/middleware/auth.py +645 -0
control_plane_api/app/middleware/exception_handler.py +267 -0
control_plane_api/app/middleware/prometheus_middleware.py +173 -0
control_plane_api/app/middleware/rate_limiting.py +384 -0
control_plane_api/app/middleware/request_id.py +202 -0
control_plane_api/app/models/__init__.py +40 -0
control_plane_api/app/models/agent.py +90 -0
control_plane_api/app/models/analytics.py +206 -0
control_plane_api/app/models/associations.py +107 -0
control_plane_api/app/models/auth_user.py +73 -0
control_plane_api/app/models/context.py +161 -0
control_plane_api/app/models/custom_integration.py +99 -0
control_plane_api/app/models/environment.py +64 -0
control_plane_api/app/models/execution.py +125 -0
control_plane_api/app/models/execution_transition.py +50 -0
control_plane_api/app/models/job.py +159 -0
control_plane_api/app/models/llm_model.py +78 -0
control_plane_api/app/models/orchestration.py +66 -0
control_plane_api/app/models/plan_execution.py +102 -0
control_plane_api/app/models/presence.py +49 -0
control_plane_api/app/models/project.py +61 -0
control_plane_api/app/models/project_management.py +85 -0
control_plane_api/app/models/session.py +29 -0
control_plane_api/app/models/skill.py +155 -0
control_plane_api/app/models/system_tables.py +43 -0
control_plane_api/app/models/task_planning.py +372 -0
control_plane_api/app/models/team.py +86 -0
control_plane_api/app/models/trace.py +257 -0
control_plane_api/app/models/user_profile.py +54 -0
control_plane_api/app/models/worker.py +221 -0
control_plane_api/app/models/workflow.py +161 -0
control_plane_api/app/models/workspace.py +50 -0
control_plane_api/app/observability/__init__.py +177 -0
control_plane_api/app/observability/context_logging.py +475 -0
control_plane_api/app/observability/decorators.py +337 -0
control_plane_api/app/observability/local_span_processor.py +702 -0
control_plane_api/app/observability/metrics.py +303 -0
control_plane_api/app/observability/middleware.py +246 -0
control_plane_api/app/observability/optional.py +115 -0
control_plane_api/app/observability/tracing.py +382 -0
control_plane_api/app/policies/README.md +149 -0
control_plane_api/app/policies/approved_users.rego +62 -0
control_plane_api/app/policies/business_hours.rego +51 -0
control_plane_api/app/policies/rate_limiting.rego +100 -0
control_plane_api/app/policies/tool_enforcement/README.md +336 -0
control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
control_plane_api/app/policies/tool_restrictions.rego +86 -0
control_plane_api/app/routers/__init__.py +4 -0
control_plane_api/app/routers/agents.py +382 -0
control_plane_api/app/routers/agents_v2.py +1598 -0
control_plane_api/app/routers/analytics.py +1310 -0
control_plane_api/app/routers/auth.py +59 -0
control_plane_api/app/routers/client_config.py +57 -0
control_plane_api/app/routers/context_graph.py +561 -0
control_plane_api/app/routers/context_manager.py +577 -0
control_plane_api/app/routers/custom_integrations.py +490 -0
control_plane_api/app/routers/enforcer.py +132 -0
control_plane_api/app/routers/environment_context.py +252 -0
control_plane_api/app/routers/environments.py +761 -0
control_plane_api/app/routers/execution_environment.py +847 -0
control_plane_api/app/routers/executions/__init__.py +28 -0
control_plane_api/app/routers/executions/router.py +286 -0
control_plane_api/app/routers/executions/services/__init__.py +22 -0
control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
control_plane_api/app/routers/executions/services/status_service.py +420 -0
control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
control_plane_api/app/routers/executions/services/worker_health.py +514 -0
control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
control_plane_api/app/routers/executions.py +4888 -0
control_plane_api/app/routers/health.py +165 -0
control_plane_api/app/routers/health_v2.py +394 -0
control_plane_api/app/routers/integration_templates.py +496 -0
control_plane_api/app/routers/integrations.py +287 -0
control_plane_api/app/routers/jobs.py +1809 -0
control_plane_api/app/routers/metrics.py +517 -0
control_plane_api/app/routers/models.py +82 -0
control_plane_api/app/routers/models_v2.py +628 -0
control_plane_api/app/routers/plan_executions.py +1481 -0
control_plane_api/app/routers/plan_generation_async.py +304 -0
control_plane_api/app/routers/policies.py +669 -0
control_plane_api/app/routers/presence.py +234 -0
control_plane_api/app/routers/projects.py +987 -0
control_plane_api/app/routers/runners.py +379 -0
control_plane_api/app/routers/runtimes.py +172 -0
control_plane_api/app/routers/secrets.py +171 -0
control_plane_api/app/routers/skills.py +1010 -0
control_plane_api/app/routers/skills_definitions.py +140 -0
control_plane_api/app/routers/storage.py +456 -0
control_plane_api/app/routers/task_planning.py +611 -0
control_plane_api/app/routers/task_queues.py +650 -0
control_plane_api/app/routers/team_context.py +274 -0
control_plane_api/app/routers/teams.py +1747 -0
control_plane_api/app/routers/templates.py +248 -0
control_plane_api/app/routers/traces.py +571 -0
control_plane_api/app/routers/websocket_client.py +479 -0
control_plane_api/app/routers/websocket_executions_status.py +437 -0
control_plane_api/app/routers/websocket_gateway.py +323 -0
control_plane_api/app/routers/websocket_traces.py +576 -0
control_plane_api/app/routers/worker_queues.py +2555 -0
control_plane_api/app/routers/worker_websocket.py +419 -0
control_plane_api/app/routers/workers.py +1004 -0
control_plane_api/app/routers/workflows.py +204 -0
control_plane_api/app/runtimes/__init__.py +6 -0
control_plane_api/app/runtimes/validation.py +344 -0
control_plane_api/app/schemas/__init__.py +1 -0
control_plane_api/app/schemas/job_schemas.py +302 -0
control_plane_api/app/schemas/mcp_schemas.py +311 -0
control_plane_api/app/schemas/template_schemas.py +133 -0
control_plane_api/app/schemas/trace_schemas.py +168 -0
control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
control_plane_api/app/services/__init__.py +1 -0
control_plane_api/app/services/agno_planning_strategy.py +233 -0
control_plane_api/app/services/agno_service.py +838 -0
control_plane_api/app/services/claude_code_planning_service.py +203 -0
control_plane_api/app/services/context_graph_client.py +224 -0
control_plane_api/app/services/custom_integration_service.py +415 -0
control_plane_api/app/services/integration_resolution_service.py +345 -0
control_plane_api/app/services/litellm_service.py +394 -0
control_plane_api/app/services/plan_generator.py +79 -0
control_plane_api/app/services/planning_strategy.py +66 -0
control_plane_api/app/services/planning_strategy_factory.py +118 -0
control_plane_api/app/services/policy_service.py +615 -0
control_plane_api/app/services/state_transition_service.py +755 -0
control_plane_api/app/services/storage_service.py +593 -0
control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
control_plane_api/app/services/trace_retention.py +354 -0
control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
control_plane_api/app/services/workflow_operations_service.py +611 -0
control_plane_api/app/skills/__init__.py +100 -0
control_plane_api/app/skills/base.py +239 -0
control_plane_api/app/skills/builtin/__init__.py +37 -0
control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
control_plane_api/app/skills/builtin/docker/skill.py +104 -0
control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
control_plane_api/app/skills/builtin/python/__init__.py +4 -0
control_plane_api/app/skills/builtin/python/skill.py +92 -0
control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
control_plane_api/app/skills/builtin/shell/skill.py +161 -0
control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
control_plane_api/app/skills/builtin/slack/skill.py +302 -0
control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
control_plane_api/app/skills/business_intelligence.py +189 -0
control_plane_api/app/skills/config.py +63 -0
control_plane_api/app/skills/loaders/__init__.py +14 -0
control_plane_api/app/skills/loaders/base.py +73 -0
control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
control_plane_api/app/skills/registry.py +125 -0
control_plane_api/app/utils/helpers.py +12 -0
control_plane_api/app/utils/workflow_executor.py +354 -0
control_plane_api/app/workflows/__init__.py +11 -0
control_plane_api/app/workflows/agent_execution.py +520 -0
control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
control_plane_api/app/workflows/namespace_provisioning.py +326 -0
control_plane_api/app/workflows/plan_generation.py +254 -0
control_plane_api/app/workflows/team_execution.py +442 -0
control_plane_api/scripts/seed_models.py +240 -0
control_plane_api/scripts/validate_existing_tool_names.py +492 -0
control_plane_api/shared/__init__.py +8 -0
control_plane_api/shared/version.py +17 -0
control_plane_api/test_deduplication.py +274 -0
control_plane_api/test_executor_deduplication_e2e.py +309 -0
control_plane_api/test_job_execution_e2e.py +283 -0
control_plane_api/test_real_integration.py +193 -0
control_plane_api/version.py +38 -0
control_plane_api/worker/__init__.py +0 -0
control_plane_api/worker/activities/__init__.py +0 -0
control_plane_api/worker/activities/agent_activities.py +1585 -0
control_plane_api/worker/activities/approval_activities.py +234 -0
control_plane_api/worker/activities/job_activities.py +199 -0
control_plane_api/worker/activities/runtime_activities.py +1167 -0
control_plane_api/worker/activities/skill_activities.py +282 -0
control_plane_api/worker/activities/team_activities.py +479 -0
control_plane_api/worker/agent_runtime_server.py +370 -0
control_plane_api/worker/binary_manager.py +333 -0
control_plane_api/worker/config/__init__.py +31 -0
control_plane_api/worker/config/worker_config.py +273 -0
control_plane_api/worker/control_plane_client.py +1491 -0
control_plane_api/worker/examples/analytics_integration_example.py +362 -0
control_plane_api/worker/health_monitor.py +159 -0
control_plane_api/worker/metrics.py +237 -0
control_plane_api/worker/models/__init__.py +1 -0
control_plane_api/worker/models/error_events.py +105 -0
control_plane_api/worker/models/inputs.py +89 -0
control_plane_api/worker/runtimes/__init__.py +35 -0
control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
control_plane_api/worker/runtimes/agno/__init__.py +34 -0
control_plane_api/worker/runtimes/agno/config.py +248 -0
control_plane_api/worker/runtimes/agno/hooks.py +385 -0
control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
control_plane_api/worker/runtimes/agno/utils.py +163 -0
control_plane_api/worker/runtimes/base.py +979 -0
control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
control_plane_api/worker/runtimes/claude_code/config.py +829 -0
control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
control_plane_api/worker/runtimes/factory.py +173 -0
control_plane_api/worker/runtimes/model_utils.py +107 -0
control_plane_api/worker/runtimes/validation.py +93 -0
control_plane_api/worker/services/__init__.py +1 -0
control_plane_api/worker/services/agent_communication_tools.py +908 -0
control_plane_api/worker/services/agent_executor.py +485 -0
control_plane_api/worker/services/agent_executor_v2.py +793 -0
control_plane_api/worker/services/analytics_collector.py +457 -0
control_plane_api/worker/services/analytics_service.py +464 -0
control_plane_api/worker/services/approval_tools.py +310 -0
control_plane_api/worker/services/approval_tools_agno.py +207 -0
control_plane_api/worker/services/cancellation_manager.py +177 -0
control_plane_api/worker/services/code_ingestion_tools.py +465 -0
control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
control_plane_api/worker/services/data_visualization.py +834 -0
control_plane_api/worker/services/event_publisher.py +531 -0
control_plane_api/worker/services/jira_tools.py +257 -0
control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
control_plane_api/worker/services/runtime_analytics.py +328 -0
control_plane_api/worker/services/session_service.py +365 -0
control_plane_api/worker/services/skill_context_enhancement.py +181 -0
control_plane_api/worker/services/skill_factory.py +471 -0
control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
control_plane_api/worker/services/team_executor.py +715 -0
control_plane_api/worker/services/team_executor_v2.py +1866 -0
control_plane_api/worker/services/tool_enforcement.py +254 -0
control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
control_plane_api/worker/services/workflow_executor/models.py +142 -0
control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
control_plane_api/worker/skills/__init__.py +12 -0
control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
control_plane_api/worker/skills/loaders/__init__.py +5 -0
control_plane_api/worker/skills/loaders/base.py +23 -0
control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
control_plane_api/worker/skills/registry.py +208 -0
control_plane_api/worker/tests/__init__.py +1 -0
control_plane_api/worker/tests/conftest.py +12 -0
control_plane_api/worker/tests/e2e/__init__.py +0 -0
control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
control_plane_api/worker/tests/integration/__init__.py +0 -0
control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
control_plane_api/worker/tests/unit/__init__.py +0 -0
control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
control_plane_api/worker/utils/__init__.py +1 -0
control_plane_api/worker/utils/chunk_batcher.py +330 -0
control_plane_api/worker/utils/environment.py +65 -0
control_plane_api/worker/utils/error_publisher.py +260 -0
control_plane_api/worker/utils/event_batcher.py +256 -0
control_plane_api/worker/utils/logging_config.py +335 -0
control_plane_api/worker/utils/logging_helper.py +326 -0
control_plane_api/worker/utils/parameter_validator.py +120 -0
control_plane_api/worker/utils/retry_utils.py +60 -0
control_plane_api/worker/utils/streaming_utils.py +665 -0
control_plane_api/worker/utils/tool_validation.py +332 -0
control_plane_api/worker/utils/workspace_manager.py +163 -0
control_plane_api/worker/websocket_client.py +393 -0
control_plane_api/worker/worker.py +1297 -0
control_plane_api/worker/workflows/__init__.py +0 -0
control_plane_api/worker/workflows/agent_execution.py +909 -0
control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
control_plane_api/worker/workflows/team_execution.py +611 -0
kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
scripts/__init__.py +1 -0
scripts/migrations.py +39 -0
scripts/seed_worker_queues.py +128 -0
scripts/setup_agent_runtime.py +142 -0
worker_internal/__init__.py +1 -0
worker_internal/planner/__init__.py +1 -0
worker_internal/planner/activities.py +1499 -0
worker_internal/planner/agent_tools.py +197 -0
worker_internal/planner/event_models.py +148 -0
worker_internal/planner/event_publisher.py +67 -0
worker_internal/planner/models.py +199 -0
worker_internal/planner/retry_logic.py +134 -0
worker_internal/planner/worker.py +300 -0
worker_internal/planner/workflows.py +970 -0

control_plane_api/worker/workflows/agent_execution.py ADDED Viewed

@@ -0,0 +1,909 @@
+"""Agent execution workflow for Temporal"""
+from dataclasses import dataclass, field
+from datetime import timedelta
+from typing import Optional, List, Dict, Any
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+import asyncio
+import os
+with workflow.unsafe.imports_passed_through():
+    from control_plane_api.worker.activities.agent_activities import (
+        execute_agent_llm,
+        update_execution_status,
+        get_execution_details,
+        update_agent_status,
+        persist_conversation_history,
+        submit_runtime_analytics_activity,
+        ActivityExecuteAgentInput,
+        ActivityUpdateExecutionInput,
+        ActivityGetExecutionInput,
+        ActivityUpdateAgentInput,
+        ActivityPersistConversationInput,
+        AnalyticsActivityInput,
+    )
+    from control_plane_api.worker.activities.runtime_activities import (
+        execute_with_runtime,
+        publish_user_message,
+        ActivityRuntimeExecuteInput,
+        PublishUserMessageInput,
+    )
+    from control_plane_api.worker.utils.logging_helper import execution_logger
+# Heartbeat timeout: Prove activity is alive (default 30 minutes)
+# This should be reasonable - heartbeats confirm the activity hasn't crashed
+HEARTBEAT_TIMEOUT_SECONDS = int(os.environ.get("ACTIVITY_HEARTBEAT_TIMEOUT_SECONDS", "1800"))
+# Activity execution timeout: Total time for activity to complete (default 24 hours)
+# This is the maximum time an activity can run. For streaming workflows, this should be VERY long
+# since the activity may stream for hours while the user interacts with the agent
+ACTIVITY_EXECUTION_TIMEOUT_SECONDS = int(os.environ.get("ACTIVITY_EXECUTION_TIMEOUT_SECONDS", "86400"))
+@dataclass
+class AgentExecutionInput:
+    """Input for agent execution workflow"""
+    # Required fields (no defaults)
+    agent_id: str
+    organization_id: str
+    prompt: str
+    # Optional fields (with defaults)
+    execution_id: Optional[str] = None  # Optional for backward compatibility with old schedules
+    system_prompt: Optional[str] = None
+    model_id: Optional[str] = None
+    model_config: dict = None
+    agent_config: dict = None
+    mcp_servers: dict = None  # MCP servers configuration
+    user_metadata: dict = None
+    runtime_type: str = "default"  # "default" (Agno) or "claude_code"
+    initial_message_timestamp: Optional[str] = None  # Real-time timestamp for initial message
+    def __post_init__(self):
+        if self.model_config is None:
+            self.model_config = {}
+        if self.agent_config is None:
+            self.agent_config = {}
+        if self.mcp_servers is None:
+            self.mcp_servers = {}
+        if self.user_metadata is None:
+            self.user_metadata = {}
+@dataclass
+class TeamExecutionInput:
+    """Input for team execution workflow (uses same workflow as agent)"""
+    # Required fields (no defaults)
+    team_id: str
+    organization_id: str
+    prompt: str
+    # Optional fields (with defaults)
+    execution_id: Optional[str] = None  # Optional for backward compatibility with old schedules
+    system_prompt: Optional[str] = None
+    model_id: Optional[str] = None
+    model_config: dict = None
+    team_config: dict = None
+    mcp_servers: dict = None  # MCP servers configuration
+    user_metadata: dict = None
+    runtime_type: str = "default"  # "default" (Agno) or "claude_code"
+    initial_message_timestamp: Optional[str] = None  # Real-time timestamp for initial message
+    def __post_init__(self):
+        if self.model_config is None:
+            self.model_config = {}
+        if self.team_config is None:
+            self.team_config = {}
+        if self.mcp_servers is None:
+            self.mcp_servers = {}
+        if self.user_metadata is None:
+            self.user_metadata = {}
+    def to_agent_input(self) -> AgentExecutionInput:
+        """Convert TeamExecutionInput to AgentExecutionInput for workflow reuse"""
+        return AgentExecutionInput(
+            execution_id=self.execution_id,
+            agent_id=self.team_id,  # Use team_id as agent_id
+            organization_id=self.organization_id,
+            prompt=self.prompt,
+            system_prompt=self.system_prompt,
+            model_id=self.model_id,
+            model_config=self.model_config,
+            agent_config=self.team_config,
+            mcp_servers=self.mcp_servers,
+            user_metadata=self.user_metadata,
+            runtime_type=self.runtime_type,
+            initial_message_timestamp=self.initial_message_timestamp,
+        )
+@dataclass
+class ChatMessage:
+    """Represents a message in the conversation"""
+    role: str  # "user", "assistant", "system", "tool"
+    content: str
+    timestamp: str
+    tool_name: Optional[str] = None
+    tool_input: Optional[Dict[str, Any]] = None
+    tool_output: Optional[Dict[str, Any]] = None
+    message_id: Optional[str] = None  # Unique identifier for deduplication
+    user_id: Optional[str] = None  # User who sent the message
+    user_name: Optional[str] = None
+    user_email: Optional[str] = None
+    user_avatar: Optional[str] = None
+@dataclass
+class ExecutionState:
+    """Current state of the execution, returned as-is by the workflow's ``get_state`` query.
+    A single instance is mutated in place by the signal handlers and the ``run`` loop.
+    """
+    status: str  # "pending", "running", "waiting_for_input", "completed", "failed", "interrupted" (or another status reported by the control plane)
+    messages: List[ChatMessage] = field(default_factory=list)  # conversation so far, in append order
+    current_response: str = ""  # text of the most recent assistant response
+    error_message: Optional[str] = None  # set when a turn fails or is interrupted
+    usage: Dict[str, Any] = field(default_factory=dict)  # token counts accumulated across turns
+    metadata: Dict[str, Any] = field(default_factory=dict)  # latest-turn info: model, finish reason, run id, turn count
+    is_waiting_for_input: bool = False  # cleared by both signals; set from the control-plane status after each turn
+    should_complete: bool = False  # set by the mark_as_done signal; ends the conversation loop
+@workflow.defn
+class AgentExecutionWorkflow:
+    """
+    Workflow for executing an agent with LLM with Temporal message passing support.
+    This workflow:
+    1. Updates execution status to running
+    2. Executes the agent's LLM call
+    3. Updates execution with results
+    4. Updates agent status
+    5. Supports queries for real-time state access
+    6. Supports signals for adding followup messages
+    """
+    def __init__(self) -> None:
+        """Initialize workflow state"""
+        self._state = ExecutionState(status="pending")
+        self._lock = asyncio.Lock()
+        self._new_message_count = 0
+        self._processed_message_count = 0
+    def _messages_to_dict(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+        """
+        Convert ChatMessage objects to dict format for persistence.
+        This ensures the conversation history is in a clean, serializable format
+        that can be stored in the database and retrieved later.
+        Args:
+            messages: List of ChatMessage objects from workflow state
+        Returns:
+            List of message dicts ready for persistence
+        """
+        return [
+            {
+                "role": msg.role,
+                "content": msg.content,
+                "timestamp": msg.timestamp,
+                "tool_name": msg.tool_name,
+                "tool_input": msg.tool_input,
+                "tool_output": msg.tool_output,
+                "tool_execution_id": getattr(msg, "tool_execution_id", None),  # CRITICAL: For tool message deduplication
+                "message_id": getattr(msg, "message_id", None),  # CRITICAL: For message deduplication
+                "user_id": getattr(msg, "user_id", None),
+                "user_name": getattr(msg, "user_name", None),
+                "user_email": getattr(msg, "user_email", None),
+                "user_avatar": getattr(msg, "user_avatar", None),
+            }
+            for msg in messages
+        ]
+    @workflow.query
+    def get_state(self) -> ExecutionState:
+        """Query handler: Get current execution state including messages and status"""
+        return self._state
+    @workflow.signal
+    async def add_message(self, message: ChatMessage) -> None:
+        """
+        Signal handler: Add a message to the conversation.
+        This allows clients to send followup messages while the workflow is running.
+        The workflow will wake up and process this message.
+        """
+        async with self._lock:
+            self._state.messages.append(message)
+            self._new_message_count += 1
+            self._state.is_waiting_for_input = False
+            workflow.logger.info(
+                f"Message added to conversation",
+                extra={
+                    "role": message.role,
+                    "content_preview": message.content[:100] if message.content else "",
+                    "total_messages": len(self._state.messages)
+                }
+            )
+    @workflow.signal
+    async def mark_as_done(self) -> None:
+        """
+        Signal handler: Mark the workflow as complete.
+        This signals that the user is done with the conversation and the workflow should complete.
+        """
+        async with self._lock:
+            self._state.should_complete = True
+            self._state.is_waiting_for_input = False
+            workflow.logger.info("Workflow marked as done by user")
+    @workflow.run
+    async def run(self, input: AgentExecutionInput) -> dict:
+        """
+        Run the agent execution workflow with Human-in-the-Loop (HITL) pattern.
+        This workflow implements a continuous conversation loop:
+        1. Process the initial user message
+        2. Execute LLM and return response
+        3. Wait for user input (signals)
+        4. Process followup messages in a loop
+        5. Only complete when user explicitly marks as done
+        Args:
+            input: Workflow input with execution details
+        Returns:
+            Execution result dict with response, usage, etc.
+        """
+        # Generate execution_id if not provided (for backward compatibility with old schedules)
+        execution_id = input.execution_id
+        if not execution_id:
+            execution_id = workflow.uuid4()
+            workflow.logger.info(
+                "Generated execution_id for backward compatibility",
+                extra={"execution_id": execution_id}
+            )
+            # Update input object to use generated ID
+            input.execution_id = execution_id
+        # Removed: execution start logging (was appearing for all workers, possibly due to Temporal replays)
+        # execution_logger.execution_started(
+        #     input.execution_id,
+        #     agent_id=input.agent_id,
+        #     model=input.model_id,
+        #     runtime=input.runtime_type
+        # )
+        workflow.logger.info(
+            f"Starting agent execution workflow with HITL pattern",
+            extra={
+                "execution_id": input.execution_id,
+                "agent_id": input.agent_id,
+                "organization_id": input.organization_id,
+            }
+        )
+        # Initialize state with user's initial message
+        # CRITICAL: Use real-time timestamp (not workflow.now()) to ensure chronological ordering
+        # This prevents timestamp mismatches between initial and follow-up messages
+        message_timestamp = input.initial_message_timestamp or workflow.now().isoformat()
+        initial_user_message = ChatMessage(
+            role="user",
+            content=input.prompt,
+            timestamp=message_timestamp,
+            message_id=f"{input.execution_id}_user_1",  # Generate deterministic ID
+        )
+        self._state.messages.append(initial_user_message)
+        self._state.status = "running"
+        self._new_message_count = 1  # Initial message counts as a new message
+        self._processed_message_count = 0  # No messages processed yet (no response)
+        try:
+            # Step 1: Update execution status to running
+            await workflow.execute_activity(
+                update_execution_status,
+                ActivityUpdateExecutionInput(
+                    execution_id=input.execution_id,
+                    status="running",
+                    started_at=workflow.now().isoformat(),
+                    execution_metadata={
+                        "workflow_started": True,
+                        "has_mcp_servers": bool(input.mcp_servers),
+                        "mcp_server_count": len(input.mcp_servers) if input.mcp_servers else 0,
+                        "hitl_enabled": True,
+                    },
+                ),
+                start_to_close_timeout=timedelta(seconds=30),
+            )
+            # Step 2: Update agent status to running
+            await workflow.execute_activity(
+                update_agent_status,
+                ActivityUpdateAgentInput(
+                    agent_id=input.agent_id,
+                    organization_id=input.organization_id,
+                    status="running",
+                    last_active_at=workflow.now().isoformat(),
+                ),
+                start_to_close_timeout=timedelta(seconds=30),
+            )
+            # Deprecate old patch: We moved status update BEFORE persistence (was after)
+            # This ensures deterministic replay when continuing multi-turn conversations
+            workflow.deprecate_patch("status-update-before-persistence")
+            # HITL Conversation Loop - Continue until user marks as done
+            conversation_turn = 0
+            while not self._state.should_complete:
+                conversation_turn += 1
+                workflow.logger.info(
+                    f"Starting conversation turn {conversation_turn}",
+                    extra={"turn": conversation_turn, "message_count": len(self._state.messages)}
+                )
+                # Get the latest user message (last message added)
+                latest_message = self._state.messages[-1] if self._state.messages else None
+                latest_prompt = latest_message.content if latest_message and latest_message.role == "user" else input.prompt
+                # Extract user message metadata for session persistence deduplication
+                user_message_id = latest_message.message_id if latest_message and latest_message.role == "user" else None
+                user_id = latest_message.user_id if latest_message and latest_message.role == "user" else None
+                user_name = latest_message.user_name if latest_message and latest_message.role == "user" else None
+                user_email = latest_message.user_email if latest_message and latest_message.role == "user" else None
+                user_avatar = latest_message.user_avatar if latest_message and latest_message.role == "user" else None
+                # Step 3: Publish user message to stream IMMEDIATELY (for turn 1)
+                # For follow-up turns, the message is published when received via signal
+                # This ensures the initial user message appears in UI before assistant response
+                #
+                # IMPORTANT: Use workflow patching to handle existing workflows that don't have this activity
+                # Existing workflows will skip this during replay; new workflows will execute it
+                if conversation_turn == 1 and workflow.patched("publish-user-message-v1"):
+                    workflow.logger.info(
+                        f"Publishing initial user message to stream",
+                        extra={
+                            "turn": conversation_turn,
+                            "message_id": user_message_id,
+                            "execution_id": str(input.execution_id)[:8] if input.execution_id else "unknown"
+                        }
+                    )
+                    await workflow.execute_activity(
+                        publish_user_message,
+                        PublishUserMessageInput(
+                            execution_id=input.execution_id,
+                            prompt=input.prompt,
+                            timestamp=initial_user_message.timestamp,
+                            message_id=user_message_id,
+                            user_id=input.user_metadata.get("user_id") if input.user_metadata else None,
+                            user_name=input.user_metadata.get("user_name") if input.user_metadata else None,
+                            user_email=input.user_metadata.get("user_email") if input.user_metadata else None,
+                            user_avatar=input.user_metadata.get("user_avatar") if input.user_metadata else None,
+                        ),
+                        start_to_close_timeout=timedelta(seconds=10),
+                    )
+                # Execute using RuntimeFactory (supports both "default" Agno and "claude_code")
+                workflow.logger.info(
+                    f"Executing with runtime: {input.runtime_type}",
+                    extra={
+                        "runtime_type": input.runtime_type,
+                        "turn": conversation_turn,
+                        "user_message_id": user_message_id  # Log for debugging
+                    }
+                )
+                # DEBUG: Log MCP servers in workflow input
+                workflow.logger.info(
+                    f"🔍 DEBUG: Workflow MCP servers",
+                    extra={
+                        "mcp_servers_type": str(type(input.mcp_servers)),
+                        "mcp_servers_count": len(input.mcp_servers) if input.mcp_servers else 0,
+                        "mcp_server_names": list(input.mcp_servers.keys()) if input.mcp_servers else []
+                    }
+                )
+                # Track turn start time for analytics
+                # workflow.time() already returns a float timestamp, not a datetime
+                turn_start_time = workflow.time()
+                llm_result = await workflow.execute_activity(
+                    execute_with_runtime,
+                    ActivityRuntimeExecuteInput(
+                        execution_id=input.execution_id,
+                        agent_id=input.agent_id,
+                        organization_id=input.organization_id,
+                        prompt=latest_prompt,  # Current turn's prompt
+                        runtime_type=input.runtime_type,
+                        system_prompt=input.system_prompt,
+                        model_id=input.model_id,
+                        model_config=input.model_config,
+                        agent_config=input.agent_config,
+                        mcp_servers=input.mcp_servers,
+                        conversation_history=[],  # Agno manages history via session_id
+                        user_metadata=input.user_metadata,
+                        runtime_config={
+                            "session_id": input.execution_id,  # For Agno runtime
+                        },
+                        stream=True,  # Enable streaming for real-time updates
+                        conversation_turn=conversation_turn,  # Pass turn number for analytics
+                        # CRITICAL: Pass user message metadata for consistent deduplication
+                        user_message_id=user_message_id,
+                        user_id=user_id,
+                        user_name=user_name,
+                        user_email=user_email,
+                        user_avatar=user_avatar,
+                    ),
+                    start_to_close_timeout=timedelta(seconds=ACTIVITY_EXECUTION_TIMEOUT_SECONDS),  # Configurable, default 24 hours for long-running streaming
+                    heartbeat_timeout=timedelta(seconds=HEARTBEAT_TIMEOUT_SECONDS),  # Configurable, default 30 min for long-running tasks
+                    retry_policy=RetryPolicy(
+                        maximum_attempts=3,  # Retry automatically 1-3 times
+                        initial_interval=timedelta(seconds=1),
+                        maximum_interval=timedelta(seconds=10),
+                        backoff_coefficient=2.0,
+                        non_retryable_error_types=["ExecutionNotFound"],  # Don't retry if execution deleted
+                    ),
+                )
+                # Submit analytics as separate activity (fire-and-forget)
+                # This runs independently and doesn't block workflow progression
+                workflow.start_activity(
+                    submit_runtime_analytics_activity,
+                    AnalyticsActivityInput(
+                        execution_id=input.execution_id,
+                        turn_number=conversation_turn,
+                        result=llm_result,
+                        turn_start_time=turn_start_time,
+                    ),
+                    start_to_close_timeout=timedelta(seconds=30),
+                    retry_policy=RetryPolicy(
+                        maximum_attempts=3,
+                        initial_interval=timedelta(seconds=2),
+                        maximum_interval=timedelta(seconds=10),
+                        backoff_coefficient=2.0,
+                        non_retryable_error_types=["ValueError", "TypeError"],
+                    ),
+                )
+                # Add tool execution status messages (real-time updates)
+                if llm_result.get("tool_execution_messages"):
+                    async with self._lock:
+                        for tool_msg in llm_result["tool_execution_messages"]:
+                            self._state.messages.append(ChatMessage(
+                                role="system",
+                                content=tool_msg.get("content", ""),
+                                timestamp=tool_msg.get("timestamp", workflow.now().isoformat()),
+                                tool_name=tool_msg.get("tool_name"),
+                            ))
+                # Add tool messages to state (detailed tool info)
+                if llm_result.get("tool_messages"):
+                    async with self._lock:
+                        for tool_msg in llm_result["tool_messages"]:
+                            self._state.messages.append(ChatMessage(
+                                role="tool",
+                                content=tool_msg.get("content", ""),
+                                timestamp=tool_msg.get("timestamp", workflow.now().isoformat()),
+                                tool_name=tool_msg.get("tool_name"),
+                                tool_input=tool_msg.get("tool_input"),
+                            ))
+                # Update state with assistant response
+                if llm_result.get("response"):
+                    async with self._lock:
+                        self._state.messages.append(ChatMessage(
+                            role="assistant",
+                            content=llm_result["response"],
+                            timestamp=workflow.now().isoformat(),
+                        ))
+                        self._state.current_response = llm_result["response"]
+                        self._processed_message_count += 1
+                # Update usage and metadata (accumulate across turns)
+                if llm_result.get("usage"):
+                    # Accumulate token usage across conversation turns
+                    current_usage = self._state.usage
+                    new_usage = llm_result.get("usage", {})
+                    self._state.usage = {
+                        "prompt_tokens": current_usage.get("prompt_tokens", 0) + new_usage.get("prompt_tokens", 0),
+                        "completion_tokens": current_usage.get("completion_tokens", 0) + new_usage.get("completion_tokens", 0),
+                        "total_tokens": current_usage.get("total_tokens", 0) + new_usage.get("total_tokens", 0),
+                    }
+                # Update metadata with latest turn info
+                self._state.metadata.update({
+                    "model": llm_result.get("model"),
+                    "latest_finish_reason": llm_result.get("finish_reason"),
+                    "mcp_tools_used": self._state.metadata.get("mcp_tools_used", 0) + llm_result.get("mcp_tools_used", 0),
+                    "latest_run_id": llm_result.get("run_id"),
+                    "conversation_turns": conversation_turn,
+                })
+                # Extract session_id from runtime result for conversation continuity
+                # This enables multi-turn conversations in Claude Code runtime
+                llm_metadata = llm_result.get("metadata", {})
+                if "claude_code_session_id" in llm_metadata:
+                    # Update input.user_metadata so next turn can resume the session
+                    if not input.user_metadata:
+                        input.user_metadata = {}
+                    session_id_value = llm_metadata["claude_code_session_id"]
+                    input.user_metadata["claude_code_session_id"] = session_id_value
+                    workflow.logger.info(
+                        f"Updated user_metadata with session_id for turn continuity",
+                        extra={
+                            "turn": conversation_turn,
+                            "session_id": session_id_value[:16] if session_id_value else None
+                        }
+                    )
+                # Check if LLM call was cancelled (DURABILITY FIX)
+                finish_reason = llm_result.get("finish_reason")
+                if finish_reason == "cancelled":
+                    # Execution was cancelled/interrupted - handle gracefully
+                    workflow.logger.warning(
+                        f"⚠️ Execution cancelled during turn {conversation_turn}",
+                        extra={
+                            "turn": conversation_turn,
+                            "execution_id": input.execution_id,
+                            "metadata": llm_result.get("metadata", {}),
+                        }
+                    )
+                    # Mark as interrupted (not failed) to indicate this is recoverable
+                    self._state.status = "interrupted"
+                    self._state.error_message = "Execution was interrupted and can be resumed"
+                    # Save any accumulated response before breaking
+                    if llm_result.get("response"):
+                        async with self._lock:
+                            self._state.messages.append(ChatMessage(
+                                role="assistant",
+                                content=llm_result["response"],
+                                timestamp=workflow.now().isoformat(),
+                            ))
+                            self._state.current_response = llm_result["response"]
+                    # Break but allow workflow to complete gracefully
+                    break
+                # Check if LLM call failed
+                if not llm_result.get("success"):
+                    self._state.status = "failed"
+                    # Validate error message is never empty
+                    error_msg = llm_result.get("error") or "Execution failed with unknown error"
+                    if not error_msg.strip():
+                        error_msg = "Execution failed (error details not available)"
+                    self._state.error_message = error_msg
+                    break
+                # Wait for control plane to make intelligent state decision
+                # The control plane AI analyzes the turn and determines the appropriate state
+                workflow.logger.info(
+                    f"⏳ Waiting for control plane state decision for turn {conversation_turn}",
+                    extra={"turn": conversation_turn, "execution_id": str(input.execution_id)[:8] if input.execution_id else "unknown"}
+                )
+                # Give control plane time to make AI decision (up to 6 seconds with retries)
+                max_retries = 3
+                retry_delay = 2  # seconds
+                for retry in range(max_retries):
+                    await asyncio.sleep(retry_delay)
+                    # Query execution state from control plane
+                    try:
+                        current_execution = await workflow.execute_activity(
+                            get_execution_details,
+                            ActivityGetExecutionInput(execution_id=input.execution_id),
+                            start_to_close_timeout=timedelta(seconds=10),
+                        )
+                        control_plane_status = current_execution.get("status", "unknown")
+                        # Check if status has been updated from "running" (indicates AI made a decision)
+                        if control_plane_status != "running":
+                            workflow.logger.info(
+                                f"✅ Control plane decided state: {control_plane_status}",
+                                extra={
+                                    "execution_id": input.execution_id,
+                                    "turn": conversation_turn,
+                                    "decided_status": control_plane_status,
+                                    "retry": retry + 1
+                                }
+                            )
+                            break
+                        else:
+                            if retry < max_retries - 1:
+                                workflow.logger.info(
+                                    f"⏳ Control plane still processing, retry {retry + 1}/{max_retries}",
+                                    extra={"turn": conversation_turn}
+                                )
+                    except Exception as e:
+                        workflow.logger.warning(
+                            f"⚠️ Failed to query execution state: {str(e)}",
+                            extra={"turn": conversation_turn, "retry": retry + 1}
+                        )
+                        if retry == max_retries - 1:
+                            # Final retry failed - default to waiting_for_input (safe fallback)
+                            control_plane_status = "waiting_for_input"
+                            workflow.logger.warning(
+                                "Using safe fallback state: waiting_for_input",
+                                extra={"turn": conversation_turn}
+                            )
+                # Update internal state based on control plane decision
+                self._state.status = control_plane_status
+                self._state.is_waiting_for_input = (control_plane_status == "waiting_for_input")
+                workflow.logger.info(
+                    f"🎯 State transition complete: {control_plane_status}",
+                    extra={
+                        "execution_id": input.execution_id,
+                        "turn": conversation_turn,
+                        "status": control_plane_status
+                    }
+                )
+                # Then persist conversation
+                workflow.logger.info(
+                    f"Persisting conversation after turn {conversation_turn}",
+                    extra={"turn": conversation_turn, "message_count": len(self._state.messages)}
+                )
+                try:
+                    persist_result = await workflow.execute_activity(
+                        persist_conversation_history,
+                        ActivityPersistConversationInput(
+                            execution_id=input.execution_id,
+                            session_id=input.execution_id,
+                            messages=self._messages_to_dict(self._state.messages),
+                            user_id=input.user_metadata.get("user_id") if input.user_metadata else None,
+                            metadata={
+                                "agent_id": input.agent_id,
+                                "organization_id": input.organization_id,
+                                "conversation_turn": conversation_turn,
+                                "total_messages": len(self._state.messages),
+                            },
+                        ),
+                        start_to_close_timeout=timedelta(seconds=30),
+                    )
+                    if persist_result.get("success"):
+                        workflow.logger.info(
+                            f"✅ Conversation persisted for turn {conversation_turn}",
+                            extra={
+                                "turn": conversation_turn,
+                                "message_count": persist_result.get("message_count", len(self._state.messages))
+                            }
+                        )
+                    else:
+                        workflow.logger.warning(
+                            f"⚠️ Persistence returned failure for turn {conversation_turn}",
+                            extra={
+                                "turn": conversation_turn,
+                                "error": persist_result.get("error", "Unknown error")
+                            }
+                        )
+                except Exception as persist_error:
+                    # Log but don't fail the workflow if persistence fails
+                    error_type = type(persist_error).__name__
+                    error_msg = str(persist_error) if str(persist_error) else "No error message"
+                    workflow.logger.error(
+                        f"❌ Failed to persist conversation for turn {conversation_turn}",
+                        extra={
+                            "turn": conversation_turn,
+                            "error_type": error_type,
+                            "error": error_msg[:200],  # Truncate long errors
+                            "message_count": len(self._state.messages),
+                        }
+                    )
+                # Handle different states based on control plane decision
+                if control_plane_status == "completed":
+                    workflow.logger.info(
+                        f"✅ Task completed (AI decision) after turn {conversation_turn}",
+                        extra={"turn": conversation_turn}
+                    )
+                    # Task is complete - exit loop
+                    break
+                elif control_plane_status == "failed":
+                    workflow.logger.info(
+                        f"❌ Task failed (AI decision) after turn {conversation_turn}",
+                        extra={"turn": conversation_turn}
+                    )
+                    # Unrecoverable error - exit loop
+                    break
+                elif control_plane_status == "waiting_for_input":
+                    workflow.logger.info(
+                        f"⏸️ Waiting for user input after turn {conversation_turn}",
+                        extra={"turn": conversation_turn}
+                    )
+                    # Wait for either:
+                    # 1. New message from user (add_message signal)
+                    # 2. User marks as done (mark_as_done signal)
+                    # 3. Timeout (24 hours for long-running conversations)
+                    await workflow.wait_condition(
+                        lambda: self._new_message_count > self._processed_message_count or self._state.should_complete,
+                        timeout=timedelta(hours=24)
+                    )
+                    # Don't update processed count here - it will be updated after we add the assistant's response
+                    if self._state.should_complete:
+                        workflow.logger.info("User marked workflow as done")
+                        break
+                    # Continue loop to process new message
+                    self._state.status = "running"
+                elif control_plane_status == "running":
+                    workflow.logger.info(
+                        f"▶️ Continuing automatically to next turn {conversation_turn + 1}",
+                        extra={"turn": conversation_turn}
+                    )
+                    # Continue automatically to next turn (no user input needed)
+                    # Just loop back to execute_agent_llm
+                    continue
+                elif control_plane_status == "queued":
+                    workflow.logger.info(
+                        f"📥 Message queued, continuing to next turn {conversation_turn + 1}",
+                        extra={"turn": conversation_turn}
+                    )
+                    # "queued" means a new message was received and is waiting to be processed
+                    # Treat same as "running" - continue to next turn automatically
+                    continue
+                else:
+                    # Unknown status - default to waiting_for_input (safe fallback)
+                    workflow.logger.warning(
+                        f"⚠️ Unknown status '{control_plane_status}', defaulting to waiting_for_input",
+                        extra={"turn": conversation_turn, "status": control_plane_status}
+                    )
+                    self._state.status = "waiting_for_input"
+                    self._state.is_waiting_for_input = True
+                    await workflow.wait_condition(
+                        lambda: self._new_message_count > self._processed_message_count or self._state.should_complete,
+                        timeout=timedelta(hours=24)
+                    )
+                    if self._state.should_complete:
+                        workflow.logger.info("User marked workflow as done")
+                        break
+                    self._state.status = "running"
+            # Conversation complete - finalize workflow
+            # DURABILITY FIX: Handle interrupted status separately from failed/completed
+            if self._state.status == "interrupted":
+                final_status = "interrupted"
+                workflow.logger.warning(
+                    f"⚠️ Workflow interrupted (not failed)",
+                    extra={
+                        "execution_id": input.execution_id,
+                        "conversation_turns": conversation_turn,
+                    }
+                )
+            elif self._state.status == "failed":
+                final_status = "failed"
+            else:
+                final_status = "completed"
+            self._state.status = final_status
+            await workflow.execute_activity(
+                update_execution_status,
+                ActivityUpdateExecutionInput(
+                    execution_id=input.execution_id,
+                    status=final_status,
+                    completed_at=workflow.now().isoformat(),
+                    response=self._state.current_response,
+                    error_message=self._state.error_message,
+                    usage=self._state.usage,
+                    execution_metadata={
+                        **self._state.metadata,
+                        "workflow_completed": True,
+                        "total_conversation_turns": conversation_turn,
+                        "was_interrupted": final_status == "interrupted",
+                    },
+                ),
+                start_to_close_timeout=timedelta(seconds=30),
+            )
+            # Update agent final status
+            # DURABILITY FIX: Treat interrupted as a partial success, not a failure
+            agent_final_status = "failed" if final_status == "failed" else "completed"
+            await workflow.execute_activity(
+                update_agent_status,
+                ActivityUpdateAgentInput(
+                    agent_id=input.agent_id,
+                    organization_id=input.organization_id,
+                    status=agent_final_status,
+                    last_active_at=workflow.now().isoformat(),
+                    error_message=self._state.error_message if final_status == "failed" else None,
+                ),
+                start_to_close_timeout=timedelta(seconds=30),
+            )
+            workflow.logger.info(
+                f"Agent execution workflow completed with HITL",
+                extra={
+                    "execution_id": input.execution_id,
+                    "status": final_status,
+                    "conversation_turns": conversation_turn,
+                }
+            )
+            return {
+                "success": final_status == "completed",
+                "execution_id": input.execution_id,
+                "status": final_status,
+                "response": self._state.current_response,
+                "usage": self._state.usage,
+                "conversation_turns": conversation_turn,
+            }
+        except Exception as e:
+            # Update state with error
+            self._state.status = "failed"
+            self._state.error_message = str(e)
+            self._state.metadata["error_type"] = type(e).__name__
+            # Log failure with clear context
+            execution_logger.execution_failed(
+                input.execution_id,
+                error=str(e),
+                error_type=type(e).__name__,
+                recoverable=False
+            )
+            workflow.logger.error(
+                f"Agent execution workflow failed",
+                extra={
+                    "execution_id": input.execution_id,
+                    "error": str(e),
+                }
+            )
+            # Update execution as failed
+            try:
+                await workflow.execute_activity(
+                    update_execution_status,
+                    ActivityUpdateExecutionInput(
+                        execution_id=input.execution_id,
+                        status="failed",
+                        completed_at=workflow.now().isoformat(),
+                        error_message=f"Workflow error: {str(e)}",
+                        execution_metadata={
+                            "workflow_error": True,
+                            "error_type": type(e).__name__,
+                        },
+                    ),
+                    start_to_close_timeout=timedelta(seconds=30),
+                )
+                await workflow.execute_activity(
+                    update_agent_status,
+                    ActivityUpdateAgentInput(
+                        agent_id=input.agent_id,
+                        organization_id=input.organization_id,
+                        status="failed",
+                        last_active_at=workflow.now().isoformat(),
+                        error_message=str(e),
+                    ),
+                    start_to_close_timeout=timedelta(seconds=30),
+                )
+            except Exception as update_error:
+                execution_logger.warning(
+                    input.execution_id,
+                    f"Could not update execution status after workflow error: {str(update_error)}"
+                )
+                workflow.logger.error(
+                    f"Failed to update status after error",
+                    extra={"error": str(update_error)}
+                )
+            raise