PyPI - kubiya-control-plane-api - Versions diffs - 0.9.15__py3-none-any.whl - Mend

kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (479) hide show

control_plane_api/LICENSE +676 -0
control_plane_api/README.md +350 -0
control_plane_api/__init__.py +4 -0
control_plane_api/__version__.py +8 -0
control_plane_api/alembic/README +1 -0
control_plane_api/alembic/env.py +121 -0
control_plane_api/alembic/script.py.mako +28 -0
control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
control_plane_api/alembic.ini +148 -0
control_plane_api/api/index.py +12 -0
control_plane_api/app/__init__.py +11 -0
control_plane_api/app/activities/__init__.py +20 -0
control_plane_api/app/activities/agent_activities.py +384 -0
control_plane_api/app/activities/plan_generation_activities.py +499 -0
control_plane_api/app/activities/team_activities.py +424 -0
control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
control_plane_api/app/config/__init__.py +35 -0
control_plane_api/app/config/api_config.py +469 -0
control_plane_api/app/config/config_loader.py +224 -0
control_plane_api/app/config/model_pricing.py +323 -0
control_plane_api/app/config/storage_config.py +159 -0
control_plane_api/app/config.py +115 -0
control_plane_api/app/controllers/__init__.py +0 -0
control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
control_plane_api/app/database.py +135 -0
control_plane_api/app/exceptions.py +408 -0
control_plane_api/app/lib/__init__.py +11 -0
control_plane_api/app/lib/environment.py +65 -0
control_plane_api/app/lib/event_bus/__init__.py +17 -0
control_plane_api/app/lib/event_bus/base.py +136 -0
control_plane_api/app/lib/event_bus/manager.py +335 -0
control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
control_plane_api/app/lib/job_executor.py +330 -0
control_plane_api/app/lib/kubiya_client.py +293 -0
control_plane_api/app/lib/litellm_pricing.py +166 -0
control_plane_api/app/lib/mcp_validation.py +163 -0
control_plane_api/app/lib/nats/__init__.py +13 -0
control_plane_api/app/lib/nats/credentials_manager.py +288 -0
control_plane_api/app/lib/nats/listener.py +374 -0
control_plane_api/app/lib/planning_prompt_builder.py +153 -0
control_plane_api/app/lib/planning_tools/__init__.py +41 -0
control_plane_api/app/lib/planning_tools/agents.py +409 -0
control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
control_plane_api/app/lib/planning_tools/base.py +119 -0
control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
control_plane_api/app/lib/planning_tools/environments.py +218 -0
control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
control_plane_api/app/lib/planning_tools/models.py +93 -0
control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
control_plane_api/app/lib/planning_tools/resources.py +242 -0
control_plane_api/app/lib/planning_tools/teams.py +334 -0
control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
control_plane_api/app/lib/redis_client.py +803 -0
control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
control_plane_api/app/lib/storage/__init__.py +20 -0
control_plane_api/app/lib/storage/base_provider.py +274 -0
control_plane_api/app/lib/storage/provider_factory.py +157 -0
control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
control_plane_api/app/lib/supabase.py +71 -0
control_plane_api/app/lib/supabase_utils.py +138 -0
control_plane_api/app/lib/task_planning/__init__.py +138 -0
control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
control_plane_api/app/lib/task_planning/agents.py +389 -0
control_plane_api/app/lib/task_planning/cache.py +218 -0
control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
control_plane_api/app/lib/task_planning/helpers.py +293 -0
control_plane_api/app/lib/task_planning/hooks.py +474 -0
control_plane_api/app/lib/task_planning/models.py +503 -0
control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
control_plane_api/app/lib/task_planning/runner.py +656 -0
control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
control_plane_api/app/lib/task_planning/workflow.py +424 -0
control_plane_api/app/lib/templating/__init__.py +88 -0
control_plane_api/app/lib/templating/compiler.py +278 -0
control_plane_api/app/lib/templating/engine.py +178 -0
control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
control_plane_api/app/lib/templating/parsers/base.py +96 -0
control_plane_api/app/lib/templating/parsers/env.py +85 -0
control_plane_api/app/lib/templating/parsers/graph.py +112 -0
control_plane_api/app/lib/templating/parsers/secret.py +87 -0
control_plane_api/app/lib/templating/parsers/simple.py +81 -0
control_plane_api/app/lib/templating/resolver.py +366 -0
control_plane_api/app/lib/templating/types.py +214 -0
control_plane_api/app/lib/templating/validator.py +201 -0
control_plane_api/app/lib/temporal_client.py +232 -0
control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
control_plane_api/app/lib/temporal_credentials_service.py +203 -0
control_plane_api/app/lib/validation/__init__.py +24 -0
control_plane_api/app/lib/validation/runtime_validation.py +388 -0
control_plane_api/app/main.py +531 -0
control_plane_api/app/middleware/__init__.py +10 -0
control_plane_api/app/middleware/auth.py +645 -0
control_plane_api/app/middleware/exception_handler.py +267 -0
control_plane_api/app/middleware/prometheus_middleware.py +173 -0
control_plane_api/app/middleware/rate_limiting.py +384 -0
control_plane_api/app/middleware/request_id.py +202 -0
control_plane_api/app/models/__init__.py +40 -0
control_plane_api/app/models/agent.py +90 -0
control_plane_api/app/models/analytics.py +206 -0
control_plane_api/app/models/associations.py +107 -0
control_plane_api/app/models/auth_user.py +73 -0
control_plane_api/app/models/context.py +161 -0
control_plane_api/app/models/custom_integration.py +99 -0
control_plane_api/app/models/environment.py +64 -0
control_plane_api/app/models/execution.py +125 -0
control_plane_api/app/models/execution_transition.py +50 -0
control_plane_api/app/models/job.py +159 -0
control_plane_api/app/models/llm_model.py +78 -0
control_plane_api/app/models/orchestration.py +66 -0
control_plane_api/app/models/plan_execution.py +102 -0
control_plane_api/app/models/presence.py +49 -0
control_plane_api/app/models/project.py +61 -0
control_plane_api/app/models/project_management.py +85 -0
control_plane_api/app/models/session.py +29 -0
control_plane_api/app/models/skill.py +155 -0
control_plane_api/app/models/system_tables.py +43 -0
control_plane_api/app/models/task_planning.py +372 -0
control_plane_api/app/models/team.py +86 -0
control_plane_api/app/models/trace.py +257 -0
control_plane_api/app/models/user_profile.py +54 -0
control_plane_api/app/models/worker.py +221 -0
control_plane_api/app/models/workflow.py +161 -0
control_plane_api/app/models/workspace.py +50 -0
control_plane_api/app/observability/__init__.py +177 -0
control_plane_api/app/observability/context_logging.py +475 -0
control_plane_api/app/observability/decorators.py +337 -0
control_plane_api/app/observability/local_span_processor.py +702 -0
control_plane_api/app/observability/metrics.py +303 -0
control_plane_api/app/observability/middleware.py +246 -0
control_plane_api/app/observability/optional.py +115 -0
control_plane_api/app/observability/tracing.py +382 -0
control_plane_api/app/policies/README.md +149 -0
control_plane_api/app/policies/approved_users.rego +62 -0
control_plane_api/app/policies/business_hours.rego +51 -0
control_plane_api/app/policies/rate_limiting.rego +100 -0
control_plane_api/app/policies/tool_enforcement/README.md +336 -0
control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
control_plane_api/app/policies/tool_restrictions.rego +86 -0
control_plane_api/app/routers/__init__.py +4 -0
control_plane_api/app/routers/agents.py +382 -0
control_plane_api/app/routers/agents_v2.py +1598 -0
control_plane_api/app/routers/analytics.py +1310 -0
control_plane_api/app/routers/auth.py +59 -0
control_plane_api/app/routers/client_config.py +57 -0
control_plane_api/app/routers/context_graph.py +561 -0
control_plane_api/app/routers/context_manager.py +577 -0
control_plane_api/app/routers/custom_integrations.py +490 -0
control_plane_api/app/routers/enforcer.py +132 -0
control_plane_api/app/routers/environment_context.py +252 -0
control_plane_api/app/routers/environments.py +761 -0
control_plane_api/app/routers/execution_environment.py +847 -0
control_plane_api/app/routers/executions/__init__.py +28 -0
control_plane_api/app/routers/executions/router.py +286 -0
control_plane_api/app/routers/executions/services/__init__.py +22 -0
control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
control_plane_api/app/routers/executions/services/status_service.py +420 -0
control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
control_plane_api/app/routers/executions/services/worker_health.py +514 -0
control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
control_plane_api/app/routers/executions.py +4888 -0
control_plane_api/app/routers/health.py +165 -0
control_plane_api/app/routers/health_v2.py +394 -0
control_plane_api/app/routers/integration_templates.py +496 -0
control_plane_api/app/routers/integrations.py +287 -0
control_plane_api/app/routers/jobs.py +1809 -0
control_plane_api/app/routers/metrics.py +517 -0
control_plane_api/app/routers/models.py +82 -0
control_plane_api/app/routers/models_v2.py +628 -0
control_plane_api/app/routers/plan_executions.py +1481 -0
control_plane_api/app/routers/plan_generation_async.py +304 -0
control_plane_api/app/routers/policies.py +669 -0
control_plane_api/app/routers/presence.py +234 -0
control_plane_api/app/routers/projects.py +987 -0
control_plane_api/app/routers/runners.py +379 -0
control_plane_api/app/routers/runtimes.py +172 -0
control_plane_api/app/routers/secrets.py +171 -0
control_plane_api/app/routers/skills.py +1010 -0
control_plane_api/app/routers/skills_definitions.py +140 -0
control_plane_api/app/routers/storage.py +456 -0
control_plane_api/app/routers/task_planning.py +611 -0
control_plane_api/app/routers/task_queues.py +650 -0
control_plane_api/app/routers/team_context.py +274 -0
control_plane_api/app/routers/teams.py +1747 -0
control_plane_api/app/routers/templates.py +248 -0
control_plane_api/app/routers/traces.py +571 -0
control_plane_api/app/routers/websocket_client.py +479 -0
control_plane_api/app/routers/websocket_executions_status.py +437 -0
control_plane_api/app/routers/websocket_gateway.py +323 -0
control_plane_api/app/routers/websocket_traces.py +576 -0
control_plane_api/app/routers/worker_queues.py +2555 -0
control_plane_api/app/routers/worker_websocket.py +419 -0
control_plane_api/app/routers/workers.py +1004 -0
control_plane_api/app/routers/workflows.py +204 -0
control_plane_api/app/runtimes/__init__.py +6 -0
control_plane_api/app/runtimes/validation.py +344 -0
control_plane_api/app/schemas/__init__.py +1 -0
control_plane_api/app/schemas/job_schemas.py +302 -0
control_plane_api/app/schemas/mcp_schemas.py +311 -0
control_plane_api/app/schemas/template_schemas.py +133 -0
control_plane_api/app/schemas/trace_schemas.py +168 -0
control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
control_plane_api/app/services/__init__.py +1 -0
control_plane_api/app/services/agno_planning_strategy.py +233 -0
control_plane_api/app/services/agno_service.py +838 -0
control_plane_api/app/services/claude_code_planning_service.py +203 -0
control_plane_api/app/services/context_graph_client.py +224 -0
control_plane_api/app/services/custom_integration_service.py +415 -0
control_plane_api/app/services/integration_resolution_service.py +345 -0
control_plane_api/app/services/litellm_service.py +394 -0
control_plane_api/app/services/plan_generator.py +79 -0
control_plane_api/app/services/planning_strategy.py +66 -0
control_plane_api/app/services/planning_strategy_factory.py +118 -0
control_plane_api/app/services/policy_service.py +615 -0
control_plane_api/app/services/state_transition_service.py +755 -0
control_plane_api/app/services/storage_service.py +593 -0
control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
control_plane_api/app/services/trace_retention.py +354 -0
control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
control_plane_api/app/services/workflow_operations_service.py +611 -0
control_plane_api/app/skills/__init__.py +100 -0
control_plane_api/app/skills/base.py +239 -0
control_plane_api/app/skills/builtin/__init__.py +37 -0
control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
control_plane_api/app/skills/builtin/docker/skill.py +104 -0
control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
control_plane_api/app/skills/builtin/python/__init__.py +4 -0
control_plane_api/app/skills/builtin/python/skill.py +92 -0
control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
control_plane_api/app/skills/builtin/shell/skill.py +161 -0
control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
control_plane_api/app/skills/builtin/slack/skill.py +302 -0
control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
control_plane_api/app/skills/business_intelligence.py +189 -0
control_plane_api/app/skills/config.py +63 -0
control_plane_api/app/skills/loaders/__init__.py +14 -0
control_plane_api/app/skills/loaders/base.py +73 -0
control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
control_plane_api/app/skills/registry.py +125 -0
control_plane_api/app/utils/helpers.py +12 -0
control_plane_api/app/utils/workflow_executor.py +354 -0
control_plane_api/app/workflows/__init__.py +11 -0
control_plane_api/app/workflows/agent_execution.py +520 -0
control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
control_plane_api/app/workflows/namespace_provisioning.py +326 -0
control_plane_api/app/workflows/plan_generation.py +254 -0
control_plane_api/app/workflows/team_execution.py +442 -0
control_plane_api/scripts/seed_models.py +240 -0
control_plane_api/scripts/validate_existing_tool_names.py +492 -0
control_plane_api/shared/__init__.py +8 -0
control_plane_api/shared/version.py +17 -0
control_plane_api/test_deduplication.py +274 -0
control_plane_api/test_executor_deduplication_e2e.py +309 -0
control_plane_api/test_job_execution_e2e.py +283 -0
control_plane_api/test_real_integration.py +193 -0
control_plane_api/version.py +38 -0
control_plane_api/worker/__init__.py +0 -0
control_plane_api/worker/activities/__init__.py +0 -0
control_plane_api/worker/activities/agent_activities.py +1585 -0
control_plane_api/worker/activities/approval_activities.py +234 -0
control_plane_api/worker/activities/job_activities.py +199 -0
control_plane_api/worker/activities/runtime_activities.py +1167 -0
control_plane_api/worker/activities/skill_activities.py +282 -0
control_plane_api/worker/activities/team_activities.py +479 -0
control_plane_api/worker/agent_runtime_server.py +370 -0
control_plane_api/worker/binary_manager.py +333 -0
control_plane_api/worker/config/__init__.py +31 -0
control_plane_api/worker/config/worker_config.py +273 -0
control_plane_api/worker/control_plane_client.py +1491 -0
control_plane_api/worker/examples/analytics_integration_example.py +362 -0
control_plane_api/worker/health_monitor.py +159 -0
control_plane_api/worker/metrics.py +237 -0
control_plane_api/worker/models/__init__.py +1 -0
control_plane_api/worker/models/error_events.py +105 -0
control_plane_api/worker/models/inputs.py +89 -0
control_plane_api/worker/runtimes/__init__.py +35 -0
control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
control_plane_api/worker/runtimes/agno/__init__.py +34 -0
control_plane_api/worker/runtimes/agno/config.py +248 -0
control_plane_api/worker/runtimes/agno/hooks.py +385 -0
control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
control_plane_api/worker/runtimes/agno/utils.py +163 -0
control_plane_api/worker/runtimes/base.py +979 -0
control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
control_plane_api/worker/runtimes/claude_code/config.py +829 -0
control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
control_plane_api/worker/runtimes/factory.py +173 -0
control_plane_api/worker/runtimes/model_utils.py +107 -0
control_plane_api/worker/runtimes/validation.py +93 -0
control_plane_api/worker/services/__init__.py +1 -0
control_plane_api/worker/services/agent_communication_tools.py +908 -0
control_plane_api/worker/services/agent_executor.py +485 -0
control_plane_api/worker/services/agent_executor_v2.py +793 -0
control_plane_api/worker/services/analytics_collector.py +457 -0
control_plane_api/worker/services/analytics_service.py +464 -0
control_plane_api/worker/services/approval_tools.py +310 -0
control_plane_api/worker/services/approval_tools_agno.py +207 -0
control_plane_api/worker/services/cancellation_manager.py +177 -0
control_plane_api/worker/services/code_ingestion_tools.py +465 -0
control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
control_plane_api/worker/services/data_visualization.py +834 -0
control_plane_api/worker/services/event_publisher.py +531 -0
control_plane_api/worker/services/jira_tools.py +257 -0
control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
control_plane_api/worker/services/runtime_analytics.py +328 -0
control_plane_api/worker/services/session_service.py +365 -0
control_plane_api/worker/services/skill_context_enhancement.py +181 -0
control_plane_api/worker/services/skill_factory.py +471 -0
control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
control_plane_api/worker/services/team_executor.py +715 -0
control_plane_api/worker/services/team_executor_v2.py +1866 -0
control_plane_api/worker/services/tool_enforcement.py +254 -0
control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
control_plane_api/worker/services/workflow_executor/models.py +142 -0
control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
control_plane_api/worker/skills/__init__.py +12 -0
control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
control_plane_api/worker/skills/loaders/__init__.py +5 -0
control_plane_api/worker/skills/loaders/base.py +23 -0
control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
control_plane_api/worker/skills/registry.py +208 -0
control_plane_api/worker/tests/__init__.py +1 -0
control_plane_api/worker/tests/conftest.py +12 -0
control_plane_api/worker/tests/e2e/__init__.py +0 -0
control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
control_plane_api/worker/tests/integration/__init__.py +0 -0
control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
control_plane_api/worker/tests/unit/__init__.py +0 -0
control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
control_plane_api/worker/utils/__init__.py +1 -0
control_plane_api/worker/utils/chunk_batcher.py +330 -0
control_plane_api/worker/utils/environment.py +65 -0
control_plane_api/worker/utils/error_publisher.py +260 -0
control_plane_api/worker/utils/event_batcher.py +256 -0
control_plane_api/worker/utils/logging_config.py +335 -0
control_plane_api/worker/utils/logging_helper.py +326 -0
control_plane_api/worker/utils/parameter_validator.py +120 -0
control_plane_api/worker/utils/retry_utils.py +60 -0
control_plane_api/worker/utils/streaming_utils.py +665 -0
control_plane_api/worker/utils/tool_validation.py +332 -0
control_plane_api/worker/utils/workspace_manager.py +163 -0
control_plane_api/worker/websocket_client.py +393 -0
control_plane_api/worker/worker.py +1297 -0
control_plane_api/worker/workflows/__init__.py +0 -0
control_plane_api/worker/workflows/agent_execution.py +909 -0
control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
control_plane_api/worker/workflows/team_execution.py +611 -0
kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
scripts/__init__.py +1 -0
scripts/migrations.py +39 -0
scripts/seed_worker_queues.py +128 -0
scripts/setup_agent_runtime.py +142 -0
worker_internal/__init__.py +1 -0
worker_internal/planner/__init__.py +1 -0
worker_internal/planner/activities.py +1499 -0
worker_internal/planner/agent_tools.py +197 -0
worker_internal/planner/event_models.py +148 -0
worker_internal/planner/event_publisher.py +67 -0
worker_internal/planner/models.py +199 -0
worker_internal/planner/retry_logic.py +134 -0
worker_internal/planner/worker.py +300 -0
worker_internal/planner/workflows.py +970 -0

worker_internal/planner/activities.py ADDED Viewed

@@ -0,0 +1,1499 @@
+"""Temporal activities for plan orchestration."""
+import os
+import json
+import httpx
+from typing import Dict, Any, Optional, List
+from datetime import datetime, timezone
+from temporalio import activity
+import structlog
+from worker_internal.planner.models import (
+    CreatePlanExecutionInput,
+    UpdatePlanStateInput,
+    TaskExecutionResult,
+    TaskValidationResult,
+    TaskStatus,
+    PlanTask,
+    TaskRetryContext,
+)
+from worker_internal.planner.event_publisher import publish_plan_event
+from worker_internal.planner.event_models import (
+    PlanStartedEvent,
+    TaskStartedEvent,
+    TaskRunningEvent,
+    TaskWaitingForInputEvent,
+    TaskCompletedEvent,
+    TaskValidationStartedEvent,
+    TaskValidationCompleteEvent,
+    PlanStatusUpdateEvent,
+    TodoListInitializedEvent,
+    TodoItemUpdatedEvent,
+    TodoItem,
+)
+# Module-level structlog logger; used by plain helpers such as extract_user_from_jwt
+# (the Temporal activities in this module log through activity.logger instead).
+logger = structlog.get_logger()
+def extract_user_from_jwt(jwt_token: Optional[str]) -> Optional[str]:
+    """
+    Read the "email" claim out of a JWT without validating the token.
+    The signature is NOT verified, so the returned value is only as
+    trustworthy as whoever supplied the token and should not be used
+    for authorization decisions.
+    Args:
+        jwt_token: Encoded JWT string; None or an empty string yields None
+    Returns:
+        Value of the "email" claim, or None when the token is missing,
+        cannot be decoded, or carries no such claim
+    """
+    if jwt_token:
+        try:
+            # Imported lazily inside the try, so a missing PyJWT is handled like a decode error
+            import jwt as pyjwt
+            claims = pyjwt.decode(jwt_token, options={"verify_signature": False})
+            return claims.get("email")
+        except Exception as e:
+            # Best effort: any failure is logged and reported as "no user"
+            logger.warning(f"failed_to_extract_user_from_jwt: {str(e)}")
+    return None
+def build_langfuse_metadata(
+    plan_execution_id: str,
+    generation_name: str,
+    user_id: Optional[str] = None,
+    organization_id: Optional[str] = None,
+    agent_id: Optional[str] = None,
+    task_id: Optional[int] = None,
+) -> Dict[str, Any]:
+    """
+    Assemble the context dict that describes one LLM call of a plan execution.
+    Every call made for the same plan shares plan_execution_id as its
+    session_id so the calls can be grouped together. The notes below on
+    what the LLM proxy does with each field come from the original author;
+    that behaviour lives outside this module.
+    Args:
+        plan_execution_id: Plan execution ID, stored as session_id
+        generation_name: Name for this specific LLM call (e.g., "task-1-completion-analysis")
+        user_id: User email, passed through raw
+        organization_id: Organization ID, passed through raw
+        agent_id: Agent ID making the call
+        task_id: Task ID if this call is for a specific task
+    Returns:
+        Dict that always holds session_id, trace_name, generation_name and
+        name; user_id, organization_id and agent_id are added only when
+        truthy, task_id whenever it is not None (so task 0 is kept)
+    """
+    # (key, value, include?) rows, listed in the order the keys appear in the result
+    candidates = (
+        # CRITICAL: raw user_id / organization_id are passed so the proxy can
+        # format them itself (as "email-org") and avoid 401 errors
+        ("user_id", user_id, bool(user_id)),
+        ("organization_id", organization_id, bool(organization_id)),
+        # CRITICAL: plan_execution_id as session_id groups all LLM calls of the plan
+        ("session_id", plan_execution_id, True),
+        # Custom names, intended to replace the proxy's default "agent-chat"
+        ("trace_name", "plan-execution", True),
+        ("generation_name", generation_name, True),
+        ("name", generation_name, True),
+        # Additional context metadata
+        ("agent_id", agent_id, bool(agent_id)),
+        ("task_id", task_id, task_id is not None),
+    )
+    return {key: value for key, value, include in candidates if include}
+@activity.defn
+async def publish_event_activity(
+    execution_id: str,
+    event_type: str,
+    event_data: Dict[str, Any],
+) -> bool:
+    """
+    Activity to publish events from workflow context.
+    The event is wrapped in a JSON envelope (event_type, data, UTC
+    timestamp), pushed onto the Redis list
+    "plan-execution:{execution_id}:events" (trimmed to the 1000 newest
+    entries, expiring after 3600 seconds) and published on the channel
+    "plan-execution:{execution_id}:stream".
+    Args:
+        execution_id: Plan execution the event belongs to
+        event_type: Event name stored in the envelope
+        event_data: JSON-serializable event payload
+    Returns:
+        True when the event was stored and published; False when no Redis
+        client is available or any step raised (errors are logged, never
+        propagated)
+    """
+    # Structured fields are passed through `extra=`, as the other activities
+    # in this file do: standard-library style loggers reject arbitrary keyword
+    # arguments, which would turn a log call into a TypeError.
+    short_id = execution_id[:8]
+    try:
+        redis_client = get_redis_client()
+        if not redis_client:
+            activity.logger.warning("redis_not_available", extra={"execution_id": short_id})
+            return False
+        message = {
+            "event_type": event_type,
+            "data": event_data,
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+        }
+        # Serialize to JSON string
+        message_json = json.dumps(message)
+        list_key = f"plan-execution:{execution_id}:events"
+        channel = f"plan-execution:{execution_id}:stream"
+        # LPUSH puts the newest event at the head, so LTRIM 0..999 keeps the latest 1000
+        await redis_client.lpush(list_key, message_json)
+        await redis_client.ltrim(list_key, 0, 999)
+        await redis_client.expire(list_key, 3600)
+        await redis_client.publish(channel, message_json)
+        activity.logger.debug(
+            "plan_event_published_from_workflow",
+            extra={
+                "execution_id": short_id,
+                "event_type": event_type,
+            },
+        )
+        return True
+    except Exception as e:
+        # Event publishing is best effort: report failure to the caller instead of raising
+        activity.logger.error(
+            "publish_event_failed",
+            extra={"error": str(e), "execution_id": short_id},
+        )
+        return False
+def get_redis_client():
+    """
+    Return the Redis client used for event publishing.
+    Thin wrapper around control_plane_api.app.lib.redis_client.get_redis_client;
+    the import happens at call time rather than at module import.
+    Returns:
+        Whatever the shared factory returns (callers in this module treat a
+        falsy result as "Redis not available")
+    """
+    from control_plane_api.app.lib.redis_client import get_redis_client as _shared_redis_factory
+    client = _shared_redis_factory()
+    return client
+def get_control_plane_url() -> str:
+    """
+    Resolve the base URL of the Control Plane API.
+    Returns:
+        Value of the CONTROL_PLANE_URL environment variable, or
+        "http://localhost:8000" when it is not set
+    """
+    default_url = "http://localhost:8000"
+    return os.environ.get("CONTROL_PLANE_URL", default_url)
+def get_auth_headers(jwt_token: Optional[str] = None) -> Dict[str, str]:
+    """
+    Build the HTTP headers for a Control Plane API request.
+    Args:
+        jwt_token: Bearer token; when None or empty no Authorization header is added
+    Returns:
+        Headers with a JSON Content-Type and, if a token was given, an
+        "Authorization: Bearer ..." entry
+    """
+    if not jwt_token:
+        return {"Content-Type": "application/json"}
+    return {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+    }
+@activity.defn
+async def create_plan_execution(input: CreatePlanExecutionInput) -> Dict[str, Any]:
+    """
+    Announce the start of a plan execution.
+    NOTE: Despite the name, nothing is created or looked up here. Per the
+    original author, the API creates the record before starting the
+    workflow; this activity only logs, publishes a "plan_started" event
+    and reports success.
+    Args:
+        input: Plan details; execution_id, title, total_tasks and agent_id are read
+    Returns:
+        Dict with "success" set to True and "plan_execution_id" set to
+        input.execution_id
+    """
+    execution_id = input.execution_id
+    log_fields = {
+        "execution_id": execution_id[:8],
+        "title": input.title,
+        "total_tasks": input.total_tasks,
+    }
+    activity.logger.info("plan_execution_already_created_by_api", extra=log_fields)
+    # Tell subscribers that the plan has started
+    started_event = PlanStartedEvent(
+        execution_id=execution_id,
+        title=input.title,
+        total_tasks=input.total_tasks,
+        agent_id=input.agent_id,
+    )
+    await publish_plan_event(
+        execution_id=execution_id,
+        event_type="plan_started",
+        event_data=started_event,
+    )
+    # The record itself is expected to exist already, so there is nothing else to do
+    return {"success": True, "plan_execution_id": execution_id}
+@activity.defn
+async def update_plan_state(input: UpdatePlanStateInput) -> Dict[str, Any]:
+    """
+    Push changed plan fields to the Control Plane API.
+    Only the fields of `input` that are not None are sent, as a JSON PATCH
+    to /api/v1/tasks/plan/{plan_execution_id}. Failures are never raised;
+    they are reported through the returned dict.
+    Args:
+        input: Plan execution ID plus the optional fields to update
+    Returns:
+        {"success": True} on HTTP 200/201, the same with a "message" when
+        there was nothing to send, or {"success": False, "error": ...} on
+        any other status code or exception
+    """
+    activity.logger.info(
+        f"updating_plan_state: plan_id={input.plan_execution_id[:8]}, status={input.status}, completed={input.completed_tasks}"
+    )
+    # Fields copied verbatim into the payload when set, in payload order
+    passthrough_fields = (
+        "completed_tasks",
+        "failed_tasks",
+        "waiting_tasks",
+        "dag_state",
+        "total_tokens",
+        "actual_cost_usd",
+    )
+    try:
+        control_plane_url = get_control_plane_url()
+        updates = {}
+        status = input.status
+        if status is not None:
+            # Enum members are sent by value; plain values pass through unchanged
+            updates["status"] = status.value if hasattr(status, 'value') else status
+        for field_name in passthrough_fields:
+            field_value = getattr(input, field_name)
+            if field_value is not None:
+                updates[field_name] = field_value
+        if not updates:
+            return {"success": True, "message": "No updates to apply"}
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.patch(
+                f"{control_plane_url}/api/v1/tasks/plan/{input.plan_execution_id}",
+                json=updates,
+            )
+            if response.status_code in (200, 201):
+                activity.logger.info(f"plan_state_updated: updates={list(updates.keys())}")
+                return {"success": True}
+            activity.logger.error(
+                f"failed_to_update_plan_state: status={response.status_code}, response={response.text[:200]}"
+            )
+            return {"success": False, "error": response.text}
+    except Exception as e:
+        activity.logger.error(f"update_plan_state_failed: {str(e)}")
+        return {"success": False, "error": str(e)}
+@activity.defn
+async def execute_task_activity(
+    task: PlanTask,
+    plan_execution_id: str,
+    organization_id: str,
+    dependency_outputs: Optional[Dict[int, str]] = None,
+    jwt_token: Optional[str] = None,
+    model_id: Optional[str] = None,
+    retry_context: Optional[TaskRetryContext] = None,
+    default_worker_queue_id: Optional[str] = None,  # Fallback from workflow input
+) -> TaskExecutionResult:
+    """
+    Execute a task by triggering an agent execution.
+    This starts a child agent execution through the Control Plane API,
+    follows its SSE event stream until a terminal status arrives, and
+    returns the collected result for the orchestrator agent to analyze.
+    Uses agent_id and worker_queue_id from the task object.
+    Falls back to default_worker_queue_id if task doesn't have one.
+    Includes outputs from dependent tasks if provided.
+    If retry_context is provided, enriches the task with failure history.
+    Args:
+        task: Task to run; its agent_id is required
+        plan_execution_id: Owning plan execution; also sent as the agent's session_id
+        organization_id: Forwarded to the completion analysis
+        dependency_outputs: Output text of earlier tasks, keyed by task id
+        jwt_token: Bearer token for Control Plane requests
+        model_id: Accepted but not used by this activity
+        retry_context: Failure history of previous attempts, if this is a retry
+        default_worker_queue_id: Queue used when the task does not name one
+    Returns:
+        TaskExecutionResult with status SUCCESS, WAITING_FOR_INPUT or FAILED.
+        Failures after input validation are returned (status FAILED with
+        `error` set), not raised. tokens and cost are always 0 here because
+        nothing in the stream handling updates them.
+    Raises:
+        ValueError: If no agent_id or no worker queue can be determined
+    """
+    from worker_internal.planner.retry_logic import enrich_task_with_retry_context
+    if retry_context:
+        task = enrich_task_with_retry_context(task, retry_context)
+    # Use agent_id and worker_queue_id from task, with fallback to workflow-level default
+    agent_id = task.agent_id
+    worker_queue_id = task.worker_queue_id or default_worker_queue_id
+    if not agent_id:
+        raise ValueError(f"Task {task.id} missing agent_id")
+    if not worker_queue_id:
+        raise ValueError(f"Task {task.id} missing worker_queue_id (and no default_worker_queue_id provided)")
+    activity.logger.info(
+        "executing_task",
+        extra={
+            "task_id": task.id,
+            "task_title": task.title,
+            "plan_execution_id": plan_execution_id[:8],
+            "has_jwt_token": bool(jwt_token),
+            "jwt_token_length": len(jwt_token) if jwt_token else 0,
+            "worker_queue_id": worker_queue_id,
+            "agent_id": agent_id,
+            "dependencies": task.dependencies,
+            "has_dependency_outputs": bool(dependency_outputs),
+            "is_retry": bool(retry_context),
+            "retry_attempt": retry_context.current_attempt if retry_context else 0,
+        }
+    )
+    started_at = datetime.now(timezone.utc)
+    try:
+        # Build dependency context if this task depends on others
+        dependency_context = ""
+        if task.dependencies and dependency_outputs:
+            dependency_context = "\n## Outputs from Previous Tasks\n"
+            for dep_task_id in task.dependencies:
+                if dep_task_id in dependency_outputs:
+                    output = dependency_outputs[dep_task_id]
+                    dependency_context += f"\n### Task {dep_task_id} Output:\n```\n{output}\n```\n"
+                else:
+                    dependency_context += f"\n### Task {dep_task_id}: Output not available\n"
+            dependency_context += "\n"
+        # Build enriched prompt for the task
+        enriched_prompt = f"""# Task: {task.title}
+## Description
+{task.description}
+## Detailed Instructions
+{task.details}
+{dependency_context}
+## Test Strategy
+{task.test_strategy or 'Complete the task as described and verify the output.'}
+## Priority
+{task.priority}
+## Available Skills
+{', '.join(task.skills_to_use) if task.skills_to_use else 'Use any available skills as needed'}
+Please complete this task following the instructions above. Be thorough and verify your work.
+"""
+        # Trigger agent execution via Control Plane API
+        control_plane_url = get_control_plane_url()
+        async with httpx.AsyncClient(timeout=600.0) as client:  # 10 min timeout for task execution
+            response = await client.post(
+                f"{control_plane_url}/api/v1/agents/{agent_id}/execute",
+                json={
+                    "prompt": enriched_prompt,
+                    "worker_queue_id": worker_queue_id,  # Use worker_queue_id from plan request
+                    # Don't pass execution_id - let API generate it
+                    "user_metadata": {
+                        "plan_execution_id": plan_execution_id,
+                        "task_id": task.id,
+                        "task_title": task.title,
+                        "skills_filter": task.skills_to_use,
+                        "env_vars_filter": task.env_vars_to_use,
+                        "secrets_filter": task.secrets_to_use,
+                        "session_id": plan_execution_id,  # For agent worker to use
+                    },
+                    "runtime_config": {
+                        "session_id": plan_execution_id,  # CRITICAL: Use plan_execution_id to group agent LLM calls under plan trace
+                    }
+                },
+                headers=get_auth_headers(jwt_token),
+            )
+            if response.status_code not in (200, 201, 202):
+                activity.logger.error(
+                    f"agent_execution_api_failed: status={response.status_code}, response={response.text[:500]}"
+                )
+                raise Exception(f"Failed to execute task: {response.text}")
+            result = response.json()
+            # Use execution_id from API response
+            execution_id = result.get("execution_id")
+            if not execution_id:
+                # Without an ID there is nothing to stream; fail with a clear
+                # reason instead of requesting ".../executions/None/stream"
+                raise Exception("Agent execution API response did not include an execution_id")
+            activity.logger.info(
+                f"agent_execution_started: execution_id={execution_id}, workflow_id={result.get('workflow_id')}"
+            )
+            # Publish task_started event (now we have task_execution_id)
+            await publish_plan_event(
+                execution_id=plan_execution_id,
+                event_type="task_started",
+                event_data=TaskStartedEvent(
+                    execution_id=plan_execution_id,
+                    task_id=task.id,
+                    title=task.title,
+                    description=task.description,
+                    agent_id=agent_id,
+                    task_execution_id=execution_id,  # Agent execution ID
+                    dependencies=task.dependencies or [],
+                )
+            )
+            # Publish TODO update: pending -> running
+            await publish_plan_event(
+                execution_id=plan_execution_id,
+                event_type="todo_item_updated",
+                event_data=TodoItemUpdatedEvent(
+                    execution_id=plan_execution_id,
+                    task_id=task.id,
+                    title=task.title,
+                    old_status="pending",
+                    new_status="running",
+                    message=f"Started executing: {task.title}",
+                )
+            )
+            # Stream execution events instead of polling
+            activity.logger.info(f"streaming_task_execution: execution_id={execution_id}, task_id={task.id}")
+            final_status = None
+            final_output = ""
+            # NOTE: tokens and cost are never updated from the stream below
+            final_tokens = 0
+            final_cost = 0.0
+            final_error = None
+            all_events = []  # Store all stream events
+            # Stream events from execution
+            async with client.stream(
+                "GET",
+                f"{control_plane_url}/api/v1/executions/{execution_id}/stream",
+                headers=get_auth_headers(jwt_token),
+                timeout=600.0,  # 10 min timeout
+            ) as stream_response:
+                if stream_response.status_code not in (200, 201):
+                    raise Exception(f"Failed to stream execution: {stream_response.status_code}")
+                # Event type announced by the most recent "event: " line
+                current_event = None
+                async for line in stream_response.aiter_lines():
+                    if not line:
+                        continue
+                    # Parse SSE format: "event: type\ndata: json"
+                    if line.startswith("event: "):
+                        current_event = line[7:]  # Get event type
+                        continue
+                    if line.startswith("data: "):
+                        try:
+                            # Parse SSE data
+                            data = json.loads(line[6:])  # Remove "data: " prefix
+                            status = data.get("status")
+                            # Store event
+                            all_events.append({
+                                "event": current_event,
+                                "data": data,
+                                "timestamp": data.get("timestamp", datetime.now(timezone.utc).isoformat())
+                            })
+                            # Log event
+                            activity.logger.info(
+                                f"stream_event: event={current_event}, "
+                                f"status={status}, task_id={task.id}"
+                            )
+                            # Track status events for completion
+                            if current_event == "status" and status:
+                                if status in ("waiting_for_input", "completed", "success", "failed", "error"):
+                                    final_status = status
+                                    activity.logger.info(f"✅ Task complete! status={final_status}, task_id={task.id}")
+                                    break  # Done!
+                            # Track message content for summary output (only assistant messages)
+                            if current_event in ("message", "message_chunk"):
+                                # Only capture assistant messages, not user prompts.
+                                # Fields are read from a nested "data" object first,
+                                # then from the top level of the payload.
+                                msg_data = data.get("data", {})
+                                role = msg_data.get("role", data.get("role"))
+                                content = msg_data.get("content", data.get("content", ""))
+                                if role == "assistant" and content and content != "(no content)":
+                                    final_output += content
+                        except json.JSONDecodeError:
+                            continue  # Skip malformed events
+            # Return result based on stream
+            completed_at = datetime.now(timezone.utc)
+            # Determine task status based on final_status
+            if final_status in ("completed", "success"):
+                # Task completed successfully
+                task_status = TaskStatus.SUCCESS
+                needs_continuation = False
+                user_question = None
+            elif final_status == "waiting_for_input":
+                # Agent is waiting for user response - use LLM to analyze if task is complete
+                activity.logger.info(
+                    f"analyzing_waiting_for_input_status: task_id={task.id}, analyzing if task is complete or needs user input"
+                )
+                # user_id is left as None; the analysis derives it from the JWT
+                analysis = await analyze_task_completion_status(
+                    task,
+                    final_output,
+                    all_events,
+                    plan_execution_id=plan_execution_id,
+                    organization_id=organization_id,
+                    user_id=None,
+                    jwt_token=jwt_token,
+                )
+                if analysis.get("task_complete", False):
+                    # Task is actually complete despite waiting_for_input status
+                    activity.logger.info(
+                        f"task_complete_despite_waiting: task_id={task.id}, "
+                        f"reasoning={analysis.get('reasoning')}"
+                    )
+                    task_status = TaskStatus.SUCCESS
+                    needs_continuation = False
+                    user_question = None
+                else:
+                    # Task genuinely needs user input to continue
+                    activity.logger.info(
+                        f"task_needs_user_input: task_id={task.id}, "
+                        f"user_question={analysis.get('user_question')}"
+                    )
+                    task_status = TaskStatus.WAITING_FOR_INPUT
+                    needs_continuation = True
+                    user_question = analysis.get("user_question")
+            else:
+                # Task failed, errored, or the stream closed before any terminal status
+                task_status = TaskStatus.FAILED
+                needs_continuation = False
+                user_question = None
+                # Always give callers a reason for the failure rather than error=None
+                if final_error is None:
+                    if final_status is None:
+                        final_error = "Execution stream ended without a terminal status"
+                    else:
+                        final_error = f"Agent execution ended with status: {final_status}"
+            # Publish appropriate event based on status
+            if task_status == TaskStatus.WAITING_FOR_INPUT:
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="task_waiting_for_input",
+                    event_data=TaskWaitingForInputEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        question=user_question or "Waiting for user input",
+                        task_execution_id=execution_id,
+                    )
+                )
+                # Publish TODO update: running -> waiting_for_input
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="todo_item_updated",
+                    event_data=TodoItemUpdatedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        old_status="running",
+                        new_status="waiting_for_input",
+                        message=user_question or "Waiting for user input",
+                    )
+                )
+            else:
+                # Task completed (success or failed)
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="task_completed",
+                    event_data=TaskCompletedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        status="success" if task_status == TaskStatus.SUCCESS else "failed",
+                        output=final_output[:500] if final_output else "",  # Truncate for event
+                        error=final_error,
+                        tokens=final_tokens,
+                        cost=final_cost,
+                    )
+                )
+                # Publish TODO update: running -> completed/failed
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="todo_item_updated",
+                    event_data=TodoItemUpdatedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        old_status="running",
+                        new_status="completed" if task_status == TaskStatus.SUCCESS else "failed",
+                        message=f"Task {'completed successfully' if task_status == TaskStatus.SUCCESS else 'failed'}",
+                    )
+                )
+            return TaskExecutionResult(
+                task_id=task.id,
+                status=task_status,
+                execution_id=execution_id,
+                output=final_output,
+                events=all_events,  # Include all stream events
+                tokens=final_tokens,
+                cost=final_cost,
+                started_at=started_at,
+                completed_at=completed_at,
+                error=final_error,
+                needs_continuation=needs_continuation,
+                user_question=user_question,
+            )
+    except Exception as e:
+        # Any failure after validation is converted into a FAILED result so the
+        # caller receives the error text instead of an exception
+        activity.logger.error(
+            "execute_task_failed",
+            extra={
+                "task_id": task.id,
+                "error": str(e),
+            }
+        )
+        return TaskExecutionResult(
+            task_id=task.id,
+            status=TaskStatus.FAILED,
+            # Synthetic ID: the real agent execution ID may not exist at this point
+            execution_id=f"{plan_execution_id}-task-{task.id}",
+            output="",
+            events=[],  # No events on error
+            tokens=0,
+            cost=0.0,
+            started_at=started_at,
+            completed_at=datetime.now(timezone.utc),
+            error=str(e),
+        )
+@activity.defn
+async def analyze_task_completion_status(
+    task: PlanTask,
+    agent_output: str,
+    events: List[Dict[str, Any]] = None,
+    plan_execution_id: Optional[str] = None,
+    organization_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+    jwt_token: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Analyze if a task is complete or needs user input.
+    When an agent execution reaches 'waiting_for_input' status, we need to determine:
+    - Is the task actually complete? (agent finished the work)
+    - Or does the task need user input to continue? (agent is asking a question)
+    This uses LLM analysis to make an intelligent decision.
+    """
+    # Extract user_id from JWT if not provided
+    if not user_id and jwt_token:
+        user_id = extract_user_from_jwt(jwt_token)
+    # Extract full conversation from events (all user/assistant messages + tool executions)
+    conversation_summary = ""
+    if events:
+        # Build conversation from message and tool events
+        messages_by_id = {}  # message_id -> accumulated content
+        conversation_order = []  # (message_id, role, timestamp)
+        tool_executions = []  # Track tool executions
+        for event in events:
+            event_type = event.get("event")
+            # Track tool executions
+            if event_type == "tool_completed":
+                tool_data = event.get("data", {}).get("data", {})
+                tool_name = tool_data.get("tool_name", "")
+                tool_output = tool_data.get("tool_output", "")
+                if tool_name and tool_output:
+                    # Extract stdout if it's in dict format
+                    if isinstance(tool_output, str) and "stdout" in tool_output:
+                        try:
+                            import ast
+                            tool_dict = ast.literal_eval(tool_output)
+                            if isinstance(tool_dict, dict):
+                                tool_output = tool_dict.get("tool_response", {}).get("stdout", tool_output)
+                        except:
+                            pass
+                    tool_executions.append(f"TOOL({tool_name}): {tool_output}")
+            # Track messages
+            if event_type in ("message", "message_chunk"):
+                data = event.get("data", {})
+                if isinstance(data, dict):
+                    if event_type == "message_chunk" and "data" in data:
+                        msg_data = data.get("data", {})
+                    else:
+                        msg_data = data
+                    role = msg_data.get("role")
+                    content = msg_data.get("content", "")
+                    message_id = msg_data.get("message_id", "")
+                    timestamp = event.get("timestamp", "")
+                    # Skip tool messages and empty/no-content
+                    if role in ("user", "assistant") and content and content != "(no content)":
+                        if message_id not in messages_by_id:
+                            messages_by_id[message_id] = ""
+                            conversation_order.append((message_id, role, timestamp))
+                        # Accumulate chunks for this message
+                        messages_by_id[message_id] += content
+        # Build conversation in order, including tool executions
+        conversation_turns = []
+        for message_id, role, timestamp in conversation_order:
+            content = messages_by_id[message_id].strip()
+            if content:
+                truncated_content = content if len(content) <= 500 else content[:500] + "..."
+                conversation_turns.append(f"{role.upper()}: {truncated_content}")
+        # Add tool executions to conversation
+        if tool_executions:
+            conversation_turns.extend(tool_executions)
+        if conversation_turns:
+            conversation_summary = "\n\n".join(conversation_turns)
+            activity.logger.info(
+                f"extracted_full_conversation_from_events",
+                extra={
+                    "task_id": task.id,
+                    "total_events": len(events),
+                    "conversation_turns": len(conversation_turns),
+                    "conversation_preview": conversation_summary[:400],
+                }
+            )
+        else:
+            # No conversation in events, use accumulated output
+            conversation_summary = agent_output
+            activity.logger.info(
+                f"no_conversation_in_events_using_accumulated_output",
+                extra={
+                    "task_id": task.id,
+                    "output_length": len(agent_output),
+                }
+            )
+    # Use conversation summary for analysis
+    analysis_text = conversation_summary
+    activity.logger.info(
+        "analyzing_task_completion_status",
+        extra={
+            "task_id": task.id,
+            "task_title": task.title,
+            "analysis_text_length": len(analysis_text),
+            "analysis_text_preview": analysis_text[:300],
+            "using_conversation_summary": bool(conversation_summary),
+        }
+    )
+    try:
+        # Build analysis prompt
+        analysis_prompt = f"""Analyze this task execution to determine if the task is complete or if it needs user input to continue.
+Task Requirement:
+Title: {task.title}
+Description: {task.description}
+Details: {task.details}
+Test Strategy: {task.test_strategy or 'Complete the task as described'}
+Full Conversation for this Task:
+{analysis_text[:10000] if analysis_text else 'No output available'}
+Question: Looking at the FULL conversation above, did the agent complete the task requirement, or does it still need more user input?
+Analyze the complete conversation flow:
+1. What did the task require? (from Description and Details)
+2. What has happened in the conversation so far?
+3. Has the agent fulfilled the task requirement?
+4. Is the LATEST agent message asking for NEW information, or just confirming completion?
+Decision Rules:
+- **CRITICAL: If the agent explicitly says "completed", "done", "finished" → task_complete=true**
+- If the task said "ask user for X, then do Y" AND the conversation shows user provided X AND agent did Y → task_complete=true
+- If the task said "ask user" AND agent asked AND user hasn't responded yet → needs_user_input=true
+- If agent provided a result/answer that satisfies the task → task_complete=true
+- If agent's latest message is asking for the FIRST TIME for input → needs_user_input=true
+- If agent already got input and produced a result, even if asking again → task_complete=true (use the result before the repeat)
+- **If agent's LAST message confirms completion (not asking a question) → task_complete=true**
+Examples:
+- Task: "Ask for number, calculate" | Conv: "ASSISTANT: What number? USER: 5 ASSISTANT: Result is 10" → task_complete=true (result: 10)
+- Task: "Ask for input" | Conv: "ASSISTANT: What input?" → needs_user_input=true
+- Task: "Generate random number" | Conv: "ASSISTANT: Generated 7" → task_complete=true
+Respond with ONLY a JSON object (no markdown, no explanation):
+{{
+    "task_complete": true | false,
+    "reasoning": "brief explanation of your determination",
+    "confidence": 0.95,
+    "needs_user_input": true | false,
+    "user_question": "what the agent is asking for (if needs_user_input=true, otherwise null)"
+}}
+Guidelines:
+- task_complete=true: The task requirement was satisfied, agent produced a result
+- task_complete=false: The task is not complete yet
+- needs_user_input=true: The agent is explicitly asking for user input/clarification
+- needs_user_input=false: The task is complete or failed, no user input needed
+"""
+        # Use LiteLLM directly with metadata in request body
+        litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
+        litellm_api_key = os.getenv("LITELLM_API_KEY")
+        model = "kubiya/claude-sonnet-4"
+        # Build Langfuse metadata
+        metadata_context = build_langfuse_metadata(
+            plan_execution_id=plan_execution_id or "unknown",
+            generation_name=f"task-{task.id}-completion-analysis",
+            user_id=user_id,
+            organization_id=organization_id,
+            agent_id=task.agent_id,
+            task_id=task.id,
+        )
+        # Format user for LiteLLM (format: email-org)
+        user_field = None
+        if user_id and organization_id:
+            user_field = f"{user_id}-{organization_id}"
+        activity.logger.info(
+            "calling_llm_for_task_completion_analysis",
+            extra={
+                "task_id": task.id,
+                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
+                "generation_name": metadata_context.get("generation_name"),
+                "session_id": metadata_context.get("session_id"),
+            }
+        )
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            request_body = {
+                "model": model,
+                "messages": [
+                    {"role": "user", "content": analysis_prompt}
+                ],
+                "temperature": 0.0,
+                "max_tokens": 500,
+            }
+            # DON'T add user field - Anthropic rejects emails!
+            # LiteLLM will extract trace_user_id from metadata for Langfuse
+            # Add metadata (LiteLLM extracts Langfuse fields from here)
+            # CRITICAL: Don't include user_id in metadata - Anthropic rejects emails!
+            # Only use trace_user_id which LiteLLM extracts for Langfuse
+            request_body["metadata"] = {
+                "trace_name": metadata_context.get("trace_name"),
+                "generation_name": metadata_context.get("generation_name"),
+                "trace_id": metadata_context.get("session_id"),
+                "session_id": metadata_context.get("session_id"),
+                "trace_user_id": user_field,  # For Langfuse only
+                "organization_id": organization_id,
+                "agent_id": metadata_context.get("agent_id"),
+                "task_id": metadata_context.get("task_id"),
+            }
+            response = await client.post(
+                f"{litellm_api_base}/v1/chat/completions",
+                json=request_body,
+                headers={
+                    "Authorization": f"Bearer {litellm_api_key}",
+                    "Content-Type": "application/json",
+                }
+            )
+            if response.status_code != 200:
+                raise Exception(f"LLM analysis failed: {response.status_code} - {response.text}")
+            result = response.json()
+            content = result['choices'][0]['message']['content']
+            # Parse JSON response
+            content = content.strip()
+            if content.startswith('```'):
+                content = content.split('```')[1]
+                if content.startswith('json'):
+                    content = content[4:]
+            content = content.strip()
+            analysis_data = json.loads(content)
+            activity.logger.info(
+                "task_completion_analysis_complete",
+                extra={
+                    "task_id": task.id,
+                    "task_complete": analysis_data.get("task_complete"),
+                    "needs_user_input": analysis_data.get("needs_user_input"),
+                    "confidence": analysis_data.get("confidence"),
+                    "reasoning": analysis_data.get("reasoning"),
+                    "analyzed_text_preview": analysis_text[:200],
+                }
+            )
+            return analysis_data
+    except Exception as e:
+        activity.logger.error(
+            "task_completion_analysis_failed",
+            extra={
+                "task_id": task.id,
+                "error": str(e),
+                "litellm_api_base": os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai"),
+                "has_api_key": bool(os.getenv("LITELLM_API_KEY")),
+            }
+        )
+        # Re-raise the exception so we can see what's wrong
+        raise Exception(f"Failed to analyze task completion for task {task.id}: {str(e)}") from e
+@activity.defn
+async def validate_task_completion(
+    task: PlanTask,
+    execution_result: TaskExecutionResult,
+    plan_execution_id: Optional[str] = None,
+    organization_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+    jwt_token: Optional[str] = None,
+) -> TaskValidationResult:
+    """
+    Validate task completion using LLM analysis.
+    Analyzes the task output to determine if it actually completed successfully.
+    """
+    # Extract user_id from JWT if not provided
+    if not user_id and jwt_token:
+        user_id = extract_user_from_jwt(jwt_token)
+    activity.logger.info(
+        "validating_task",
+        extra={
+            "task_id": task.id,
+            "task_title": task.title,
+        }
+    )
+    try:
+        # Build validation prompt
+        validation_prompt = f"""Analyze this task execution and determine if it completed successfully.
+Task: {task.title}
+Description: {task.description}
+Test Strategy: {task.test_strategy or 'Task should be completed as described'}
+Task Output:
+{execution_result.output[:2000] if execution_result.output else 'No output available'}
+Execution Status: {execution_result.status}
+{f"Error: {execution_result.error}" if execution_result.error else ""}
+Respond with ONLY a JSON object (no markdown, no explanation):
+{{
+    "status": "success" | "failed" | "pending",
+    "reason": "brief explanation of why you determined this status",
+    "confidence": 0.95,
+    "suggestions": "optional suggestions for improvement or next steps"
+}}
+Guidelines:
+- "success": Task completed and output matches test strategy
+- "failed": Task failed, errored, or output doesn't match requirements
+- "pending": Task seems incomplete or needs clarification
+"""
+        # Use LiteLLM directly with metadata in request body
+        litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
+        litellm_api_key = os.getenv("LITELLM_API_KEY")
+        model = "kubiya/claude-sonnet-4"
+        # Build Langfuse metadata
+        metadata_context = build_langfuse_metadata(
+            plan_execution_id=plan_execution_id or "unknown",
+            generation_name=f"task-{task.id}-validation",
+            user_id=user_id,
+            organization_id=organization_id,
+            agent_id=task.agent_id,
+            task_id=task.id,
+        )
+        # Format user for LiteLLM (format: email-org)
+        user_field = None
+        if user_id and organization_id:
+            user_field = f"{user_id}-{organization_id}"
+        activity.logger.info(
+            "calling_llm_for_task_validation",
+            extra={
+                "task_id": task.id,
+                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
+                "generation_name": metadata_context.get("generation_name"),
+                "session_id": metadata_context.get("session_id"),
+            }
+        )
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            request_body = {
+                "model": model,
+                "messages": [
+                    {"role": "user", "content": validation_prompt}
+                ],
+                "temperature": 0.0,
+                "max_tokens": 500,
+            }
+            # DON'T add user field - Anthropic rejects emails!
+            # LiteLLM will extract trace_user_id from metadata for Langfuse
+            # Add metadata (LiteLLM extracts Langfuse fields from here)
+            # CRITICAL: Don't include user_id in metadata - Anthropic rejects emails!
+            # Only use trace_user_id which LiteLLM extracts for Langfuse
+            request_body["metadata"] = {
+                "trace_name": metadata_context.get("trace_name"),
+                "generation_name": metadata_context.get("generation_name"),
+                "trace_id": metadata_context.get("session_id"),
+                "session_id": metadata_context.get("session_id"),
+                "trace_user_id": user_field,  # For Langfuse only
+                "organization_id": organization_id,
+                "agent_id": metadata_context.get("agent_id"),
+                "task_id": metadata_context.get("task_id"),
+            }
+            response = await client.post(
+                f"{litellm_api_base}/v1/chat/completions",
+                json=request_body,
+                headers={
+                    "Authorization": f"Bearer {litellm_api_key}",
+                    "Content-Type": "application/json",
+                }
+            )
+            if response.status_code != 200:
+                raise Exception(f"LLM validation failed: {response.status_code} - {response.text}")
+            result = response.json()
+            content = result['choices'][0]['message']['content']
+            # Parse JSON response
+            content = content.strip()
+            if content.startswith('```'):
+                content = content.split('```')[1]
+                if content.startswith('json'):
+                    content = content[4:]
+            content = content.strip()
+            validation_data = json.loads(content)
+            # Map status string to TaskStatus enum
+            status_map = {
+                "success": TaskStatus.SUCCESS,
+                "failed": TaskStatus.FAILED,
+                "pending": TaskStatus.PENDING,
+            }
+            return TaskValidationResult(
+                task_id=task.id,
+                status=status_map.get(validation_data.get("status", "failed"), TaskStatus.FAILED),
+                reason=validation_data.get("reason", "Validation completed"),
+                confidence=validation_data.get("confidence", 0.5),
+                suggestions=validation_data.get("suggestions"),
+            )
+    except Exception as e:
+        activity.logger.error(
+            "task_validation_failed",
+            extra={
+                "task_id": task.id,
+                "error": str(e),
+            }
+        )
+        # Default to success if validation fails
+        return TaskValidationResult(
+            task_id=task.id,
+            status=TaskStatus.SUCCESS,
+            reason=f"Validation failed, assuming success: {str(e)}",
+            confidence=0.5,
+        )
+@activity.defn
+async def continue_task_activity(
+    task: PlanTask,
+    execution_id: str,
+    user_message: str,
+    plan_execution_id: str,
+    jwt_token: Optional[str] = None,
+    model_id: Optional[str] = None,
+    organization_id: Optional[str] = None,
+) -> TaskExecutionResult:
+    """
+    Continue a task execution after user provides input.
+    This sends the user's message to the existing agent execution,
+    then continues streaming events until the task completes or needs more input.
+    """
+    activity.logger.info(
+        "continuing_task_execution",
+        extra={
+            "task_id": task.id,
+            "execution_id": execution_id,
+            "plan_execution_id": plan_execution_id[:8],
+            "message_preview": user_message[:100],
+        }
+    )
+    started_at = datetime.now(timezone.utc)
+    try:
+        control_plane_url = get_control_plane_url()
+        async with httpx.AsyncClient(timeout=600.0) as client:
+            # Step 1: Send user message to continue conversation (only if message provided)
+            if user_message:
+                message_response = await client.post(
+                    f"{control_plane_url}/api/v1/executions/{execution_id}/message",
+                    json={"message": user_message},
+                    headers=get_auth_headers(jwt_token),
+                )
+                if message_response.status_code not in (200, 201, 202):
+                    raise Exception(f"Failed to send message: {message_response.text}")
+                activity.logger.info(
+                    f"user_message_sent_to_execution: execution_id={execution_id}"
+                )
+            else:
+                activity.logger.info(
+                    f"skipping_message_send_already_sent_by_api: execution_id={execution_id}"
+                )
+            # Step 2: Continue streaming from the execution
+            final_status = None
+            final_output = ""
+            final_tokens = 0
+            final_cost = 0.0
+            final_error = None
+            all_events = []
+            seen_events_after_message = False  # Track if we've seen NEW events after sending message
+            async with client.stream(
+                "GET",
+                f"{control_plane_url}/api/v1/executions/{execution_id}/stream",
+                headers=get_auth_headers(jwt_token),
+                timeout=600.0,
+            ) as stream_response:
+                if stream_response.status_code not in (200, 201):
+                    raise Exception(f"Failed to stream execution: {stream_response.status_code}")
+                current_event = None
+                async for line in stream_response.aiter_lines():
+                    if not line:
+                        continue
+                    # Parse SSE format
+                    if line.startswith("event: "):
+                        current_event = line[7:]
+                        continue
+                    if line.startswith("data: "):
+                        try:
+                            data = json.loads(line[6:])
+                            status = data.get("status")
+                            # Check if this is a NEW event (after our message was sent)
+                            event_timestamp = data.get("timestamp", "")
+                            if event_timestamp and event_timestamp > started_at.isoformat():
+                                seen_events_after_message = True
+                            all_events.append({
+                                "event": current_event,
+                                "data": data,
+                                "timestamp": data.get("timestamp", datetime.now(timezone.utc).isoformat())
+                            })
+                            activity.logger.info(
+                                f"stream_event: event={current_event}, status={status}, task_id={task.id}, new={seen_events_after_message}"
+                            )
+                            # Check for completion (but ignore old waiting_for_input status)
+                            if current_event == "status" and status:
+                                # During continuation, ignore waiting_for_input unless we've seen new events
+                                # This prevents breaking on old cached status
+                                if status in ("completed", "success", "failed", "error"):
+                                    final_status = status
+                                    activity.logger.info(
+                                        f"task_continuation_complete: status={final_status}, task_id={task.id}"
+                                    )
+                                    break
+                                elif status == "waiting_for_input" and seen_events_after_message:
+                                    # Agent needs MORE input after our message
+                                    final_status = status
+                                    activity.logger.info(
+                                        f"task_needs_more_input: status={final_status}, task_id={task.id}"
+                                    )
+                                    break
+                            # Track assistant messages
+                            if current_event in ("message", "message_chunk"):
+                                msg_data = data.get("data", {})
+                                role = msg_data.get("role", data.get("role"))
+                                content = msg_data.get("content", data.get("content", ""))
+                                if role == "assistant" and content and content != "(no content)":
+                                    final_output += content
+                        except json.JSONDecodeError:
+                            continue
+            # Analyze completion status
+            completed_at = datetime.now(timezone.utc)
+            if final_status in ("completed", "success"):
+                task_status = TaskStatus.SUCCESS
+                needs_continuation = False
+                user_question = None
+            elif final_status == "waiting_for_input":
+                # Use LLM analysis again
+                activity.logger.info(
+                    f"re_analyzing_after_user_input: task_id={task.id}, analyzing continuation result"
+                )
+                analysis = await analyze_task_completion_status(
+                    task,
+                    final_output,
+                    all_events,
+                    plan_execution_id=plan_execution_id,
+                    organization_id=organization_id,
+                    user_id=None,
+                    jwt_token=jwt_token,
+                )
+                if analysis.get("task_complete", False):
+                    task_status = TaskStatus.SUCCESS
+                    needs_continuation = False
+                    user_question = None
+                    activity.logger.info(
+                        f"task_complete_after_user_input: task_id={task.id}"
+                    )
+                else:
+                    # Task still needs more input
+                    task_status = TaskStatus.WAITING_FOR_INPUT
+                    needs_continuation = True
+                    user_question = analysis.get("user_question")
+                    activity.logger.info(
+                        f"task_still_needs_input: task_id={task.id}, question={user_question}"
+                    )
+            else:
+                task_status = TaskStatus.FAILED
+                needs_continuation = False
+                user_question = None
+            # Publish completion events (same as execute_task_activity)
+            if task_status == TaskStatus.WAITING_FOR_INPUT:
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="task_waiting_for_input",
+                    event_data=TaskWaitingForInputEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        question=user_question or "Waiting for user input",
+                        task_execution_id=execution_id,
+                    )
+                )
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="todo_item_updated",
+                    event_data=TodoItemUpdatedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        old_status="running",
+                        new_status="waiting_for_input",
+                        message=user_question or "Waiting for user input",
+                    )
+                )
+            else:
+                # Task completed (success or failed)
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="task_completed",
+                    event_data=TaskCompletedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        status="success" if task_status == TaskStatus.SUCCESS else "failed",
+                        output=final_output[:500] if final_output else "",
+                        error=final_error,
+                        tokens=final_tokens,
+                        cost=final_cost,
+                    )
+                )
+                await publish_plan_event(
+                    execution_id=plan_execution_id,
+                    event_type="todo_item_updated",
+                    event_data=TodoItemUpdatedEvent(
+                        execution_id=plan_execution_id,
+                        task_id=task.id,
+                        title=task.title,
+                        old_status="waiting_for_input",  # Was waiting, now completing
+                        new_status="completed" if task_status == TaskStatus.SUCCESS else "failed",
+                        message=f"Task {'completed successfully' if task_status == TaskStatus.SUCCESS else 'failed'}",
+                    )
+                )
+            return TaskExecutionResult(
+                task_id=task.id,
+                status=task_status,
+                execution_id=execution_id,
+                output=final_output,
+                events=all_events,
+                tokens=final_tokens,
+                cost=final_cost,
+                started_at=started_at,
+                completed_at=completed_at,
+                error=final_error,
+                needs_continuation=needs_continuation,
+                user_question=user_question,
+            )
+    except Exception as e:
+        activity.logger.error(
+            "continue_task_failed",
+            extra={
+                "task_id": task.id,
+                "execution_id": execution_id,
+                "error": str(e),
+            }
+        )
+        return TaskExecutionResult(
+            task_id=task.id,
+            status=TaskStatus.FAILED,
+            execution_id=execution_id,
+            output="",
+            events=[],
+            tokens=0,
+            cost=0.0,
+            started_at=started_at,
+            completed_at=datetime.now(timezone.utc),
+            error=str(e),
+            needs_continuation=False,
+            user_question=None,
+        )
+@activity.defn
+async def get_task_status_activity(
+    task_id: int,
+    task_results: Dict[int, TaskExecutionResult],
+) -> Dict[str, Any]:
+    """Get the current status of a task."""
+    if task_id in task_results:
+        result = task_results[task_id]
+        return {
+            "found": True,
+            "status": result.status.value,
+            "output": result.output,
+            "tokens": result.tokens,
+            "cost": result.cost,
+            "error": result.error,
+        }
+    else:
+        return {
+            "found": False,
+            "status": "pending",
+        }
+@activity.defn
+async def call_llm_activity(
+    messages: List[Dict[str, Any]],
+    system_prompt: str,
+    tools: List[Dict[str, Any]],
+    model_id: str,
+    plan_execution_id: Optional[str] = None,
+    organization_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+    task_id: Optional[int] = None,
+    generation_name: Optional[str] = None,
+    jwt_token: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Activity to call Anthropic API directly (like Claude Code runtime does).
+    This activity now includes Langfuse metadata for proper observability.
+    """
+    # Extract user_id from JWT if not provided
+    if not user_id and jwt_token:
+        user_id = extract_user_from_jwt(jwt_token)
+    activity.logger.info(
+        "calling_anthropic_api",
+        model=model_id,
+        message_count=len(messages),
+        tool_count=len(tools),
+        plan_execution_id=plan_execution_id[:8] if plan_execution_id else "unknown",
+    )
+    try:
+        # Use httpx directly to have full control over request with metadata
+        litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
+        litellm_api_key = os.getenv("LITELLM_API_KEY")
+        # Build Langfuse metadata
+        metadata_context = build_langfuse_metadata(
+            plan_execution_id=plan_execution_id or "unknown",
+            generation_name=generation_name or "plan-orchestrator-llm-call",
+            user_id=user_id,
+            organization_id=organization_id,
+            task_id=task_id,
+        )
+        # Format user for LiteLLM (format: email-org)
+        user_field = None
+        if user_id and organization_id:
+            user_field = f"{user_id}-{organization_id}"
+        activity.logger.info(
+            "calling_anthropic_with_metadata",
+            extra={
+                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
+                "generation_name": metadata_context.get("generation_name"),
+                "session_id": metadata_context.get("session_id"),
+            }
+        )
+        # Build request body in Anthropic format with metadata
+        request_body = {
+            "model": model_id,
+            "max_tokens": 4096,
+            "system": system_prompt,
+            "messages": messages,
+            "tools": tools,
+            "temperature": 0.0,
+        }
+        # DON'T add user field - Anthropic rejects emails!
+        # LiteLLM will extract trace_user_id from metadata for Langfuse
+        # Add metadata (LiteLLM extracts Langfuse fields from here)
+        # CRITICAL: Don't include user_id - Anthropic rejects emails!
+        request_body["metadata"] = {
+            "trace_name": metadata_context.get("trace_name"),
+            "generation_name": metadata_context.get("generation_name"),
+            "trace_id": metadata_context.get("session_id"),
+            "session_id": metadata_context.get("session_id"),
+            "trace_user_id": user_field,  # For Langfuse only
+            "organization_id": organization_id,
+            "agent_id": metadata_context.get("agent_id"),
+            "task_id": metadata_context.get("task_id"),
+        }
+        async with httpx.AsyncClient(timeout=300.0) as http_client:
+            response = await http_client.post(
+                f"{litellm_api_base}/v1/messages",
+                json=request_body,
+                headers={
+                    "Authorization": f"Bearer {litellm_api_key}",
+                    "Content-Type": "application/json",
+                    "anthropic-version": "2023-06-01",
+                },
+            )
+            if response.status_code != 200:
+                raise Exception(f"Anthropic API call failed: {response.status_code} - {response.text}")
+            result = response.json()
+            # Extract tool calls from response
+            tool_calls = []
+            content_text = ""
+            for block in result.get("content", []):
+                if block.get("type") == "text":
+                    content_text = block.get("text", "")
+                elif block.get("type") == "tool_use":
+                    tool_calls.append({
+                        "id": block.get("id"),
+                        "name": block.get("name"),
+                        "input": block.get("input", {}),
+                    })
+            activity.logger.info(
+                "anthropic_call_complete",
+                tool_calls_count=len(tool_calls),
+            )
+            return {
+                "content": content_text,
+                "tool_calls": tool_calls,
+            }
+    except Exception as e:
+        activity.logger.error(f"anthropic_call_failed: {str(e)}")
+        raise