kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,849 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ExecutionStreamer - Main orchestrator for resumable execution streaming.
|
|
3
|
+
|
|
4
|
+
This module provides the ExecutionStreamer class that orchestrates the complete
|
|
5
|
+
streaming lifecycle: immediate connection, historical message loading, and live
|
|
6
|
+
event streaming with gap recovery support.
|
|
7
|
+
|
|
8
|
+
Architecture:
|
|
9
|
+
This is the core component of the Resumable Execution Stream Architecture that
|
|
10
|
+
ties together all the specialized streaming components:
|
|
11
|
+
|
|
12
|
+
1. Phase 1: Immediate Connection (<50ms)
|
|
13
|
+
- Send 'connected' event immediately to unblock EventSource
|
|
14
|
+
- Don't wait for any slow operations (DB, Temporal queries)
|
|
15
|
+
|
|
16
|
+
2. Phase 2: Stream Historical Messages
|
|
17
|
+
- Use HistoryLoader to progressively stream database messages
|
|
18
|
+
- Yield one message at a time for instant UI rendering
|
|
19
|
+
- Track sent messages via MessageDeduplicator
|
|
20
|
+
|
|
21
|
+
3. Phase 3: History Complete
|
|
22
|
+
- Send 'history_complete' event to signal transition
|
|
23
|
+
- Include message count and truncation flags
|
|
24
|
+
|
|
25
|
+
4. Phase 4: Live Event Streaming
|
|
26
|
+
- Use LiveEventSource to stream real-time Redis events
|
|
27
|
+
- Poll at 200ms intervals for new events
|
|
28
|
+
- Continue until workflow completes or timeout
|
|
29
|
+
|
|
30
|
+
Gap Recovery:
|
|
31
|
+
- Supports Last-Event-ID pattern for client reconnection
|
|
32
|
+
- Uses EventBuffer to detect and handle gaps
|
|
33
|
+
- Replays missing events when possible
|
|
34
|
+
- Notifies client when gaps are unrecoverable
|
|
35
|
+
|
|
36
|
+
Test Strategy:
|
|
37
|
+
- Integration test full streaming flow (all 4 phases in order)
|
|
38
|
+
- Test phase transitions occur at correct times with correct data
|
|
39
|
+
- Test Last-Event-ID resumption skips already-sent events
|
|
40
|
+
- Test gap detection and replay from EventBuffer
|
|
41
|
+
- Test error handling in each phase (graceful degradation)
|
|
42
|
+
- Test statistics tracking across phases
|
|
43
|
+
- Test timeout handling (0 = no timeout; streams until the workflow completes)
|
|
44
|
+
- Test workflow completion detection stops streaming
|
|
45
|
+
- Test deduplication across history + live phases
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
import asyncio
|
|
49
|
+
import logging
|
|
50
|
+
import time
|
|
51
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
52
|
+
|
|
53
|
+
from structlog import get_logger
|
|
54
|
+
|
|
55
|
+
from .deduplication import MessageDeduplicator
|
|
56
|
+
from .event_buffer import EventBuffer
|
|
57
|
+
from .event_formatter import EventFormatter
|
|
58
|
+
from .history_loader import HistoryLoader
|
|
59
|
+
from .live_source import LiveEventSource
|
|
60
|
+
from ..services.worker_health import WorkerHealthChecker, DegradationMode
|
|
61
|
+
|
|
62
|
+
# Module-level structlog logger; ExecutionStreamer logs every phase through it
# using an event name plus key=value context fields.
logger = get_logger(__name__)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ExecutionStreamer:
|
|
66
|
+
"""
|
|
67
|
+
Main orchestrator for resumable execution streaming.
|
|
68
|
+
|
|
69
|
+
This class coordinates all phases of execution streaming:
|
|
70
|
+
1. Immediate connection acknowledgment
|
|
71
|
+
2. Progressive historical message streaming
|
|
72
|
+
3. History completion notification
|
|
73
|
+
4. Live event streaming with completion detection
|
|
74
|
+
|
|
75
|
+
The streamer supports gap recovery via Last-Event-ID pattern, enabling
|
|
76
|
+
clients to reconnect and resume from their last received event without
|
|
77
|
+
missing any updates.
|
|
78
|
+
|
|
79
|
+
Example usage:
|
|
80
|
+
```python
|
|
81
|
+
streamer = ExecutionStreamer(
|
|
82
|
+
execution_id="exec-123",
|
|
83
|
+
organization_id="org-456",
|
|
84
|
+
db_session=db,
|
|
85
|
+
redis_client=redis,
|
|
86
|
+
temporal_client=temporal_client,
|
|
87
|
+
last_event_id="exec-123_42_1234567890", # Optional, for resumption
|
|
88
|
+
timeout_seconds=0, # 0 = no timeout
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
async for sse_event in streamer.stream():
|
|
92
|
+
# Send SSE event to client
|
|
93
|
+
yield sse_event
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Architecture:
|
|
97
|
+
- Delegates to specialized components for each concern
|
|
98
|
+
- Maintains single deduplicator instance shared across phases
|
|
99
|
+
- Uses EventBuffer for gap detection and replay
|
|
100
|
+
- Uses EventFormatter for consistent SSE formatting
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
def __init__(
    self,
    execution_id: str,
    organization_id: str,
    db_session,  # SQLAlchemy session
    redis_client,  # Redis client (UpstashRedisClient or StandardRedisClient)
    temporal_client,  # temporalio.client.Client
    last_event_id: Optional[str] = None,
    timeout_seconds: int = 0,  # 0 = no timeout, stream until task completes
    execution_type: Optional[str] = None,
    health_checker: Optional[WorkerHealthChecker] = None,
):
    """
    Prepare a streamer for one execution.

    Stores the supplied handles, derives the workflow ID from the execution
    type, creates the per-stream helper components and resets the statistics.
    No streaming work happens here.

    Args:
        execution_id: ID of the execution whose events are streamed.
        organization_id: Organization ID, forwarded to the history loader,
            the live source and the 'connected' event.
        db_session: SQLAlchemy session handed to HistoryLoader,
            LiveEventSource and the default WorkerHealthChecker.
        redis_client: Redis client handed to LiveEventSource (can be None).
        temporal_client: Temporal client used for the workflow handle
            (can be None).
        last_event_id: Last event ID the client received; a truthy value
            enables the resumption phase.
        timeout_seconds: Maximum streaming duration passed to
            LiveEventSource (default: 0 = no timeout).
        execution_type: "TEAM" selects the "team-execution-" workflow ID
            prefix; any other value, or None, selects "agent-execution-".
        health_checker: Optional WorkerHealthChecker; when falsy, one is
            built from the three clients above.
    """
    # Caller-supplied identity and backend handles.
    self.execution_id = execution_id
    self.organization_id = organization_id
    self.db_session = db_session
    self.redis_client = redis_client
    self.temporal_client = temporal_client
    self.last_event_id = last_event_id
    self.timeout_seconds = timeout_seconds
    self.execution_type = execution_type if execution_type else "AGENT"

    # The workflow ID is the execution ID behind a type-specific prefix.
    workflow_prefix = (
        "team-execution" if self.execution_type == "TEAM" else "agent-execution"
    )
    self.workflow_id = f"{workflow_prefix}-{execution_id}"

    # Helper components: created once here and reused by every phase.
    self.deduplicator = MessageDeduplicator()
    self.formatter = EventFormatter(execution_id)
    self.buffer = EventBuffer(execution_id)

    # Health checker used to pick the degradation mode.
    if health_checker:
        self.health_checker = health_checker
    else:
        self.health_checker = WorkerHealthChecker(
            temporal_client=temporal_client,
            redis_client=redis_client,
            db_session=db_session,
        )

    # Cached Temporal workflow handle, or the reason it could not be obtained.
    self._workflow_handle = None
    self._workflow_handle_error = None

    # Degradation mode and the time it was last evaluated.
    self._degradation_mode = None
    self._last_health_check = None

    # Historical messages collected for the final 'done' event.
    self._streamed_messages = []

    # Statistics; every timing and counter field starts at zero.
    zeroed_fields = (
        "connection_time_ms",
        "history_load_time_ms",
        "live_streaming_time_ms",
        "total_events_sent",
        "history_messages_sent",
        "live_events_sent",
        "events_buffered",
        "events_replayed",
    )
    self._stats = {
        "phase": "initializing",
        "start_time": None,
        **dict.fromkeys(zeroed_fields, 0),
        "deduplication_stats": {},
        "errors": [],
        "degradation_mode": None,
    }

    logger.info(
        "execution_streamer_initialized",
        execution_id=execution_id[:8],
        organization_id=organization_id[:8],
        workflow_id=self.workflow_id,
        has_last_event_id=bool(last_event_id),
        timeout_seconds=timeout_seconds,
    )
|
|
192
|
+
|
|
193
|
+
async def stream(self) -> AsyncGenerator[str, None]:
    """
    Main streaming generator that orchestrates all phases.

    Steps, in order:
        1. Yield the 'connected' event.
        2. Ask the health checker for the degradation mode. If it is not
           FULL, yield a 'degraded' event; if it is UNAVAILABLE, also yield
           an error event and stop.
        3. If a Last-Event-ID was supplied, run the resumption phase.
        4. Unless the mode is LIVE_ONLY, stream historical messages and
           then yield 'history_complete'.
        5. Unless the mode is HISTORY_ONLY, stream live events.
        6. Yield a 'done' event that carries the messages collected during
           the history phase (or None when there were none).

    Any exception escaping the steps above is logged, recorded in the
    statistics and reported to the client as an error event. Final
    statistics are logged in all cases.

    Yields:
        SSE-formatted event strings ready to send to the client.

    Example:
        ```python
        async for sse_event in streamer.stream():
            # sse_event is already formatted: "id: ...\nevent: ...\ndata: ...\n\n"
            yield sse_event
        ```
    """
    self._stats["start_time"] = time.time()

    try:
        # ========== PHASE 1: IMMEDIATE CONNECTION ==========
        yield await self._phase_1_connect()

        # ========== HEALTH CHECK: Determine degradation mode ==========
        degradation_mode = await self.health_checker.get_degradation_mode()
        self._degradation_mode = degradation_mode
        self._last_health_check = time.time()
        self._stats["degradation_mode"] = degradation_mode.value

        logger.info(
            "health_check_complete",
            execution_id=self.execution_id[:8],
            degradation_mode=degradation_mode.value,
        )

        # Send degraded event if not in full mode
        if degradation_mode != DegradationMode.FULL:
            capabilities = self.health_checker.get_capabilities(degradation_mode)

            # Determine reason message based on mode
            if degradation_mode == DegradationMode.UNAVAILABLE:
                reason = "All services unavailable"
                message = "Unable to stream execution data - all services are down"
            elif degradation_mode == DegradationMode.HISTORY_ONLY:
                reason = "Live streaming unavailable"
                message = "Real-time updates unavailable. Showing historical data only."
            elif degradation_mode == DegradationMode.LIVE_ONLY:
                reason = "Historical data unavailable"
                message = "Database unavailable. Showing live updates only (no history)."
            else:
                reason = "Partial service availability"
                message = "Some services unavailable. Functionality may be limited."

            yield self.formatter.format_degraded_event(
                mode=degradation_mode.value,
                reason=reason,
                message=message,
                capabilities=capabilities,
            )
            self._stats["total_events_sent"] += 1

            # If completely unavailable, stop here
            if degradation_mode == DegradationMode.UNAVAILABLE:
                yield self.formatter.format_error_event(
                    error="All services unavailable - cannot stream execution data",
                    error_type="unavailable",
                )
                # Count the error event like every other event sent.
                self._stats["total_events_sent"] += 1
                return

        # ========== PHASE 2: RESUMPTION (if Last-Event-ID provided) ==========
        if self.last_event_id:
            async for event in self._phase_2_resumption():
                yield event

        # ========== PHASES 3 + 4: HISTORY, THEN HISTORY COMPLETE ==========
        # Both are skipped in LIVE_ONLY mode: 'history_complete' is only
        # meaningful when a history load was attempted.
        if degradation_mode != DegradationMode.LIVE_ONLY:
            async for event in self._phase_3_history(degradation_mode):
                yield event

            yield await self._phase_4_history_complete()

        # ========== PHASE 5: LIVE STREAMING ==========
        # Skip live if in HISTORY_ONLY mode
        if degradation_mode != DegradationMode.HISTORY_ONLY:
            async for event in self._phase_5_live_streaming(degradation_mode):
                yield event

        # ========== PHASE 6: SEND DONE EVENT ==========
        # Clients that wait for a terminal event rely on this. The messages
        # array is a fallback for clients that missed the individual
        # message events.
        # NOTE(review): workflow_status is always "completed" here, even
        # when live streaming ended because of a timeout or an error --
        # confirm this is what clients expect.
        yield self.formatter.format_done_event(
            response=None,  # Response is in the messages already
            workflow_status="completed",
            messages=self._streamed_messages if self._streamed_messages else None,
        )
        self._stats["total_events_sent"] += 1

        logger.info(
            "phase_6_done_event_sent",
            execution_id=self.execution_id[:8],
            messages_included=len(self._streamed_messages),
        )

    except Exception as e:
        # Log critical error
        logger.error(
            "streaming_orchestration_error",
            execution_id=self.execution_id[:8],
            phase=self._stats["phase"],
            error=str(e),
            error_type=type(e).__name__,
        )
        self._stats["errors"].append({
            "phase": self._stats["phase"],
            "error": str(e),
            "error_type": type(e).__name__,
        })

        # Send error event to client
        yield self.formatter.format_error_event(
            error=str(e),
            error_type="streaming_error",
        )
        # Count the error event like every other event sent.
        self._stats["total_events_sent"] += 1
    finally:
        # Log final statistics
        elapsed = time.time() - self._stats["start_time"]
        self._stats["total_duration_ms"] = int(elapsed * 1000)
        self._stats["deduplication_stats"] = self.deduplicator.get_stats()

        logger.info(
            "execution_streaming_complete",
            execution_id=self.execution_id[:8],
            stats=self._stats,
        )
|
|
337
|
+
|
|
338
|
+
async def _phase_1_connect(self) -> str:
    """
    Phase 1: build the 'connected' event.

    Only the formatter is called here: no database, Redis or Temporal
    access happens in this method, so the event can be sent to the client
    before any slower work starts.

    Returns:
        SSE-formatted 'connected' event string
    """
    started = time.time()
    short_id = self.execution_id[:8]
    self._stats["phase"] = "connecting"

    logger.info("phase_1_connecting", execution_id=short_id)

    # The real status is not known yet; "pending" is a fixed default.
    connected_event = self.formatter.format_connected_event(
        organization_id=self.organization_id,
        status="pending",
    )

    elapsed_ms = int((time.time() - started) * 1000)
    self._stats["connection_time_ms"] = elapsed_ms
    self._stats["total_events_sent"] += 1

    logger.info(
        "phase_1_connected",
        execution_id=short_id,
        duration_ms=self._stats["connection_time_ms"],
    )

    return connected_event
|
|
375
|
+
|
|
376
|
+
async def _phase_2_resumption(self) -> AsyncGenerator[str, None]:
|
|
377
|
+
"""
|
|
378
|
+
Phase 2: Handle Last-Event-ID resumption (gap detection and replay).
|
|
379
|
+
|
|
380
|
+
If the client provided a Last-Event-ID, we need to:
|
|
381
|
+
1. Check if we have buffered events after that ID
|
|
382
|
+
2. If yes, replay them
|
|
383
|
+
3. If no, check for gaps and notify client
|
|
384
|
+
|
|
385
|
+
This phase is skipped if no Last-Event-ID was provided (new connection).
|
|
386
|
+
|
|
387
|
+
Yields:
|
|
388
|
+
SSE-formatted events for replay or gap notification
|
|
389
|
+
"""
|
|
390
|
+
if not self.last_event_id:
|
|
391
|
+
return
|
|
392
|
+
|
|
393
|
+
t0 = time.time()
|
|
394
|
+
self._stats["phase"] = "resumption"
|
|
395
|
+
|
|
396
|
+
logger.info(
|
|
397
|
+
"phase_2_resumption_start",
|
|
398
|
+
execution_id=self.execution_id[:8],
|
|
399
|
+
last_event_id=self.last_event_id,
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
try:
|
|
403
|
+
# Check for buffered events to replay
|
|
404
|
+
replay_events = self.buffer.replay_from_id(self.last_event_id)
|
|
405
|
+
|
|
406
|
+
if replay_events:
|
|
407
|
+
# Replay buffered events
|
|
408
|
+
logger.info(
|
|
409
|
+
"replaying_buffered_events",
|
|
410
|
+
execution_id=self.execution_id[:8],
|
|
411
|
+
replay_count=len(replay_events),
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
for event_id, event_type, data_json in replay_events:
|
|
415
|
+
# Use existing event ID (don't regenerate)
|
|
416
|
+
yield self.formatter.format_event(
|
|
417
|
+
event_type=event_type,
|
|
418
|
+
data={"replay": True}, # Placeholder, actual data in data_json
|
|
419
|
+
event_id=event_id,
|
|
420
|
+
)
|
|
421
|
+
self._stats["events_replayed"] += 1
|
|
422
|
+
self._stats["total_events_sent"] += 1
|
|
423
|
+
|
|
424
|
+
else:
|
|
425
|
+
# Check if last_event_id is too old (buffer miss)
|
|
426
|
+
buffer_miss = self.buffer.check_buffer_miss(self.last_event_id)
|
|
427
|
+
|
|
428
|
+
if buffer_miss:
|
|
429
|
+
# Gap detected - notify client
|
|
430
|
+
logger.warning(
|
|
431
|
+
"gap_detected_notifying_client",
|
|
432
|
+
execution_id=self.execution_id[:8],
|
|
433
|
+
buffer_miss=buffer_miss,
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
yield self.formatter.format_gap_detected_event(
|
|
437
|
+
reason=buffer_miss.get("reason", "Unknown gap"),
|
|
438
|
+
buffer_oldest=buffer_miss.get("buffer_oldest"),
|
|
439
|
+
)
|
|
440
|
+
self._stats["total_events_sent"] += 1
|
|
441
|
+
|
|
442
|
+
except Exception as e:
|
|
443
|
+
logger.error(
|
|
444
|
+
"phase_2_resumption_error",
|
|
445
|
+
execution_id=self.execution_id[:8],
|
|
446
|
+
error=str(e),
|
|
447
|
+
)
|
|
448
|
+
self._stats["errors"].append({
|
|
449
|
+
"phase": "resumption",
|
|
450
|
+
"error": str(e),
|
|
451
|
+
})
|
|
452
|
+
|
|
453
|
+
# Continue to history load despite error
|
|
454
|
+
# Client will receive full history instead of incremental replay
|
|
455
|
+
|
|
456
|
+
async def _phase_3_history(self, degradation_mode: DegradationMode) -> AsyncGenerator[str, None]:
    """
    Phase 3: Stream historical messages.

    Builds a HistoryLoader that shares this streamer's deduplicator and
    yields each message it produces as an SSE 'message' event, one at a
    time. Every message is also kept in ``self._streamed_messages`` (for
    the final 'done' event) and recorded in the event buffer.

    If anything fails, the error is logged and recorded in the statistics,
    a 'degraded' event is sent, and the method returns normally so the
    caller can still attempt live streaming.

    Args:
        degradation_mode: Current degradation mode (only logged here).

    Yields:
        SSE-formatted 'message' events, or a single 'degraded' event on
        failure.
    """
    import json  # local import: this method is the only user in this block

    t0 = time.time()
    self._stats["phase"] = "history_loading"

    logger.info(
        "phase_3_history_start",
        execution_id=self.execution_id[:8],
        degradation_mode=degradation_mode.value,
    )

    try:
        # Create history loader with shared deduplicator
        history_loader = HistoryLoader(
            execution_id=self.execution_id,
            organization_id=self.organization_id,
            db_session=self.db_session,
            temporal_client=self.temporal_client,
            deduplicator=self.deduplicator,
            workflow_id=self.workflow_id,
        )

        # Stream messages progressively
        message_count = 0
        async for message in history_loader.stream():
            # Track message for done event fallback
            self._streamed_messages.append(message)

            # Format as SSE event
            event = self.formatter.format_message_event(message)

            # Buffer the payload as real JSON. str() on a dict produces a
            # Python repr (single quotes, None/True) that json.loads cannot
            # read back. Fall back to str() only when the message cannot be
            # serialized at all, so buffering never aborts the stream.
            try:
                buffered_payload = json.dumps(message, default=str)
            except (TypeError, ValueError):
                buffered_payload = str(message)

            # NOTE(review): this ID is generated after the SSE event was
            # formatted; confirm it matches the ID the client receives.
            event_id = self.formatter.generate_event_id()
            self.buffer.add_event(
                event_id=event_id,
                event_type="message",
                data=buffered_payload,
            )
            self._stats["events_buffered"] += 1

            yield event
            message_count += 1
            self._stats["total_events_sent"] += 1
            self._stats["history_messages_sent"] += 1

        # Get history loader stats
        history_stats = history_loader.get_stats()
        self._stats["history_load_time_ms"] = int((time.time() - t0) * 1000)

        logger.info(
            "phase_3_history_complete",
            execution_id=self.execution_id[:8],
            message_count=message_count,
            duration_ms=self._stats["history_load_time_ms"],
            history_stats=history_stats,
        )

    except Exception as e:
        logger.error(
            "phase_3_history_error",
            execution_id=self.execution_id[:8],
            error=str(e),
            error_type=type(e).__name__,
        )
        self._stats["errors"].append({
            "phase": "history_loading",
            "error": str(e),
            "error_type": type(e).__name__,
        })

        # Send degraded event to notify client
        yield self.formatter.format_degraded_event(
            mode="history_unavailable",
            reason="Failed to load message history",
            message=f"Database query failed: {str(e)[:100]}",
            capabilities=["live_events"],  # Can still serve live if Redis available
        )
        self._stats["total_events_sent"] += 1

        # Continue to live streaming despite history failure
|
|
554
|
+
|
|
555
|
+
async def _phase_4_history_complete(self) -> str:
    """
    Phase 4: build the 'history_complete' event.

    The event tells the client that the history phase is over and reports
    how many historical messages were sent. The truncation flags are
    always reported as False by this method.

    Returns:
        SSE-formatted 'history_complete' event string
    """
    self._stats["phase"] = "history_complete"
    sent_so_far = self._stats["history_messages_sent"]

    logger.info(
        "phase_4_history_complete",
        execution_id=self.execution_id[:8],
        message_count=sent_so_far,
    )

    # Truncation is not tracked at this level, hence the fixed False flags.
    completion_event = self.formatter.format_history_complete_event(
        message_count=sent_so_far,
        is_truncated=False,
        has_more=False,
    )
    self._stats["total_events_sent"] += 1
    return completion_event
|
|
587
|
+
|
|
588
|
+
async def _phase_5_live_streaming(self, degradation_mode: DegradationMode) -> AsyncGenerator[str, None]:
    """
    Phase 5: Stream live events until the live source is exhausted.

    Builds a LiveEventSource (sharing this streamer's deduplicator and the
    cached workflow handle) and converts every event dict it produces into
    an SSE string. All events except keepalives are also recorded in the
    event buffer.

    While events are flowing, the health checker is consulted again
    whenever more than 30 seconds have passed since the previous check; if
    the mode has returned to FULL from a degraded mode, a 'recovered'
    event is sent first.

    If anything fails, the error is logged and recorded in the statistics
    and a 'degraded' event is sent; the exception is not re-raised.

    Args:
        degradation_mode: Current degradation mode (only logged here).

    Yields:
        SSE-formatted events for live updates, keepalives, status changes, etc.
    """
    import json  # local import: only needed for buffering below

    t0 = time.time()
    self._stats["phase"] = "live_streaming"

    # Constants for recovery monitoring
    HEALTH_CHECK_INTERVAL = 30  # seconds

    logger.info(
        "phase_5_live_streaming_start",
        execution_id=self.execution_id[:8],
        timeout_seconds=self.timeout_seconds,
        degradation_mode=degradation_mode.value,
    )

    try:
        # stream() records when the first health check ran. When this
        # phase is run on its own that timestamp is still None, which
        # would make the interval arithmetic below raise TypeError.
        if self._last_health_check is None:
            self._last_health_check = t0

        # Get or create workflow handle
        workflow_handle = await self._get_workflow_handle()

        # Create live event source with shared deduplicator
        live_source = LiveEventSource(
            execution_id=self.execution_id,
            organization_id=self.organization_id,
            redis_client=self.redis_client,
            workflow_handle=workflow_handle,
            deduplicator=self.deduplicator,
            timeout_seconds=self.timeout_seconds,
            keepalive_interval=15,
            db_session=self.db_session,  # Pass database session for status polling
        )

        # Stream live events
        event_count = 0
        async for event in live_source.stream():
            # Check for service recovery every 30 seconds
            if time.time() - self._last_health_check > HEALTH_CHECK_INTERVAL:
                new_mode = await self.health_checker.get_degradation_mode()
                self._last_health_check = time.time()

                # If services recovered to FULL from degraded mode
                if new_mode == DegradationMode.FULL and self._degradation_mode != DegradationMode.FULL:
                    logger.info(
                        "services_recovered",
                        execution_id=self.execution_id[:8],
                        old_mode=self._degradation_mode.value,
                        new_mode=new_mode.value,
                    )

                    # Notify client of recovery
                    recovery_event = self.formatter.format_recovered_event(
                        message="Services recovered, resuming full functionality"
                    )
                    yield recovery_event
                    self._stats["total_events_sent"] += 1

                    # Update tracking
                    self._degradation_mode = new_mode
                    self._stats["degradation_mode"] = new_mode.value

            # Event is already a dict with event_type and data
            event_type = event.get("event_type", "message")
            sse_event = self._format_live_event(event_type, event)

            # Buffer event for gap recovery (except keepalives)
            if event_type != "keepalive":
                # Store real JSON; str() on a dict yields a Python repr
                # that json.loads cannot parse. Fall back to str() so a
                # non-serializable event never interrupts the stream.
                try:
                    buffered_payload = json.dumps(event, default=str)
                except (TypeError, ValueError):
                    buffered_payload = str(event)

                # NOTE(review): this ID is generated after the SSE event
                # was formatted; confirm it matches the ID sent to the client.
                event_id = self.formatter.generate_event_id()
                self.buffer.add_event(
                    event_id=event_id,
                    event_type=event_type,
                    data=buffered_payload,
                )
                self._stats["events_buffered"] += 1

            yield sse_event
            event_count += 1
            self._stats["total_events_sent"] += 1
            self._stats["live_events_sent"] += 1

        self._stats["live_streaming_time_ms"] = int((time.time() - t0) * 1000)

        logger.info(
            "phase_5_live_streaming_complete",
            execution_id=self.execution_id[:8],
            event_count=event_count,
            duration_ms=self._stats["live_streaming_time_ms"],
        )

    except Exception as e:
        logger.error(
            "phase_5_live_streaming_error",
            execution_id=self.execution_id[:8],
            error=str(e),
            error_type=type(e).__name__,
        )
        self._stats["errors"].append({
            "phase": "live_streaming",
            "error": str(e),
            "error_type": type(e).__name__,
        })

        # Send degraded event to notify client
        yield self.formatter.format_degraded_event(
            mode="live_events_unavailable",
            reason="Failed to stream live events",
            message=f"Redis streaming failed: {str(e)[:100]}",
            capabilities=["history"],  # At least we served history
        )
        self._stats["total_events_sent"] += 1

        # Don't crash - we already served history if it was available

def _format_live_event(self, event_type: str, event: Dict[str, Any]) -> str:
    """Convert one live event dict into an SSE string with the formatter for its type."""
    # Event types whose formatter takes the whole event dict as its only argument.
    whole_event_formatters = {
        "message": "format_message_event",
        "tool_started": "format_tool_started_event",
        "tool_completed": "format_tool_completed_event",
        "member_tool_started": "format_member_tool_started_event",
        "member_tool_completed": "format_member_tool_completed_event",
        "message_chunk": "format_message_chunk_event",
        "member_message_chunk": "format_member_message_chunk_event",
        "member_message_complete": "format_member_message_complete_event",
    }
    formatter = self.formatter

    method_name = whole_event_formatters.get(event_type)
    if method_name is not None:
        return getattr(formatter, method_name)(event)

    if event_type == "keepalive":
        return formatter.format_keepalive()

    if event_type == "status":
        return formatter.format_status_event(
            status=event.get("status", "unknown"),
            metadata=event.get("data", {}),
        )

    # Treat a missing or explicit-None "data" field as an empty payload so
    # the field lookups below cannot fail on None.
    payload = event.get("data") or {}

    # Thinking/reasoning event types
    if event_type == "thinking_start":
        return formatter.format_thinking_start_event(
            message_id=payload.get("message_id", ""),
            index=payload.get("index", 0),
            budget_tokens=payload.get("budget_tokens"),
        )
    if event_type == "thinking_delta":
        return formatter.format_thinking_delta_event(
            message_id=payload.get("message_id", ""),
            thinking=payload.get("thinking", ""),
            index=payload.get("index", 0),
        )
    if event_type == "thinking_complete":
        return formatter.format_thinking_complete_event(
            message_id=payload.get("message_id", ""),
            index=payload.get("index", 0),
            signature=payload.get("signature"),
            tokens_used=payload.get("tokens_used"),
        )

    if event_type == "degraded":
        # Handle legacy degraded events from LiveEventSource
        return formatter.format_degraded_event(
            mode="degraded",
            reason=payload.get("reason", "unknown"),
            message=payload.get("message", "Degraded mode"),
        )

    # Generic event: forward the "data" field, or the whole event if absent.
    return formatter.format_event(
        event_type=event_type,
        data=event.get("data", event),
    )
|
|
776
|
+
|
|
777
|
+
async def _get_workflow_handle(self):
|
|
778
|
+
"""
|
|
779
|
+
Get Temporal workflow handle with caching and error handling.
|
|
780
|
+
|
|
781
|
+
This method attempts to get a workflow handle from Temporal with a
|
|
782
|
+
2-second timeout to fail fast when worker is down.
|
|
783
|
+
|
|
784
|
+
Returns:
|
|
785
|
+
Temporal workflow handle or None if unavailable
|
|
786
|
+
|
|
787
|
+
Note:
|
|
788
|
+
The workflow handle is cached after first successful retrieval.
|
|
789
|
+
If retrieval fails, None is cached and logged (graceful degradation).
|
|
790
|
+
"""
|
|
791
|
+
if self._workflow_handle is not None:
|
|
792
|
+
return self._workflow_handle
|
|
793
|
+
|
|
794
|
+
if self._workflow_handle_error is not None:
|
|
795
|
+
# Already failed to get handle, don't retry
|
|
796
|
+
return None
|
|
797
|
+
|
|
798
|
+
if not self.temporal_client:
|
|
799
|
+
logger.warning(
|
|
800
|
+
"no_temporal_client_available",
|
|
801
|
+
execution_id=self.execution_id[:8],
|
|
802
|
+
)
|
|
803
|
+
self._workflow_handle_error = "No Temporal client"
|
|
804
|
+
return None
|
|
805
|
+
|
|
806
|
+
try:
|
|
807
|
+
# Try to get workflow handle with 2-second timeout
|
|
808
|
+
self._workflow_handle = self.temporal_client.get_workflow_handle(
|
|
809
|
+
self.workflow_id
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
logger.info(
|
|
813
|
+
"workflow_handle_obtained",
|
|
814
|
+
execution_id=self.execution_id[:8],
|
|
815
|
+
workflow_id=self.workflow_id,
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
return self._workflow_handle
|
|
819
|
+
|
|
820
|
+
except Exception as e:
|
|
821
|
+
logger.warning(
|
|
822
|
+
"failed_to_get_workflow_handle",
|
|
823
|
+
execution_id=self.execution_id[:8],
|
|
824
|
+
workflow_id=self.workflow_id,
|
|
825
|
+
error=str(e),
|
|
826
|
+
)
|
|
827
|
+
self._workflow_handle_error = str(e)
|
|
828
|
+
return None
|
|
829
|
+
|
|
830
|
+
def get_stats(self) -> Dict[str, Any]:
    """
    Get an independent snapshot of the streaming statistics.

    The snapshot is a deep copy: the nested ``errors`` list and
    ``deduplication_stats`` dict are copied too, so callers can modify the
    result without changing the streamer's own bookkeeping.

    Returns:
        Dictionary with statistics:
        - phase: Current phase
        - start_time: Stream start timestamp
        - connection_time_ms: Phase 1 duration
        - history_load_time_ms: Phase 3 duration
        - live_streaming_time_ms: Phase 5 duration
        - total_events_sent: Total events sent to client
        - history_messages_sent: Messages sent in phase 3
        - live_events_sent: Events sent in phase 5
        - events_buffered: Events added to EventBuffer
        - events_replayed: Events replayed in phase 2
        - deduplication_stats: Stats from MessageDeduplicator
        - errors: List of errors encountered
        - degradation_mode: Last recorded degradation mode value
        - total_duration_ms: Total duration (present once stream() has finished)
    """
    import copy  # local import: not otherwise needed by this module

    # A shallow dict.copy() would share the nested list and dict with the caller.
    return copy.deepcopy(self._stats)
|