kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LocalStorageSpanProcessor - Async OTEL SpanProcessor for local trace storage.
|
|
3
|
+
|
|
4
|
+
This processor intercepts completed spans from the OTEL SDK and stores them
|
|
5
|
+
in PostgreSQL for local querying. It's designed to be fully async and non-blocking
|
|
6
|
+
to avoid impacting application performance.
|
|
7
|
+
|
|
8
|
+
Features:
|
|
9
|
+
- Fully async database operations using asyncio
|
|
10
|
+
- Non-blocking span queuing with bounded queue
|
|
11
|
+
- Bulk inserts for performance
|
|
12
|
+
- Publishes events to Redis for real-time WebSocket streaming
|
|
13
|
+
- Configurable via OTEL_LOCAL_STORAGE_ENABLED env var
|
|
14
|
+
- Graceful degradation on errors - never blocks the main application
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
from control_plane_api.app.observability.local_span_processor import (
|
|
18
|
+
LocalStorageSpanProcessor,
|
|
19
|
+
setup_local_storage_processor
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# In setup_telemetry():
|
|
23
|
+
setup_local_storage_processor(tracer_provider)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import structlog
|
|
27
|
+
import asyncio
|
|
28
|
+
import threading
|
|
29
|
+
from collections import deque
|
|
30
|
+
from typing import Optional, List, Dict, Any, Deque
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
import json
|
|
33
|
+
|
|
34
|
+
from opentelemetry.sdk.trace import SpanProcessor, ReadableSpan
|
|
35
|
+
from opentelemetry.trace import SpanKind as OTELSpanKind
|
|
36
|
+
from opentelemetry.trace.status import StatusCode
|
|
37
|
+
|
|
38
|
+
from control_plane_api.app.config import settings
|
|
39
|
+
|
|
40
|
+
logger = structlog.get_logger(__name__)

# Configuration from settings. Each value is read with getattr(), so the
# default given here applies when the settings object lacks the attribute.
LOCAL_STORAGE_ENABLED = getattr(settings, 'OTEL_LOCAL_STORAGE_ENABLED', True)
# Passed to LocalStorageSpanProcessor as batch_size by setup_local_storage_processor().
LOCAL_STORAGE_BATCH_SIZE = getattr(settings, 'OTEL_LOCAL_STORAGE_BATCH_SIZE', 100)
# Passed to LocalStorageSpanProcessor as flush_interval_ms.
LOCAL_STORAGE_FLUSH_INTERVAL = getattr(settings, 'OTEL_LOCAL_STORAGE_FLUSH_INTERVAL', 1000)  # ms

# Maximum queue size to prevent memory issues. Used as the deque maxlen and
# as the threshold for counting dropped spans in on_end().
MAX_QUEUE_SIZE = 10000
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _span_kind_to_string(kind: OTELSpanKind) -> str:
    """Return the storage enum label for an OTEL SpanKind.

    Any value that is not one of the five known kinds maps to "INTERNAL".
    """
    labels = {
        OTELSpanKind.INTERNAL: "INTERNAL",
        OTELSpanKind.SERVER: "SERVER",
        OTELSpanKind.CLIENT: "CLIENT",
        OTELSpanKind.PRODUCER: "PRODUCER",
        OTELSpanKind.CONSUMER: "CONSUMER",
    }
    try:
        return labels[kind]
    except KeyError:
        # Unknown kind: fall back to the most generic label.
        return "INTERNAL"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _status_code_to_string(status: StatusCode) -> str:
    """Return the storage enum label for an OTEL StatusCode.

    Values other than UNSET, OK and ERROR map to "UNSET".
    """
    label_by_code = {
        StatusCode.UNSET: "UNSET",
        StatusCode.OK: "OK",
        StatusCode.ERROR: "ERROR",
    }
    if status in label_by_code:
        return label_by_code[status]
    return "UNSET"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _extract_attributes(span: ReadableSpan) -> Dict[str, Any]:
    """Return the span's attributes as a plain dictionary.

    str/int/float/bool values are kept as-is, lists and tuples become lists,
    and every other value is converted with str(). A span without attributes
    yields an empty dict.
    """
    raw_attributes = span.attributes
    if not raw_attributes:
        return {}

    def _plain(item):
        # Scalars pass through, sequences become lists, the rest is stringified.
        if isinstance(item, (str, int, float, bool)):
            return item
        if isinstance(item, (list, tuple)):
            return list(item)
        return str(item)

    return {name: _plain(item) for name, item in raw_attributes.items()}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _extract_resource_attributes(span: ReadableSpan) -> Dict[str, Any]:
    """Return the attributes of the span's resource as a plain dictionary.

    Values are normalised the same way as span attributes: scalars are kept,
    lists/tuples become lists, anything else is converted with str(). A span
    with no resource, or a resource with no attributes, yields an empty dict.
    """
    normalized: Dict[str, Any] = {}

    resource = span.resource
    if not resource:
        return normalized
    resource_attributes = resource.attributes
    if not resource_attributes:
        return normalized

    for name, raw in resource_attributes.items():
        if isinstance(raw, (list, tuple)):
            normalized[name] = list(raw)
        elif isinstance(raw, (str, int, float, bool)):
            normalized[name] = raw
        else:
            normalized[name] = str(raw)
    return normalized
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _extract_events(span: ReadableSpan) -> List[Dict[str, Any]]:
    """Extract span events as a list of JSON-serialisable dictionaries.

    Each entry has the keys "name", "timestamp" and "attributes". Attribute
    values are normalised the same way as span attributes: str/int/float/bool
    are kept, lists and tuples become lists (so they are stored as JSON arrays
    rather than as a Python repr string), and anything else is converted with
    str().

    Returns an empty list when the span has no events.
    """
    events: List[Dict[str, Any]] = []
    for event in span.events or ():
        attributes: Dict[str, Any] = {}
        for key, value in (event.attributes or {}).items():
            if isinstance(value, (str, int, float, bool)):
                attributes[key] = value
            elif isinstance(value, (list, tuple)):
                # Keep sequences structured, consistent with _extract_attributes.
                attributes[key] = list(value)
            else:
                attributes[key] = str(value)
        events.append({
            "name": event.name,
            "timestamp": event.timestamp,
            "attributes": attributes,
        })
    return events
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _extract_links(span: ReadableSpan) -> List[Dict[str, Any]]:
    """Extract span links as a list of JSON-serialisable dictionaries.

    Each entry has the keys "trace_id" (32 lowercase hex digits), "span_id"
    (16 lowercase hex digits) and "attributes". Attribute values are
    normalised the same way as span attributes: str/int/float/bool are kept,
    lists and tuples become lists (so they are stored as JSON arrays rather
    than as a Python repr string), and anything else is converted with str().

    Returns an empty list when the span has no links.
    """
    links: List[Dict[str, Any]] = []
    for link in span.links or ():
        attributes: Dict[str, Any] = {}
        for key, value in (link.attributes or {}).items():
            if isinstance(value, (str, int, float, bool)):
                attributes[key] = value
            elif isinstance(value, (list, tuple)):
                # Keep sequences structured, consistent with _extract_attributes.
                attributes[key] = list(value)
            else:
                attributes[key] = str(value)
        links.append({
            "trace_id": format(link.context.trace_id, '032x'),
            "span_id": format(link.context.span_id, '016x'),
            "attributes": attributes,
        })
    return links
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class LocalStorageSpanProcessor(SpanProcessor):
    """
    Async SpanProcessor that stores spans locally in PostgreSQL.

    Finished spans are converted to plain dicts in ``on_end`` and appended to
    a bounded in-memory queue. A daemon thread running its own asyncio event
    loop periodically drains that queue into the ``traces`` and ``spans``
    tables through an asyncpg connection pool.

    Design principles:
    - NEVER block the span-producing thread in on_start/on_end
    - Use bounded queues to prevent memory issues
    - Bulk insert for database efficiency
    - Graceful degradation on errors
    """

    # Extra seconds, on top of one flush interval, that shutdown() waits for
    # the worker thread to finish its final drain.
    _SHUTDOWN_GRACE_SEC = 5.0

    def __init__(
        self,
        enabled: bool = True,
        batch_size: int = 100,
        flush_interval_ms: int = 1000,
    ):
        """
        Args:
            enabled: When False the processor is inert and no worker is started.
            batch_size: Maximum number of spans written per database transaction.
            flush_interval_ms: Delay between two drains of the queue.
        """
        self.enabled = enabled
        self.batch_size = batch_size
        self.flush_interval_ms = flush_interval_ms

        # Bounded queue shared between span-producing threads and the worker.
        self._span_queue: Deque[Dict[str, Any]] = deque(maxlen=MAX_QUEUE_SIZE)
        self._queue_lock = threading.Lock()

        # Background worker state (all set by the worker thread itself).
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._task: Optional[asyncio.Task] = None
        self._thread: Optional[threading.Thread] = None
        self._pool = None
        # The flag is set from the caller's thread and polled from the worker
        # loop, so a threading.Event is used: it is not tied to any event loop.
        self._shutdown_event = threading.Event()

        # Stats for monitoring
        self._stats = {
            "spans_received": 0,
            "spans_stored": 0,
            "spans_dropped": 0,
            "batch_inserts": 0,
            "errors": 0,
        }

        if self.enabled:
            self._start_async_worker()
            logger.info(
                "local_storage_span_processor_initialized",
                batch_size=self.batch_size,
                flush_interval_ms=self.flush_interval_ms,
            )
        else:
            logger.info("local_storage_span_processor_disabled")

    def _start_async_worker(self):
        """Start the async background worker in a separate daemon thread."""
        def run_async_loop():
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(self._async_batch_worker())
            except Exception as e:
                logger.error("async_worker_crashed", error=str(e), exc_info=True)
            finally:
                loop.close()
                if not self._shutdown_event.is_set():
                    logger.warning("local_storage_span_processor_worker_exited")
                # Nothing drains the queue any more, so stop collecting spans.
                self.enabled = False

        thread = threading.Thread(
            target=run_async_loop,
            name="LocalStorageAsyncWorker",
            daemon=True,
        )
        self._thread = thread
        thread.start()

    @staticmethod
    def _strip_sslmode(database_url: str):
        """Split the ``sslmode`` query parameter off a database URL.

        Returns:
            ``(url, sslmode)`` where ``url`` is *database_url* without any
            ``sslmode`` parameter (other parameters and the ``?`` separator are
            preserved) and ``sslmode`` is the removed value, or None when the
            URL had no such parameter.
        """
        base, _, query = database_url.partition("?")
        sslmode = None
        kept = []
        for param in query.split("&"):
            if not param:
                continue
            key, _, value = param.partition("=")
            if key == "sslmode":
                sslmode = value
            else:
                kept.append(param)
        if kept:
            return base + "?" + "&".join(kept), sslmode
        return base, sslmode

    async def _async_batch_worker(self):
        """Async worker that batches and inserts spans using raw asyncpg with connection pooling.

        Runs until the shutdown flag is set (or the task is cancelled), then
        drains whatever is still queued and closes the pool. Returns early,
        without storing anything, when no database URL is configured or the
        pool cannot be created.
        """
        import asyncpg
        import ssl as ssl_module

        flush_interval_sec = self.flush_interval_ms / 1000.0

        # Get database URL from settings
        database_url = settings.database_url
        if not database_url:
            logger.warning("async_batch_worker_no_db", message="DATABASE_URL not configured, spans will not be stored")
            return

        # Convert to asyncpg format
        if database_url.startswith("postgresql://"):
            asyncpg_url = database_url.replace("postgresql://", "postgres://", 1)
        else:
            asyncpg_url = database_url

        # Remove sslmode from URL - SSL is configured through an explicit context
        asyncpg_url, sslmode = self._strip_sslmode(asyncpg_url)
        requires_ssl = sslmode is not None and sslmode != "disable"

        ssl_context = None
        if requires_ssl:
            # NOTE(review): certificate and hostname verification are disabled
            # for every sslmode, including verify-ca / verify-full. Kept as-is
            # to avoid breaking existing deployments; confirm this is intended.
            ssl_context = ssl_module.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl_module.CERT_NONE

        logger.info("async_batch_worker_starting", ssl_enabled=requires_ssl)

        # Small pool: this is background batch processing, not request traffic.
        try:
            pool = await asyncpg.create_pool(
                asyncpg_url,
                ssl=ssl_context,
                min_size=1,
                max_size=3,
                statement_cache_size=0,  # Disable for PgBouncer compatibility
                command_timeout=30,
            )
            logger.info("async_batch_worker_pool_created")
        except Exception as e:
            logger.error("async_batch_worker_pool_failed", error=str(e))
            return

        self._pool = pool
        try:
            while True:
                try:
                    # Wait for flush interval
                    await asyncio.sleep(flush_interval_sec)

                    # Write everything queued so far, batch by batch.
                    await self._drain_queue(pool)

                    if self._shutdown_event.is_set():
                        break

                except asyncio.CancelledError:
                    # Final flush on cancellation
                    await self._drain_queue(pool)
                    break
                except Exception as e:
                    self._stats["errors"] += 1
                    logger.error("async_batch_worker_error", error=str(e), exc_info=True)
                    await asyncio.sleep(1)  # Back off on error
        finally:
            self._pool = None
            await pool.close()
            logger.info("async_batch_worker_pool_closed")

    def _take_batch(self) -> List[Dict[str, Any]]:
        """Pop up to ``batch_size`` spans from the queue (thread-safe)."""
        batch: List[Dict[str, Any]] = []
        with self._queue_lock:
            while self._span_queue and len(batch) < self.batch_size:
                batch.append(self._span_queue.popleft())
        return batch

    def _record_flush(self, batch: List[Dict[str, Any]], stored: int) -> None:
        """Update statistics after *stored* of the spans in *batch* were written."""
        self._stats["spans_stored"] += stored
        # Spans the writer skipped (no organization id) were discarded, not stored.
        self._stats["spans_dropped"] += len(batch) - stored
        self._stats["batch_inserts"] += 1

        logger.debug(
            "batch_flushed",
            spans=len(batch),
            stored=stored,
            queue_size=len(self._span_queue),
        )

    async def _drain_queue(self, pool) -> bool:
        """Flush the spans that are queued right now, one batch per transaction.

        The number of batches is fixed up front so a steady inflow of new
        spans cannot keep this coroutine running forever. Draining stops at
        the first failed batch; the remaining spans stay queued.

        Returns:
            True when every attempted batch was written, False otherwise.
        """
        pending_batches = -(-len(self._span_queue) // max(1, self.batch_size))
        for _ in range(pending_batches):
            if not await self._flush_batch_with_pool(pool):
                return False
        return True

    async def _flush_batch_with_pool(self, pool) -> bool:
        """Flush one batch of pending spans to the database using the connection pool.

        Errors are logged and counted, never raised; the spans of a failed
        batch are discarded.

        Returns:
            False when the batch could not be written, True otherwise
            (including when the queue was empty).
        """
        batch = self._take_batch()
        if not batch:
            return True

        try:
            async with pool.acquire() as conn:
                stored = await self._bulk_insert_spans_asyncpg(conn, batch)
        except Exception as e:
            self._stats["errors"] += 1
            logger.error("batch_flush_failed", error=str(e), spans=len(batch))
            return False

        self._record_flush(batch, stored)
        return True

    async def _flush_batch_asyncpg(self, db_url: str, ssl_context) -> bool:
        """Flush one batch using a fresh asyncpg connection (legacy, for backward compat).

        Same contract as ``_flush_batch_with_pool`` but opens and closes a
        dedicated connection for the batch.
        """
        import asyncpg

        batch = self._take_batch()
        if not batch:
            return True

        try:
            # statement_cache_size=0 is critical for PgBouncer
            conn = await asyncpg.connect(
                db_url,
                ssl=ssl_context,
                statement_cache_size=0,
            )
            try:
                stored = await self._bulk_insert_spans_asyncpg(conn, batch)
            finally:
                await conn.close()
        except Exception as e:
            self._stats["errors"] += 1
            logger.error("batch_flush_failed", error=str(e), spans=len(batch))
            return False

        self._record_flush(batch, stored)
        return True

    async def _bulk_insert_spans_asyncpg(self, conn, batch: List[Dict[str, Any]]) -> int:
        """Bulk insert spans using raw asyncpg (PgBouncer compatible).

        Spans without an ``organization_id`` are skipped. Root spans (no
        parent) create or update the row in ``traces``; every span is upserted
        into ``spans``; per-trace span/error counters and user info are then
        updated. All statements run in a single transaction.

        Returns:
            The number of spans from *batch* that were written.
        """
        traces_to_create = {}
        traces_to_complete = {}  # Root spans that have ended
        traces_user_info = {}  # User info found on ANY span of the trace
        span_records = []
        trace_span_counts = {}
        trace_error_counts = {}

        for span_data in batch:
            trace_id = span_data["trace_id"]
            org_id = span_data.get("organization_id")

            if not org_id:
                continue

            attributes = span_data["attributes"]

            trace_span_counts[trace_id] = trace_span_counts.get(trace_id, 0) + 1
            if span_data["status_code"] == "ERROR":
                trace_error_counts[trace_id] = trace_error_counts.get(trace_id, 0) + 1

            # Extract user info from ANY span that has it (not just root spans);
            # the first non-empty value seen for each field wins.
            span_user_email = attributes.get("user.email")
            span_user_name = attributes.get("user.name")
            if span_user_email or span_user_name:
                known = traces_user_info.setdefault(trace_id, {})
                if span_user_email and not known.get("user_email"):
                    known["user_email"] = span_user_email
                if span_user_name and not known.get("user_name"):
                    known["user_name"] = span_user_name
                if attributes.get("user.id") and not known.get("user_id"):
                    known["user_id"] = attributes.get("user.id")
                if attributes.get("user.avatar") and not known.get("user_avatar"):
                    known["user_avatar"] = attributes.get("user.avatar")

            # Handle root spans (no parent) - these define the trace
            if span_data["parent_span_id"] is None:
                service_name = span_data["resource_attributes"].get("service.name", "unknown")

                if span_data["status_code"] == "ERROR":
                    trace_status = "error"
                elif span_data.get("end_time_unix_nano"):
                    trace_status = "success"  # Completed without error
                else:
                    trace_status = "running"

                duration_ms = None
                if span_data.get("duration_ns"):
                    duration_ms = span_data["duration_ns"] // 1_000_000

                # Prefer user info tracked so far, fall back to this span's attributes
                user_info = traces_user_info.get(trace_id, {})
                trace_record = {
                    "trace_id": trace_id,
                    "organization_id": org_id,
                    "name": span_data["name"],
                    "service_name": service_name,
                    "status": trace_status,
                    "user_id": user_info.get("user_id") or attributes.get("user.id"),
                    "user_email": user_info.get("user_email") or attributes.get("user.email"),
                    "user_name": user_info.get("user_name") or attributes.get("user.name"),
                    "user_avatar": user_info.get("user_avatar") or attributes.get("user.avatar"),
                    "execution_id": attributes.get("execution.id"),
                    "duration_ms": duration_ms,
                }

                # Always create trace first, then update if completed
                traces_to_create[trace_id] = trace_record
                if trace_status in ("success", "error"):
                    traces_to_complete[trace_id] = trace_record

            # default=str: one non-JSON value must not fail the whole batch.
            span_records.append((
                trace_id,
                span_data["span_id"],
                span_data["parent_span_id"],
                org_id,
                span_data["name"],
                span_data["kind"],
                span_data["status_code"],
                span_data.get("status_message"),
                span_data["start_time_unix_nano"],
                span_data.get("end_time_unix_nano"),
                span_data.get("duration_ns"),
                json.dumps(attributes, default=str),
                json.dumps(span_data["resource_attributes"], default=str),
                json.dumps(span_data["events"], default=str),
                json.dumps(span_data["links"], default=str),
            ))

        if not span_records:
            # Every span was skipped: nothing to write.
            return 0

        # Use a transaction for atomicity
        async with conn.transaction():
            # Upsert traces; existing user info is kept, missing fields are filled in.
            if traces_to_create:
                trace_records = [
                    (
                        trace["trace_id"],
                        trace["organization_id"],
                        trace["name"],
                        trace["service_name"],
                        trace["status"],
                        trace["user_id"],
                        trace["user_email"],
                        trace["user_name"],
                        trace["user_avatar"],
                        trace["execution_id"],
                    )
                    for trace in traces_to_create.values()
                ]
                await conn.executemany(
                    """
                    INSERT INTO traces (trace_id, organization_id, name, service_name, status, user_id, user_email, user_name, user_avatar, execution_id, span_count, error_count)
                    VALUES ($1, $2, $3, $4, $5::trace_status, $6, $7, $8, $9, $10, 0, 0)
                    ON CONFLICT (trace_id) DO UPDATE SET
                    user_id = COALESCE(traces.user_id, EXCLUDED.user_id),
                    user_email = COALESCE(traces.user_email, EXCLUDED.user_email),
                    user_name = COALESCE(traces.user_name, EXCLUDED.user_name),
                    user_avatar = COALESCE(traces.user_avatar, EXCLUDED.user_avatar),
                    updated_at = NOW()
                    """,
                    trace_records,
                )

            # Upsert spans
            await conn.executemany(
                """
                INSERT INTO spans (trace_id, span_id, parent_span_id, organization_id, name, kind, status_code, status_message, start_time_unix_nano, end_time_unix_nano, duration_ns, attributes, resource_attributes, events, links)
                VALUES ($1, $2, $3, $4, $5, $6::span_kind, $7::span_status_code, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14::jsonb, $15::jsonb)
                ON CONFLICT (trace_id, span_id) DO UPDATE SET
                end_time_unix_nano = EXCLUDED.end_time_unix_nano,
                duration_ns = EXCLUDED.duration_ns,
                status_code = EXCLUDED.status_code,
                status_message = EXCLUDED.status_message
                """,
                span_records,
            )

            # Add this batch's span/error counts to each trace
            count_updates = [
                (count, trace_error_counts.get(trace_id, 0), trace_id)
                for trace_id, count in trace_span_counts.items()
            ]
            await conn.executemany(
                """
                UPDATE traces
                SET span_count = span_count + $1,
                error_count = error_count + $2,
                updated_at = NOW()
                WHERE trace_id = $3
                """,
                count_updates,
            )

            # Complete traces whose root span has finished
            if traces_to_complete:
                completion_updates = [
                    (trace["status"], trace["duration_ms"], trace_id)
                    for trace_id, trace in traces_to_complete.items()
                ]
                await conn.executemany(
                    """
                    UPDATE traces
                    SET status = $1::trace_status,
                    duration_ms = $2,
                    ended_at = NOW(),
                    updated_at = NOW()
                    WHERE trace_id = $3
                    """,
                    completion_updates,
                )

            # Fill in user info for traces where it was found on any span
            user_info_updates = [
                (
                    user_info.get("user_id"),
                    user_info.get("user_email"),
                    user_info.get("user_name"),
                    user_info.get("user_avatar"),
                    trace_id,
                )
                for trace_id, user_info in traces_user_info.items()
                if user_info
            ]
            if user_info_updates:
                await conn.executemany(
                    """
                    UPDATE traces
                    SET user_id = COALESCE(user_id, $1),
                    user_email = COALESCE(user_email, $2),
                    user_name = COALESCE(user_name, $3),
                    user_avatar = COALESCE(user_avatar, $4),
                    updated_at = NOW()
                    WHERE trace_id = $5
                    """,
                    user_info_updates,
                )

        return len(span_records)

    def on_start(self, span: ReadableSpan, parent_context=None) -> None:
        """Called when a span starts - non-blocking"""
        # We don't store on start, only on end
        pass

    def on_end(self, span: ReadableSpan) -> None:
        """Called when a span ends - queue for async storage (non-blocking)"""
        if not self.enabled:
            return

        self._stats["spans_received"] += 1

        try:
            span_data = self._extract_span_data(span)

            if not span_data.get("organization_id"):
                # The writer skips spans without an organization id, so
                # queueing them would only evict spans that can be stored.
                self._stats["spans_dropped"] += 1
                return

            # Non-blocking queue add
            with self._queue_lock:
                if len(self._span_queue) >= MAX_QUEUE_SIZE:
                    self._stats["spans_dropped"] += 1
                    # Queue is full, drop oldest
                    self._span_queue.popleft()
                self._span_queue.append(span_data)

        except Exception as e:
            self._stats["errors"] += 1
            logger.warning("span_extraction_failed", error=str(e))

    def _extract_span_data(self, span: ReadableSpan) -> Dict[str, Any]:
        """Extract all data from a span for storage.

        IDs are rendered as lowercase hex strings (32 digits for the trace,
        16 for spans). ``organization_id`` is taken from the span attribute
        "organization.id" or, failing that, "organization_id".
        """
        span_context = span.get_span_context()
        trace_id = format(span_context.trace_id, '032x')
        span_id = format(span_context.span_id, '016x')

        parent_span_id = None
        if span.parent:
            parent_span_id = format(span.parent.span_id, '016x')

        attributes = _extract_attributes(span)
        org_id = attributes.get("organization.id") or attributes.get("organization_id")

        duration_ns = None
        if span.end_time and span.start_time:
            duration_ns = span.end_time - span.start_time

        return {
            "trace_id": trace_id,
            "span_id": span_id,
            "parent_span_id": parent_span_id,
            "organization_id": org_id,
            "name": span.name,
            "kind": _span_kind_to_string(span.kind),
            "status_code": _status_code_to_string(span.status.status_code),
            "status_message": span.status.description,
            "start_time_unix_nano": span.start_time,
            "end_time_unix_nano": span.end_time,
            "duration_ns": duration_ns,
            "attributes": attributes,
            "resource_attributes": _extract_resource_attributes(span),
            "events": _extract_events(span),
            "links": _extract_links(span),
        }

    def shutdown(self) -> None:
        """Shutdown the processor and flush remaining spans.

        Signals the worker, stops accepting new spans and waits (bounded by
        one flush interval plus ``_SHUTDOWN_GRACE_SEC``) for the worker to
        drain the queue and close its pool. Safe to call more than once.
        """
        logger.info("local_storage_span_processor_shutting_down", stats=self._stats)

        self._shutdown_event.set()
        self.enabled = False

        worker = self._thread
        if (
            worker is not None
            and worker.is_alive()
            and worker is not threading.current_thread()
        ):
            worker.join(timeout=self.flush_interval_ms / 1000.0 + self._SHUTDOWN_GRACE_SEC)
            if worker.is_alive():
                logger.warning(
                    "local_storage_span_processor_shutdown_timeout",
                    queue_size=len(self._span_queue),
                )

        logger.info("local_storage_span_processor_shutdown_complete", stats=self._stats)

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush all pending spans.

        Schedules a drain of the queue on the worker's event loop and waits
        for it for at most *timeout_millis*.

        Returns:
            True when the processor is disabled or every queued batch was
            written in time; False on timeout, on a failed batch, or when the
            worker is not available while spans are still queued.
        """
        if not self.enabled:
            return True

        loop, pool = self._loop, self._pool
        if loop is None or pool is None or not loop.is_running():
            # Worker not ready (or already gone): nothing can be written now.
            return not self._span_queue
        if self._thread is threading.current_thread():
            # Waiting on our own loop from inside it would deadlock.
            return False

        drain = self._drain_queue(pool)
        try:
            future = asyncio.run_coroutine_threadsafe(drain, loop)
        except RuntimeError as e:
            # Loop was closed between the check above and scheduling.
            drain.close()
            logger.warning("force_flush_failed", error=str(e))
            return False

        try:
            return bool(future.result(timeout=timeout_millis / 1000.0))
        except Exception as e:
            future.cancel()
            logger.warning("force_flush_failed", error=str(e))
            return False

    def get_stats(self) -> Dict[str, int]:
        """Get a snapshot copy of the processor statistics."""
        return dict(self._stats)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def setup_local_storage_processor(
    tracer_provider,
) -> Optional[LocalStorageSpanProcessor]:
    """
    Create a LocalStorageSpanProcessor and register it on a TracerProvider.

    Nothing is created or registered when LOCAL_STORAGE_ENABLED is falsy.

    Args:
        tracer_provider: The TracerProvider to add the processor to

    Returns:
        The created LocalStorageSpanProcessor, or None if disabled
    """
    if LOCAL_STORAGE_ENABLED:
        span_processor = LocalStorageSpanProcessor(
            enabled=True,
            batch_size=LOCAL_STORAGE_BATCH_SIZE,
            flush_interval_ms=LOCAL_STORAGE_FLUSH_INTERVAL,
        )
        tracer_provider.add_span_processor(span_processor)

        logger.info(
            "local_storage_processor_added",
            batch_size=LOCAL_STORAGE_BATCH_SIZE,
            flush_interval_ms=LOCAL_STORAGE_FLUSH_INTERVAL,
        )
        return span_processor

    logger.info("local_storage_disabled", reason="OTEL_LOCAL_STORAGE_ENABLED=false")
    return None
|