kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Message deduplication for streaming execution data.
|
|
3
|
+
|
|
4
|
+
This module provides a bounded-memory message deduplication system using LRU caching
|
|
5
|
+
to prevent duplicate messages from being sent during streaming execution sessions.
|
|
6
|
+
|
|
7
|
+
Key Features:
|
|
8
|
+
- Two-level deduplication: message ID + content signature
|
|
9
|
+
- LRU cache with bounded memory (max 1000 entries)
|
|
10
|
+
- Backward compatibility with old message ID formats
|
|
11
|
+
- Content-based deduplication for assistant messages within a 5-second window
|
|
12
|
+
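- Thread-safe operation (NOTE(review): no lock or threading import is visible at module level - confirm before relying on this)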
|
|
13
|
+
|
|
14
|
+
Test Strategy:
|
|
15
|
+
- Unit test deduplication with various message types (user, assistant, system)
|
|
16
|
+
- Test LRU eviction at boundary (1001st entry)
|
|
17
|
+
- Test backward compatibility with old message ID formats (timestamp-based vs turn-based)
|
|
18
|
+
- Test content signature collision handling with near-duplicate messages
|
|
19
|
+
- Test edge cases: empty content, None message_id, missing timestamps
|
|
20
|
+
- Test thread safety with concurrent message processing
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import hashlib
|
|
24
|
+
import logging
|
|
25
|
+
from collections import OrderedDict
|
|
26
|
+
from datetime import datetime
|
|
27
|
+
from typing import Any, Dict, Optional
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class MessageDeduplicator:
    """
    Handles message deduplication with bounded memory using LRU caches.

    Two levels of deduplication are applied:
    1. Message ID - a message whose (normalized) ID was already marked as
       sent is a duplicate.
    2. Content signature - an assistant message whose normalized content
       prefix matches a previously sent assistant message, with timestamps
       at most CONTENT_DEDUP_WINDOW seconds apart, is a duplicate
       (handles retry/regeneration scenarios).

    Message IDs are normalized for backward compatibility:
    - New format: {execution_id}_{role}_{turn_number} is kept as-is.
    - Old formats ({execution_id}_{role}_{timestamp_micros}, or any ID that
      does not match the role pattern) are replaced by
      {first_id_segment}_{role}_{content_hash}.

    Both caches are OrderedDicts capped at ``max_size`` entries; the least
    recently used entry is evicted first. The class takes no locks, so an
    instance shared between threads needs external synchronization.
    """

    # LRU cache size - limits memory usage to ~1000 message IDs + signatures
    MAX_CACHE_SIZE = 1000

    # Time window for content signature deduplication (seconds)
    CONTENT_DEDUP_WINDOW = 5.0

    # Length of content to use for signature (characters)
    CONTENT_SIGNATURE_LENGTH = 200

    def __init__(self, max_size: int = MAX_CACHE_SIZE):
        """
        Initialize the message deduplicator.

        Args:
            max_size: Maximum number of entries in each LRU cache (default: 1000)
        """
        self.max_size = max_size

        # LRU cache of sent message IDs; values are unused placeholders.
        # OrderedDict keeps insertion order, move_to_end() refreshes recency.
        self._sent_ids: OrderedDict[str, bool] = OrderedDict()

        # Content signature cache for assistant messages.
        # Maps content_signature -> (message_data, parsed timestamp)
        self._content_cache: OrderedDict[str, tuple[Dict[str, Any], datetime]] = OrderedDict()

        # Counters exposed through get_stats()
        self._stats = self._new_stats()

    @staticmethod
    def _new_stats() -> Dict[str, int]:
        """Return a zeroed statistics dictionary."""
        return {
            "messages_checked": 0,
            "duplicates_by_id": 0,
            "duplicates_by_content": 0,
            "evictions": 0,
            "normalized_ids": 0,
        }

    def is_sent(self, message: Dict[str, Any]) -> bool:
        """
        Check if a message has already been sent (deduplicate).

        The message ID is normalized first and the normalized value is
        written back into ``message["message_id"]``, so a later
        ``mark_sent(message)`` records the same ID that was checked here.

        Args:
            message: Message dictionary with keys: message_id, role, content, timestamp

        Returns:
            True if message is a duplicate and should be skipped, False otherwise
        """
        self._stats["messages_checked"] += 1

        message_id = message.get("message_id")
        role = message.get("role")

        # Normalize old message ID formats for backward compatibility
        if message_id:
            message_id = self._normalize_message_id(message_id, message)
            message["message_id"] = message_id

        # Level 1: message ID deduplication
        if message_id and message_id in self._sent_ids:
            self._stats["duplicates_by_id"] += 1
            # The module logger is a stdlib logger: fields go through %-args,
            # arbitrary keyword arguments would raise TypeError.
            logger.debug(
                "duplicate_message_id_detected message_id=%s role=%s content_preview=%r",
                message_id,
                role,
                (message.get("content", "") or "")[:50],
            )
            # Mark as recently used (LRU)
            self._sent_ids.move_to_end(message_id)
            return True

        # Level 2: content signature deduplication (assistant messages only)
        if role != "assistant":
            return False

        content = message.get("content", "") or ""
        timestamp_str = message.get("timestamp", "")
        if not (content and timestamp_str):
            return False

        content_sig = self._content_signature(content)
        cached = self._content_cache.get(content_sig)
        if cached is None:
            return False
        prev_msg, prev_timestamp = cached

        try:
            current_timestamp = self._parse_timestamp(timestamp_str)
            time_diff = abs((current_timestamp - prev_timestamp).total_seconds())
        except (ValueError, TypeError, AttributeError) as e:
            # An unusable timestamp must never cause a message to be dropped
            logger.warning(
                "failed_to_parse_timestamp_for_content_dedup timestamp=%r error=%s",
                timestamp_str,
                e,
            )
            return False

        if time_diff <= self.CONTENT_DEDUP_WINDOW:
            self._stats["duplicates_by_content"] += 1
            logger.debug(
                "duplicate_content_signature_detected message_id=%s "
                "content_signature=%s time_diff=%.3f prev_message_id=%s",
                message_id,
                content_sig[:16],
                time_diff,
                prev_msg.get("message_id"),
            )
            # Mark as recently used (LRU)
            self._content_cache.move_to_end(content_sig)
            return True

        return False

    def mark_sent(self, message: Dict[str, Any]) -> None:
        """
        Mark a message as sent (add to deduplication caches).

        This method:
        1. Adds the message ID (as currently stored in the message) to the sent cache
        2. For assistant messages with content and a parseable timestamp,
           records the content signature
        3. Enforces LRU eviction when a cache exceeds max size

        Args:
            message: Message dictionary with keys: message_id, role, content, timestamp
        """
        message_id = message.get("message_id")
        role = message.get("role")

        if message_id:
            self._sent_ids[message_id] = True
            # Assigning to an existing key keeps its old position; refresh it
            # so a re-marked ID counts as recently used.
            self._sent_ids.move_to_end(message_id)
            self._evict_if_needed(self._sent_ids)

        if role != "assistant":
            return

        content = message.get("content", "") or ""
        timestamp_str = message.get("timestamp", "")
        if not (content and timestamp_str):
            return

        try:
            content_sig = self._content_signature(content)
            timestamp = self._parse_timestamp(timestamp_str)
        except (ValueError, TypeError, AttributeError) as e:
            # Best effort: the ID-level entry above is already recorded
            logger.warning(
                "failed_to_cache_content_signature message_id=%s error=%s",
                message_id,
                e,
            )
            return

        self._content_cache[content_sig] = (message, timestamp)
        self._content_cache.move_to_end(content_sig)
        self._evict_if_needed(self._content_cache)

    def _content_signature(self, content: str) -> str:
        """
        Generate a content signature for deduplication.

        The content is stripped, lowercased and truncated to
        CONTENT_SIGNATURE_LENGTH characters, then MD5-hashed. MD5 is used
        only as a fixed-length fingerprint, not for security.

        Args:
            content: Message content string

        Returns:
            32-character hex digest, or "" for empty content
        """
        if not content:
            return ""

        normalized = content.strip().lower()[:self.CONTENT_SIGNATURE_LENGTH]
        return hashlib.md5(normalized.encode()).hexdigest()

    def _normalize_message_id(self, message_id: str, message: Dict[str, Any]) -> str:
        """
        Normalize old message ID formats for backward compatibility.

        Message ID formats:
        - New (turn-based): {execution_id}_{role}_{turn_number}
          Example: "exec123_assistant_5"
        - Old (timestamp-based): {execution_id}_{role}_{timestamp_micros}
          Example: "exec123_assistant_1234567890123456"
        - Old (index-based): {execution_id}_{role}_{idx}
          Example: "exec123_assistant_42" (indistinguishable from turn-based)

        Detection heuristic:
        - Role pattern with a numeric suffix < 10000: kept as-is
        - Role pattern with a suffix >= 10000 or non-numeric: replaced by a
          content-hash ID and counted in the ``normalized_ids`` statistic
        - Any other shape: replaced by a content-hash ID (not counted)

        Args:
            message_id: Original message ID
            message: Full message dictionary, source of role and content

        Returns:
            Normalized message ID (unchanged if already in the new format)
        """
        if not message_id:
            return message_id

        parts = message_id.split("_")

        if len(parts) >= 3 and parts[-2] in ("user", "assistant", "system"):
            try:
                last_part = int(parts[-1])
            except ValueError:
                # Suffix is a hash or another non-numeric token
                self._stats["normalized_ids"] += 1
            else:
                # Turn numbers are small, microsecond timestamps are huge
                if last_part < 10000:
                    return message_id
                self._stats["normalized_ids"] += 1

        # Build a stable ID from the content hash
        content = message.get("content", "") or ""
        role = message.get("role", "unknown")
        execution_id = parts[0] if parts else "unknown"

        content_hash = hashlib.md5(content.encode()).hexdigest()[:8]
        normalized_id = f"{execution_id}_{role}_{content_hash}"

        logger.debug(
            "normalized_old_message_id_format old_id=%s new_id=%s role=%s",
            message_id,
            normalized_id,
            role,
        )

        return normalized_id

    def _parse_timestamp(self, timestamp_str: str) -> datetime:
        """
        Parse an ISO format timestamp string into an aware datetime.

        A trailing 'Z' is accepted as UTC. A timestamp without any offset is
        assumed to be UTC (NOTE(review): confirm producers emit UTC) so that
        values with and without an offset can be subtracted from each other;
        mixing naive and aware datetimes would raise TypeError.

        Args:
            timestamp_str: ISO format timestamp (e.g., "2024-01-15T10:30:00Z")

        Returns:
            Timezone-aware datetime object

        Raises:
            ValueError: If timestamp cannot be parsed
        """
        # Local import: the module only imports `datetime` from this package
        from datetime import timezone

        parsed = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def _evict_if_needed(self, cache: OrderedDict) -> None:
        """
        Evict least recently used entries while the cache exceeds max size.

        Args:
            cache: OrderedDict cache to check and evict from
        """
        while len(cache) > self.max_size:
            # Oldest entry is the first item in the OrderedDict
            cache.popitem(last=False)
            self._stats["evictions"] += 1

            logger.debug(
                "lru_cache_eviction cache_size=%d max_size=%d total_evictions=%d",
                len(cache),
                self.max_size,
                self._stats["evictions"],
            )

    def get_stats(self) -> Dict[str, int]:
        """
        Get deduplication statistics.

        Returns:
            A copy of the statistics dictionary:
            - messages_checked: Total messages checked
            - duplicates_by_id: Duplicates found by message ID
            - duplicates_by_content: Duplicates found by content signature
            - evictions: LRU cache evictions performed
            - normalized_ids: Role-pattern message IDs replaced by a content-hash ID
        """
        return self._stats.copy()

    def reset(self) -> None:
        """
        Reset the deduplicator (clear all caches and statistics).

        Useful for testing or starting a new session.
        """
        self._sent_ids.clear()
        self._content_cache.clear()
        self._stats = self._new_stats()
        logger.debug("message_deduplicator_reset")
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EventBuffer class for SSE gap recovery support.
|
|
3
|
+
|
|
4
|
+
Implements a ring buffer pattern using collections.deque for bounded memory,
|
|
5
|
+
supporting event replay and gap detection for Server-Sent Events (SSE) streams.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
import json
|
|
10
|
+
from collections import deque
|
|
11
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
12
|
+
from structlog import get_logger
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EventBuffer:
    """
    Ring buffer for SSE event storage with gap recovery support.

    Uses a bounded deque to store recent SSE events, enabling:
    - Event replay on reconnection (Last-Event-ID pattern)
    - Gap detection between client and server state
    - Automatic memory management with size-based eviction

    Events are stored as tuples: (event_id, event_type, data, size)
    Event IDs follow format: {execution_id}_{counter}_{timestamp_micros}

    Limits:
    - Max 100 events (ring buffer evicts oldest)
    - Max 100KB total buffer size (evicts oldest until under limit, always
      keeping at least the newest event)

    Thread Safety:
    - Individual deque append/popleft calls are thread-safe, but the running
      size counter is updated with plain read-modify-write statements and no
      lock is taken. Concurrent writers need external synchronization.

    Test Strategy:
    - Unit test buffer capacity limits (100 events)
    - Unit test size limits (100KB)
    - Test replay from various positions
    - Test gap detection accuracy
    - Test edge cases (empty buffer, no gaps, large gaps)
    - Test event ID parsing with various formats
    """

    MAX_BUFFER_EVENTS = 100
    MAX_BUFFER_SIZE = 100 * 1024  # 100KB

    def __init__(self, execution_id: str):
        """
        Initialize EventBuffer for a specific execution.

        Args:
            execution_id: The execution ID this buffer is associated with
        """
        self.execution_id = execution_id
        self.buffer: deque = deque(maxlen=self.MAX_BUFFER_EVENTS)
        # Running sum of the stored events' sizes
        self._current_size = 0

    def add_event(self, event_id: str, event_type: str, data: str) -> None:
        """
        Add event to buffer with automatic eviction.

        Events are stored as (event_id, event_type, data, size) tuples, where
        size is ``len(data)`` (character count; equal to the byte count only
        for ASCII payloads). The oldest event is dropped when the buffer
        already holds its maximum number of events, and further oldest events
        are dropped while the total size exceeds MAX_BUFFER_SIZE.

        Args:
            event_id: Unique event identifier (format: {execution_id}_{counter}_{timestamp})
            event_type: Type of event (e.g., 'status', 'tool_call', 'message')
            data: JSON string of event data
        """
        event_size = len(data)

        # Evict explicitly instead of relying on the deque's silent maxlen
        # eviction, so the evicted size can be subtracted without re-summing.
        capacity = self.buffer.maxlen
        if capacity is not None and len(self.buffer) >= capacity:
            self._current_size -= self.buffer.popleft()[3]

        self.buffer.append((event_id, event_type, data, event_size))
        self._current_size += event_size

        # Remove old events if buffer exceeds size limit (newest is kept)
        while self._current_size > self.MAX_BUFFER_SIZE and len(self.buffer) > 1:
            _, _, _, old_size = self.buffer.popleft()
            self._current_size -= old_size

        logger.debug(
            "event_buffered",
            execution_id=self.execution_id,
            event_id=event_id,
            event_type=event_type,
            buffer_count=len(self.buffer),
            buffer_size=self._current_size
        )

    def _recalculate_size(self) -> None:
        """Recalculate buffer size from scratch."""
        self._current_size = sum(size for _, _, _, size in self.buffer)

    def replay_from_id(self, last_event_id: str) -> List[Tuple[str, str, str]]:
        """
        Replay events after the given event ID.

        Parses the last_event_id to extract the counter, then returns all
        buffered events with higher counters in buffer order.

        Args:
            last_event_id: Last event ID received by client

        Returns:
            List of (event_id, event_type, data) tuples to replay; empty when
            the ID cannot be parsed or the buffer is empty
        """
        last_counter = self._parse_event_id(last_event_id)

        if last_counter is None or not self.buffer:
            return []

        replay_events: List[Tuple[str, str, str]] = []

        for buf_event_id, buf_event_type, buf_data, _ in self.buffer:
            buf_counter = self._parse_event_id(buf_event_id)

            if buf_counter is not None and buf_counter > last_counter:
                replay_events.append((buf_event_id, buf_event_type, buf_data))

        if replay_events:
            logger.info(
                "replaying_buffered_events",
                execution_id=self.execution_id,
                last_counter=last_counter,
                replay_count=len(replay_events)
            )

        return replay_events

    def detect_gap(self, last_event_id: str, current_event_id: str) -> Optional[Dict[str, Any]]:
        """
        Detect gap between last and current event IDs.

        Compares the two sequence counters only (the buffer contents are not
        consulted) and reports a gap when the current counter is more than
        one ahead of the last counter.

        Args:
            last_event_id: Last event ID received by client
            current_event_id: Current event ID being processed

        Returns:
            Gap information dict if gap detected, None otherwise:
            {
                "gap_detected": True,
                "last_counter": int,
                "current_counter": int,
                "missing_count": int,
                "reason": str
            }
        """
        last_counter = self._parse_event_id(last_event_id)
        current_counter = self._parse_event_id(current_event_id)

        if last_counter is None or current_counter is None:
            return None

        expected_counter = last_counter + 1
        if current_counter <= expected_counter:
            return None

        missing_count = current_counter - expected_counter

        logger.warning(
            "gap_detected",
            execution_id=self.execution_id,
            last_counter=last_counter,
            current_counter=current_counter,
            missing_count=missing_count
        )

        return {
            "gap_detected": True,
            "last_counter": last_counter,
            "current_counter": current_counter,
            "missing_count": missing_count,
            "reason": f"Missing {missing_count} events between {last_counter} and {current_counter}"
        }

    def check_buffer_miss(self, last_event_id: str) -> Optional[Dict[str, Any]]:
        """
        Check whether the buffer can serve events following last_event_id.

        Args:
            last_event_id: Last event ID received by client

        Returns:
            Buffer miss information, or None when no miss is reported:
            {
                "buffer_miss": True,
                "last_known_id": str,
                "buffer_oldest": str,
                "reason": str
            }
            A miss is reported when the buffer is empty, or when no buffered
            event has a counter greater than the (positive) counter of
            last_event_id.
        """
        if not self.buffer:
            return {
                "buffer_miss": True,
                "last_known_id": last_event_id,
                "buffer_oldest": None,
                "reason": "Event buffer is empty"
            }

        last_counter = self._parse_event_id(last_event_id)
        if last_counter is None:
            return None

        # Check if any buffered event is after the last_event_id
        has_newer_events = False
        for buf_event_id, _, _, _ in self.buffer:
            buf_counter = self._parse_event_id(buf_event_id)
            if buf_counter is not None and buf_counter > last_counter:
                has_newer_events = True
                break

        # NOTE(review): this branch fires when NO buffered event is newer than
        # last_event_id, which also covers a client that is fully caught up;
        # confirm that reporting "events too old" is intended in that case.
        if not has_newer_events and last_counter > 0:
            oldest_event_id = self.buffer[0][0] if self.buffer else None

            logger.warning(
                "gap_detected_buffer_miss",
                execution_id=self.execution_id,
                last_counter=last_counter,
                buffer_size=len(self.buffer),
                buffer_oldest=oldest_event_id
            )

            return {
                "buffer_miss": True,
                "last_known_id": last_event_id,
                "buffer_oldest": oldest_event_id,
                "reason": "Event buffer miss - events too old"
            }

        return None

    def _parse_event_id(self, event_id: str) -> Optional[int]:
        """
        Extract sequence counter from event ID format: {execution_id}_{counter}_{timestamp}.

        When the ID starts with this buffer's execution ID, that prefix is
        removed before parsing, so execution IDs that themselves contain
        underscores do not shift the counter position. IDs with a different
        prefix fall back to treating the second underscore-separated field as
        the counter, for compatibility with other ID formats.

        Args:
            event_id: Event ID string to parse

        Returns:
            Sequence counter as integer, or None if the ID is missing, is not
            a string, or has no parsable counter
        """
        if not event_id or not isinstance(event_id, str):
            return None

        own_prefix = f"{self.execution_id}_"

        try:
            if event_id.startswith(own_prefix):
                # Counter is the first field after the known execution ID
                return int(event_id[len(own_prefix):].split("_", 1)[0])

            # Foreign/legacy ID: assume the execution ID has no underscores
            parts = event_id.split("_")
            if len(parts) >= 2:
                return int(parts[1])

        except (ValueError, IndexError) as e:
            logger.warning(
                "invalid_event_id_format",
                execution_id=self.execution_id,
                event_id=event_id,
                error=str(e)
            )

        return None

    def _estimate_size(self, data: Dict[str, Any]) -> int:
        """
        Estimate size of event data.

        Args:
            data: Dictionary of event data

        Returns:
            Length of the JSON serialization, or of ``str(data)`` when the
            data cannot be serialized to JSON
        """
        try:
            return len(json.dumps(data))
        except Exception:
            # Fallback: rough estimate based on string representation
            return len(str(data))

    def get_buffer_size(self) -> int:
        """
        Get current buffer size.

        Returns:
            Sum of the recorded sizes of all buffered events
        """
        return self._current_size

    def get_buffer_info(self) -> Dict[str, Any]:
        """
        Get buffer statistics for debugging.

        Returns:
            Dictionary with buffer statistics:
            {
                "execution_id": str,
                "event_count": int,
                "total_size_bytes": int,
                "max_events": int,
                "max_size_bytes": int,
                "oldest_event_id": str,
                "newest_event_id": str,
                "utilization_percent": float
            }
            The two event IDs are None when the buffer is empty;
            utilization_percent is based on the event count, not the size.
        """
        oldest_event_id = None
        newest_event_id = None

        if self.buffer:
            oldest_event_id = self.buffer[0][0]
            newest_event_id = self.buffer[-1][0]

        event_count = len(self.buffer)
        utilization = (event_count / self.MAX_BUFFER_EVENTS) * 100

        return {
            "execution_id": self.execution_id,
            "event_count": event_count,
            "total_size_bytes": self._current_size,
            "max_events": self.MAX_BUFFER_EVENTS,
            "max_size_bytes": self.MAX_BUFFER_SIZE,
            "oldest_event_id": oldest_event_id,
            "newest_event_id": newest_event_id,
            "utilization_percent": round(utilization, 2)
        }

    def clear(self) -> None:
        """Clear all buffered events."""
        self.buffer.clear()
        self._current_size = 0

        logger.debug(
            "event_buffer_cleared",
            execution_id=self.execution_id
        )
|