kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,303 @@
1
+ """
2
+ Prometheus metrics for Kubiya Control Plane.
3
+
4
+ This module provides comprehensive metrics for monitoring the AI orchestration layer:
5
+ - HTTP request metrics (latency, error rates, request counts)
6
+ - Execution metrics (active count, failures, duration, wait time)
7
+ - Worker and queue metrics
8
+ - LLM/AI metrics (requests, latency, tokens)
9
+ - Tool execution metrics
10
+ - Business metrics (agents, jobs, organizations)
11
+
12
+ Metrics naming convention follows Prometheus best practices:
13
+ - Prefix: kubiya_control_plane_ for HTTP metrics
14
+ - Prefix: kubiya_ for business metrics
15
+ - Suffix: _total for counters, _seconds for durations, _count for gauges
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import structlog
21
+ from typing import Tuple, Optional
22
+ from functools import lru_cache
23
+
24
+ from prometheus_client import (
25
+ Counter,
26
+ Histogram,
27
+ Gauge,
28
+ REGISTRY,
29
+ generate_latest,
30
+ CONTENT_TYPE_LATEST,
31
+ CollectorRegistry,
32
+ )
33
+
# Module-level structured logger shared by every helper in this file.
logger = structlog.get_logger(__name__)

# Directory used by prometheus_client's multiprocess mode; falsy when unset.
PROMETHEUS_MULTIPROC_DIR = os.getenv('PROMETHEUS_MULTIPROC_DIR')

# Request counter, labelled by method, endpoint and status code.
HTTP_REQUESTS_TOTAL = Counter(
    name='kubiya_control_plane_http_requests_total',
    documentation='Total number of HTTP requests received by the Control Plane API',
    labelnames=['method', 'endpoint', 'status_code'],
)

# Request latency histogram; explicit buckets range from 5 ms to 120 s.
HTTP_REQUEST_DURATION_SECONDS = Histogram(
    name='kubiya_control_plane_http_request_duration_seconds',
    documentation='HTTP request duration in seconds (latency)',
    labelnames=['method', 'endpoint'],
    buckets=(
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5,
        1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0,
    ),
)
50
+
51
+
# ---------------------------------------------------------------------------
# Gauges. Each one is overwritten with an absolute value by the matching
# update_* helper in this module.
# NOTE(review): with PROMETHEUS_MULTIPROC_DIR set, prometheus_client gauges
# default to per-process series -- confirm whether a multiprocess_mode is wanted.
# ---------------------------------------------------------------------------

# Currently active tasks/executions.
ACTIVE_TASKS_COUNT = Gauge(
    name='kubiya_active_tasks_count',
    documentation='Number of currently active tasks/executions by type and status',
    labelnames=['execution_type', 'status'],
)

# Failed tasks/executions.
TASK_FAILURE_TOTAL = Gauge(
    name='kubiya_task_failure_total',
    documentation='Total number of failed task/executions by type',
    labelnames=['execution_type'],
)


# How long executions take.
EXECUTION_DURATION_SECONDS = Gauge(
    name='kubiya_execution_duration_seconds',
    documentation='Average execution duration in seconds by type and status (from completed executions)',
    labelnames=['execution_type', 'status'],
)

# Pending executions per worker queue.
WORKER_QUEUE_DEPTH = Gauge(
    name='kubiya_worker_queue_depth',
    documentation='Number of pending executions waiting in queue',
    labelnames=['queue_id'],
)

# LLM request volume.
LLM_REQUESTS_TOTAL = Gauge(
    name='kubiya_llm_requests_total',
    documentation='Total number of LLM API requests by model and status',
    labelnames=['model', 'status'],
)

# LLM request latency.
LLM_LATENCY_SECONDS = Gauge(
    name='kubiya_llm_latency_seconds',
    documentation='Average LLM request latency in seconds by model',
    labelnames=['model'],
)

# LLM token usage.
LLM_TOKENS_TOTAL = Gauge(
    name='kubiya_llm_tokens_total',
    documentation='Total tokens used in LLM requests by model and type',
    labelnames=['model', 'token_type'],
)


# Open SSE streaming connections (unlabelled).
STREAMING_CONNECTIONS_ACTIVE = Gauge(
    name='kubiya_streaming_connections_active',
    documentation='Number of active SSE streaming connections',
)

# Tool execution duration.
TOOL_EXECUTION_DURATION_SECONDS = Gauge(
    name='kubiya_tool_execution_duration_seconds',
    documentation='Average tool execution duration in seconds by tool name',
    labelnames=['tool_name'],
)

# Tool execution volume.
TOOL_EXECUTIONS_TOTAL = Gauge(
    name='kubiya_tool_executions_total',
    documentation='Total number of tool executions by tool name and status',
    labelnames=['tool_name', 'status'],
)

# Wait time between an execution being created and starting to run.
EXECUTION_WAIT_TIME_SECONDS = Gauge(
    name='kubiya_execution_wait_time_seconds',
    documentation='Average wait time from creation to running in seconds by type',
    labelnames=['execution_type'],
)

# Webhook trigger requests.
WEBHOOK_REQUESTS_TOTAL = Gauge(
    name='kubiya_webhook_requests_total',
    documentation='Total number of webhook trigger requests by status',
    labelnames=['status'],
)


# Executions per organization.
EXECUTIONS_BY_ORG_TOTAL = Gauge(
    name='kubiya_executions_by_org_total',
    documentation='Total number of executions by organization',
    labelnames=['organization_id'],
)

# Active agents per organization.
AGENTS_ACTIVE = Gauge(
    name='kubiya_agents_active',
    documentation='Number of active agents by organization',
    labelnames=['organization_id'],
)

# Scheduled jobs.
SCHEDULED_JOBS_TOTAL = Gauge(
    name='kubiya_scheduled_jobs_total',
    documentation='Total number of scheduled jobs by status',
    labelnames=['status'],
)

# Conversation turns.
CONVERSATION_TURNS_TOTAL = Gauge(
    name='kubiya_conversation_turns_total',
    documentation='Total conversation turns by execution type',
    labelnames=['execution_type'],
)
163
+
164
+
# Matches a canonical 8-4-4-4-12 hexadecimal UUID, case-insensitively.
UUID_PATTERN = re.compile(
    r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
    re.IGNORECASE
)
# Matches a path segment made only of digits ("/123" before "/" or at the end).
NUMERIC_ID_PATTERN = re.compile(r'/\d+(?=/|$)')


@lru_cache(maxsize=1000)
def normalize_endpoint(path: str) -> str:
    """Collapse identifiers in *path* to ``{id}`` placeholders.

    UUIDs are replaced first, then purely numeric path segments, so that
    metrics labelled with the result keep a low cardinality. Results are
    memoised for up to 1000 distinct paths.

    Args:
        path: Request path, e.g. ``/api/v1/agents/123``.

    Returns:
        The path with every UUID and numeric segment replaced by ``{id}``.
    """
    without_uuids = UUID_PATTERN.sub('{id}', path)
    return NUMERIC_ID_PATTERN.sub('/{id}', without_uuids)
178
+
179
+
def get_metrics_response() -> Tuple[bytes, str]:
    """Render the current metrics in the Prometheus exposition format.

    When ``PROMETHEUS_MULTIPROC_DIR`` is set, a fresh ``CollectorRegistry``
    backed by a ``MultiProcessCollector`` is rendered; otherwise the default
    ``REGISTRY`` is used.

    Returns:
        A ``(payload, content_type)`` tuple. If rendering raises, the error is
        logged and a one-line comment payload is returned instead.
    """
    try:
        registry = REGISTRY
        if PROMETHEUS_MULTIPROC_DIR:
            from prometheus_client.multiprocess import MultiProcessCollector

            registry = CollectorRegistry()
            MultiProcessCollector(registry)
        return generate_latest(registry), CONTENT_TYPE_LATEST
    except Exception as e:
        logger.error("metrics_generation_failed", error=str(e))
        return b"# Error generating metrics\n", CONTENT_TYPE_LATEST
194
+
195
+
def record_http_request(
    method: str,
    endpoint: str,
    status_code: int,
    duration_seconds: float,
    trace_id: Optional[str] = None,
):
    """Record one finished HTTP request in the request counter and latency histogram.

    Args:
        method: HTTP method label.
        endpoint: Endpoint label.
        status_code: Response status; stored as a string label on the counter.
        duration_seconds: Request duration observed by the histogram.
        trace_id: Optional trace identifier, only used in the 5xx log line
            (logged as ``"unknown"`` when falsy).
    """
    request_counter = HTTP_REQUESTS_TOTAL.labels(
        method=method,
        endpoint=endpoint,
        status_code=str(status_code),
    )
    request_counter.inc()

    latency_histogram = HTTP_REQUEST_DURATION_SECONDS.labels(
        method=method,
        endpoint=endpoint,
    )
    latency_histogram.observe(duration_seconds)

    # Only server errors are additionally written to the log.
    if status_code < 500:
        return

    logger.error(
        "prometheus_5xx_recorded",
        method=method,
        endpoint=endpoint,
        status_code=status_code,
        duration_seconds=round(duration_seconds, 4),
        trace_id=trace_id or "unknown",
    )
224
+
225
+
def update_active_tasks(execution_type: str, status: str, count: int):
    """Set the active-task gauge for one (execution_type, status) pair to *count*."""
    series = ACTIVE_TASKS_COUNT.labels(execution_type=execution_type, status=status)
    series.set(count)
229
+
230
+
def update_task_failures(execution_type: str, count: int):
    """Set the failed-task gauge for *execution_type* to *count*."""
    series = TASK_FAILURE_TOTAL.labels(execution_type=execution_type)
    series.set(count)
234
+
235
+
def update_execution_duration(execution_type: str, status: str, avg_seconds: float):
    """Set the average execution duration gauge for one (execution_type, status) pair."""
    series = EXECUTION_DURATION_SECONDS.labels(
        execution_type=execution_type,
        status=status,
    )
    series.set(avg_seconds)
239
+
240
+
def update_worker_queue_depth(queue_id: str, count: int):
    """Set the queue-depth gauge for *queue_id* to *count*."""
    series = WORKER_QUEUE_DEPTH.labels(queue_id=queue_id)
    series.set(count)
244
+
245
+
def update_llm_requests(model: str, status: str, count: int):
    """Set the LLM request gauge for one (model, status) pair to *count*."""
    series = LLM_REQUESTS_TOTAL.labels(model=model, status=status)
    series.set(count)
249
+
250
+
def update_llm_latency(model: str, avg_seconds: float):
    """Set the average LLM latency gauge for *model* to *avg_seconds*."""
    series = LLM_LATENCY_SECONDS.labels(model=model)
    series.set(avg_seconds)
254
+
255
+
def update_llm_tokens(model: str, token_type: str, count: int):
    """Set the LLM token gauge for one (model, token_type) pair to *count*."""
    series = LLM_TOKENS_TOTAL.labels(model=model, token_type=token_type)
    series.set(count)
259
+
260
+
def update_streaming_connections(count: int):
    """Set the unlabelled active-streaming-connections gauge to *count*."""
    gauge = STREAMING_CONNECTIONS_ACTIVE
    gauge.set(count)
264
+
265
+
def update_tool_execution_duration(tool_name: str, avg_seconds: float):
    """Set the average tool execution duration gauge for *tool_name*."""
    series = TOOL_EXECUTION_DURATION_SECONDS.labels(tool_name=tool_name)
    series.set(avg_seconds)
269
+
270
+
def update_tool_executions(tool_name: str, status: str, count: int):
    """Set the tool execution gauge for one (tool_name, status) pair to *count*."""
    series = TOOL_EXECUTIONS_TOTAL.labels(tool_name=tool_name, status=status)
    series.set(count)
274
+
275
+
def update_execution_wait_time(execution_type: str, avg_seconds: float):
    """Set the average execution wait-time gauge for *execution_type*."""
    series = EXECUTION_WAIT_TIME_SECONDS.labels(execution_type=execution_type)
    series.set(avg_seconds)
279
+
280
+
def update_webhook_requests(status: str, count: int):
    """Set the webhook request gauge for *status* to *count*."""
    series = WEBHOOK_REQUESTS_TOTAL.labels(status=status)
    series.set(count)
284
+
285
+
def update_executions_by_org(organization_id: str, count: int):
    """Set the per-organization execution gauge for *organization_id* to *count*."""
    series = EXECUTIONS_BY_ORG_TOTAL.labels(organization_id=organization_id)
    series.set(count)
289
+
290
+
def update_agents_active(organization_id: str, count: int):
    """Set the active-agents gauge for *organization_id* to *count*."""
    series = AGENTS_ACTIVE.labels(organization_id=organization_id)
    series.set(count)
294
+
295
+
def update_scheduled_jobs(status: str, count: int):
    """Set the scheduled-jobs gauge for *status* to *count*."""
    series = SCHEDULED_JOBS_TOTAL.labels(status=status)
    series.set(count)
299
+
300
+
def update_conversation_turns(execution_type: str, count: int):
    """Set the conversation-turns gauge for *execution_type* to *count*."""
    series = CONVERSATION_TURNS_TOTAL.labels(execution_type=execution_type)
    series.set(count)
@@ -0,0 +1,246 @@
1
+ """
2
+ OpenTelemetry middleware for FastAPI.
3
+
4
+ This module provides middleware to:
5
+ - Add trace ID to response headers (X-Trace-ID)
6
+ - Enrich spans with organizational and user context from request.state
7
+ - Set span status based on HTTP status codes
8
+ """
9
+
10
+ import structlog
11
+ from starlette.middleware.base import BaseHTTPMiddleware
12
+ from starlette.requests import Request
13
+ from starlette.responses import Response
14
+ from opentelemetry import trace
15
+ from opentelemetry.trace import Status, StatusCode
16
+
17
+ logger = structlog.get_logger(__name__)
18
+
19
+
class TraceContextMiddleware(BaseHTTPMiddleware):
    """
    Middleware to enrich spans with organizational context and add trace ID to responses.

    For every request whose path is not in ``EXCLUDED_PATHS`` and whose current
    span is recording, the middleware:

    - records span events for the incoming request and the outgoing response,
    - copies organization/user data from ``request.state.organization`` onto the span,
    - sets the span status from the HTTP status code,
    - adds ``X-Trace-ID`` and ``X-Span-ID`` response headers.

    This middleware should be added AFTER the OpenTelemetry FastAPI instrumentation,
    so that it can enrich the automatically created spans.
    """

    # Paths to exclude from tracing (health checks, metrics, etc.)
    EXCLUDED_PATHS = {
        "/api/health",
        "/health",
        "/health/live",
        "/health/ready",
        "/health/detailed",
        "/health/event-bus",
        "/health/temporal-credentials",
        "/ready",
        "/metrics",
        "/favicon.ico"
    }

    # HTTP methods whose request body is captured for debugging.
    _BODY_METHODS = ("POST", "PUT", "PATCH")
    # Bodies of this size (in bytes) or larger are not captured.
    _MAX_CAPTURED_BODY_BYTES = 10000
    # Maximum number of characters of the body stored on the span.
    _MAX_BODY_ATTRIBUTE_CHARS = 500
    # Lower-case substrings that mark a body as sensitive. They are compared
    # against the lower-cased body, so every marker must itself be lower-case.
    _SENSITIVE_MARKERS = ("password", "token", "secret", "api_key", "apikey")
    # (span attribute, key in request.state.organization) pairs for user context.
    _USER_ATTRIBUTES = (
        ("user.id", "user_id"),
        ("user.email", "user_email"),
        ("user.name", "user_name"),
        ("user.avatar", "user_avatar"),
    )

    @classmethod
    def _sanitize_body(cls, body: str) -> str:
        """Return the text that may be stored on the span for *body*.

        A body containing any sensitive marker is replaced entirely by
        ``"[REDACTED]"``: keeping a prefix could expose the secret itself,
        because short payloads fit completely inside any prefix. Other bodies
        are truncated to ``_MAX_BODY_ATTRIBUTE_CHARS`` characters.
        """
        lowered = body.lower()
        if any(marker in lowered for marker in cls._SENSITIVE_MARKERS):
            return "[REDACTED]"
        return body[:cls._MAX_BODY_ATTRIBUTE_CHARS]

    @classmethod
    def _set_identity_attributes(cls, span, org: dict) -> None:
        """Copy the organization id and any non-empty user fields from *org* onto *span*."""
        span.set_attribute("organization.id", org.get("id", ""))
        for attribute, key in cls._USER_ATTRIBUTES:
            value = org.get(key)
            if value:
                span.set_attribute(attribute, value)

    async def _capture_request_body(self, request: Request):
        """Read a small request body for debugging and make it readable again.

        Returns:
            The decoded body, or ``None`` when the method carries no captured
            body, the body is empty or too large, or reading it failed.
        """
        if request.method not in self._BODY_METHODS:
            return None

        try:
            body_bytes = await request.body()
        except Exception as e:
            logger.warning("failed_to_capture_request_body", error=str(e))
            return None

        if not body_bytes or len(body_bytes) >= self._MAX_CAPTURED_BODY_BYTES:
            return None

        # Replay the buffered body for downstream handlers.
        # NOTE(review): this overrides a private Starlette attribute and never
        # yields "http.disconnect" -- confirm it is still needed with the
        # Starlette version in use.
        async def receive():
            return {"type": "http.request", "body": body_bytes}

        request._receive = receive

        # errors="replace" so a non-UTF-8 payload cannot abort the capture.
        return body_bytes.decode('utf-8', errors='replace')

    def _enrich_request_span(self, span, request: Request, request_body) -> None:
        """Attach organization, user and request attributes to *span* before the handler runs."""
        from control_plane_api.app.observability import add_span_event

        path = request.url.path
        has_org = hasattr(request.state, "organization")
        logger.debug(
            "trace_context_check",
            has_organization=has_org,
            path=path
        )

        org = request.state.organization if has_org else None
        if isinstance(org, dict):
            self._set_identity_attributes(span, org)
            span.set_attribute("organization.name", org.get("name", ""))
            add_span_event(
                "Organizational context added to span",
                {
                    "organization.id": org.get("id", ""),
                    "organization.name": org.get("name", ""),
                    "user.email": org.get("user_email", ""),
                }
            )
            logger.info(
                "span_enriched_with_org",
                org_id=org.get("id"),
                path=path
            )

        if hasattr(request.state, "request_id"):
            span.set_attribute("request.id", request.state.request_id)

        span.set_attribute("http.route", path)
        span.set_attribute("http.method", request.method)

        if request.url.query:
            span.set_attribute("http.query", request.url.query)

        if request_body:
            span.set_attribute("http.request.body", self._sanitize_body(request_body))

    def _record_response(self, span, request: Request, response, trace_id, span_id) -> None:
        """Add trace headers to *response* and record its outcome on *span*."""
        from control_plane_api.app.observability import add_span_event

        response.headers["X-Trace-ID"] = trace_id
        response.headers["X-Span-ID"] = span_id

        status_code = response.status_code
        if status_code >= 500:
            span.set_status(Status(StatusCode.ERROR, f"HTTP {status_code}"))
            span.set_attribute("error", True)
            label, outcome = "Server error", "error"
        elif status_code >= 400:
            # Client errors are not span errors (they're expected)
            span.set_attribute("http.client_error", True)
            span.set_status(Status(StatusCode.OK))
            label, outcome = "Client error", "client_error"
        else:
            span.set_status(Status(StatusCode.OK))
            label, outcome = "Success", "success"

        add_span_event(
            f"HTTP response: {label} {status_code}",
            {
                "http.status_code": status_code,
                "status": outcome,
            }
        )
        span.set_attribute("http.status_code", status_code)

        # Enrich again after the route handler ran: the auth dependency sets
        # request.state.organization, so it may only be available now.
        org = getattr(request.state, "organization", None)
        if isinstance(org, dict):
            self._set_identity_attributes(span, org)

        # Log request completion with trace correlation. A non-dict
        # organization yields None instead of raising on ``.get``.
        logger.info(
            "http_request_completed",
            method=request.method,
            path=request.url.path,
            status_code=status_code,
            trace_id=trace_id,
            span_id=span_id,
            organization_id=org.get("id") if isinstance(org, dict) else None
        )

    async def dispatch(self, request: Request, call_next):
        """
        Process request and enrich span with organizational context.

        Args:
            request: Incoming HTTP request
            call_next: Next middleware in chain

        Returns:
            HTTP response, with X-Trace-ID and X-Span-ID headers when the
            current span is recording
        """
        # Skip tracing for health checks and other excluded paths
        if request.url.path in self.EXCLUDED_PATHS:
            return await call_next(request)

        # Get current span (created by FastAPI instrumentation)
        span = trace.get_current_span()
        trace_id = None
        span_id = None

        if span and span.is_recording():
            from control_plane_api.app.observability import add_span_event

            add_span_event(
                f"HTTP request received: {request.method} {request.url.path}",
                {
                    "http.method": request.method,
                    "http.path": request.url.path,
                    "http.query": request.url.query if request.url.query else "",
                    "client.host": request.client.host if request.client else "unknown",
                }
            )

            # The body is only buffered when it will actually be recorded.
            request_body = await self._capture_request_body(request)

            try:
                self._enrich_request_span(span, request, request_body)
            except Exception as e:
                logger.warning(
                    "span_enrichment_failed",
                    error=str(e),
                    exc_info=True
                )

            # Get trace ID before processing request for logging correlation
            span_context = span.get_span_context()
            trace_id = format(span_context.trace_id, '032x')
            span_id = format(span_context.span_id, '016x')

        # Process request
        response = await call_next(request)

        if span and span.is_recording():
            try:
                self._record_response(span, request, response, trace_id, span_id)
            except Exception as e:
                logger.warning(
                    "trace_id_header_failed",
                    error=str(e),
                    exc_info=True
                )

        return response
@@ -0,0 +1,115 @@
1
+ """
2
+ Optional OpenTelemetry imports for environments with size constraints.
3
+
4
+ This module provides safe imports for OpenTelemetry that gracefully degrade
5
+ when the packages are not available (e.g., in Vercel serverless deployments
6
+ where package size matters).
7
+ """
8
+
9
+ import os
10
+ from typing import Optional, Any
11
+ from contextlib import nullcontext
12
+
# Tracing is on unless TRACING_ENABLED is set to something other than true/1/yes.
TRACING_ENABLED = os.getenv("TRACING_ENABLED", "true").lower() in ("true", "1", "yes")

# Try to import OpenTelemetry, but fall back gracefully
try:
    if not TRACING_ENABLED:
        # Reuse the ImportError path so that "disabled" and "not installed"
        # both end up with the same no-op implementations.
        raise ImportError("Tracing disabled via TRACING_ENABLED env var")

    from opentelemetry import trace as _trace
    from opentelemetry.trace import Status, StatusCode, Span, Tracer

    HAS_OPENTELEMETRY = True
    trace = _trace

except ImportError:
    HAS_OPENTELEMETRY = False

    # Create no-op implementations
    class NoOpSpan:
        """No-op span: accepts the common Span calls and does nothing."""

        def set_attribute(self, key: str, value: Any) -> None:
            pass

        def set_attributes(self, attributes: dict) -> None:
            pass

        def add_event(self, name: str, *args: Any, **kwargs: Any) -> None:
            pass

        def update_name(self, name: str) -> None:
            pass

        def set_status(self, status: Any, *args: Any, **kwargs: Any) -> None:
            pass

        def record_exception(self, exception: BaseException, *args: Any, **kwargs: Any) -> None:
            pass

        def end(self, *args: Any, **kwargs: Any) -> None:
            pass

        def is_recording(self) -> bool:
            return False

        def get_span_context(self) -> Any:
            # There is no real context; callers are expected to check
            # is_recording() before reading trace/span ids.
            return None

        def __enter__(self):
            return self

        def __exit__(self, *args):
            pass

    class NoOpTracer:
        """No-op tracer that only hands out no-op spans."""

        def start_as_current_span(self, name: str, *args, **kwargs):
            # nullcontext makes ``with tracer.start_as_current_span(...) as span`` work.
            return nullcontext(NoOpSpan())

        def start_span(self, name: str, *args, **kwargs):
            return NoOpSpan()

    class NoOpTracerProvider:
        """No-op tracer provider."""

        def get_tracer(self, *args, **kwargs) -> NoOpTracer:
            return NoOpTracer()

    class NoOpTrace:
        """No-op replacement for the ``opentelemetry.trace`` module."""

        @staticmethod
        def get_tracer(name: str, version: str = "", *args: Any, **kwargs: Any) -> NoOpTracer:
            # Extra arguments are accepted and ignored.
            return NoOpTracer()

        @staticmethod
        def get_current_span(context: Any = None) -> NoOpSpan:
            return NoOpSpan()

        @staticmethod
        def get_tracer_provider() -> NoOpTracerProvider:
            return NoOpTracerProvider()

        @staticmethod
        def set_tracer_provider(provider: Any) -> None:
            pass

    # Status and StatusCode placeholders
    class Status:
        """Placeholder Status that simply remembers what it was given."""

        def __init__(self, status_code: Any = "UNSET", description: str = ""):
            self.status_code = status_code
            self.description = description

    class StatusCode:
        """Placeholder StatusCode exposing the three standard members."""
        OK = "OK"
        ERROR = "ERROR"
        UNSET = "UNSET"

    # Type hints
    Span = NoOpSpan
    Tracer = NoOpTracer

    # Create the no-op trace module
    trace = NoOpTrace()
106
+
107
+
def get_tracer(name: str, version: str = "") -> Tracer:
    """Return the tracer that the module-level ``trace`` object provides for *name* and *version*.

    ``trace`` is either the real OpenTelemetry trace module or its no-op
    stand-in, so this call is safe in both cases.
    """
    tracer = trace.get_tracer(name, version)
    return tracer
111
+
112
+
def get_current_span() -> Span:
    """Return the current span as reported by the module-level ``trace`` object.

    ``trace`` is either the real OpenTelemetry trace module or its no-op
    stand-in, so this call is safe in both cases.
    """
    current = trace.get_current_span()
    return current