kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,165 @@
1
+ """Health check endpoints"""
2
+
3
+ from fastapi import APIRouter, Request, HTTPException, status
4
+ from datetime import datetime
5
+ import structlog
6
+
7
+ logger = structlog.get_logger()
8
+
9
+ router = APIRouter()
10
+
11
+
12
@router.get("/health")
async def health_check(request: Request):
    """
    Lightweight health endpoint (no authentication required).

    Performs no probing: the control plane and every listed external service
    are reported as "healthy" unconditionally. The timestamp is the current
    naive UTC time in ISO-8601 form.
    """
    # Retained from the original implementation; the imported name is not
    # used below.
    from control_plane_api.app.config import settings

    payload = {"status": "healthy", "service": "agent-control-plane"}
    payload["timestamp"] = datetime.utcnow().isoformat()
    # Static per-service statuses; nothing here contacts these services.
    payload["services"] = dict.fromkeys(
        ("kubiya_api", "context_graph", "cognitive_memory"), "healthy"
    )
    return payload
36
+
37
+
38
@router.get("/ready")
async def readiness_check():
    """Readiness endpoint (no authentication required); always reports "ready"."""
    now_iso = datetime.utcnow().isoformat()
    return dict(status="ready", timestamp=now_iso)
42
+
43
+
44
@router.get("/health/detailed")
async def detailed_health_check(request: Request):
    """
    Detailed health check with dependency status.

    Probes the database (via ``health_check_db``) and Redis (``PING``), and
    reports whether the Temporal host/namespace settings are present. No
    Temporal connection is attempted. No authentication required.

    Returns:
        A dict with "api", "timestamp", "database", "redis" and "temporal"
        entries plus an overall "status": "healthy" when every component
        reports "healthy" or "configured", otherwise "degraded".
    """
    checks = {
        "api": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
    }

    # Database health check
    try:
        from control_plane_api.app.database import health_check_db
        checks["database"] = "healthy" if health_check_db() else "unhealthy"
    except Exception as e:
        logger.error("database_health_check_failed", error=str(e))
        checks["database"] = "unhealthy"

    # Check Redis
    try:
        import redis
        from control_plane_api.app.config import settings
        r = redis.from_url(settings.redis_url)
        try:
            r.ping()
        finally:
            # A fresh client is created on every request; close it so its
            # connection is released instead of accumulating across calls.
            r.close()
        checks["redis"] = "healthy"
    except Exception as e:
        logger.error("redis_health_check_failed", error=str(e))
        checks["redis"] = f"unhealthy: {str(e)}"

    # Check Temporal (just configuration check, not actual connection)
    try:
        from control_plane_api.app.config import settings
        if settings.temporal_host and settings.temporal_namespace:
            checks["temporal"] = "configured"
        else:
            checks["temporal"] = "not configured"
    except Exception as e:
        logger.error("temporal_health_check_failed", error=str(e))
        checks["temporal"] = f"error: {str(e)}"

    # Determine overall status from every component entry (metadata keys
    # are excluded).
    checks["status"] = "healthy" if all(
        v in ("healthy", "configured")
        for k, v in checks.items()
        if k not in ("timestamp", "status")
    ) else "degraded"

    return checks
98
+
99
+
100
@router.get("/health/event-bus")
async def event_bus_health_check():
    """
    Report the health of the configured event bus providers.

    When ``settings.event_bus`` is missing or falsy a "not_configured"
    payload is returned. Otherwise an ``EventBusManager`` is built from those
    settings, initialized, and asked for its health; the "healthy" flag under
    the "_overall" key of that result selects "healthy" or "degraded". A
    failure while building or querying the manager yields an "error" payload,
    and an ``ImportError`` from the settings/manager imports yields
    "dependencies_missing".

    No authentication required for health checks.
    """
    try:
        from control_plane_api.app.config import settings

        # Guard clause: nothing to probe when no event bus settings exist.
        bus_configured = hasattr(settings, "event_bus") and bool(settings.event_bus)
        if not bus_configured:
            return dict(
                status="not_configured",
                message="Event bus not configured - using default HTTP event publishing",
                timestamp=datetime.utcnow().isoformat(),
            )

        from control_plane_api.app.lib.event_bus.manager import (
            EventBusManager,
            EventBusManagerConfig,
        )

        try:
            bus_manager = EventBusManager(EventBusManagerConfig(**settings.event_bus))

            # NOTE(review): a manager is built and initialized on every call
            # and no teardown call is made here - confirm whether
            # EventBusManager needs an explicit shutdown.
            await bus_manager.initialize()

            report = await bus_manager.health_check()
            overall = report.get("_overall", {})
            verdict = "healthy" if overall.get("healthy", False) else "degraded"

            return {
                "status": verdict,
                "providers": report,
                "timestamp": datetime.utcnow().isoformat(),
            }
        except Exception as exc:
            logger.error("event_bus_health_check_failed", error=str(exc))
            return dict(
                status="error",
                error=str(exc),
                timestamp=datetime.utcnow().isoformat(),
            )

    except ImportError as exc:
        # Event bus dependencies not installed
        return dict(
            status="dependencies_missing",
            message="Event bus dependencies not installed",
            error=str(exc),
            timestamp=datetime.utcnow().isoformat(),
        )
@@ -0,0 +1,394 @@
1
+ """
2
+ Enhanced health check endpoints for production monitoring.
3
+
4
+ Provides:
5
+ - Basic health check (/health)
6
+ - Readiness check with dependency validation (/health/ready)
7
+ - Liveness check (/health/live)
8
+ - Detailed health status (/health/detailed)
9
+ """
10
+
11
+ from fastapi import APIRouter, Depends, HTTPException, status
12
+ from fastapi.responses import JSONResponse
13
+ from typing import Dict, Any, Optional
14
+ from datetime import datetime, timezone
15
+ from sqlalchemy.ext.asyncio import AsyncSession
16
+ from sqlalchemy import text
17
+ import structlog
18
+ import httpx
19
+ import asyncio
20
+ import time
21
+ import os
22
+ import psutil
23
+
24
+ from control_plane_api.app.database import get_session
25
+ from control_plane_api.app.lib.redis_client import get_redis_client
26
+ from control_plane_api.app.lib.temporal_client import get_temporal_client
27
+ from control_plane_api.app.config import settings
28
+
29
+ logger = structlog.get_logger()
30
+
31
+ router = APIRouter()
32
+
33
+ # Track application start time
34
+ APP_START_TIME = time.time()
35
+
36
+
37
@router.get("/health", tags=["Health"])
async def health_check() -> Dict[str, str]:
    """
    Basic health endpoint.

    Performs no dependency checks; it reports a fixed "healthy" status along
    with the service name, ``settings.api_version`` and the current UTC time.
    """
    body: Dict[str, str] = {"status": "healthy", "service": "agent-control-plane"}
    body["version"] = settings.api_version
    body["timestamp"] = datetime.now(timezone.utc).isoformat()
    return body
51
+
52
+
53
@router.get("/health/live", tags=["Health"])
async def liveness_check() -> Dict[str, Any]:
    """
    Liveness probe.

    Performs a trivial allocation, then reports "alive" together with the
    seconds elapsed since ``APP_START_TIME`` and the current UTC time.

    Raises:
        HTTPException: 503 if anything in the probe raises.
    """
    try:
        # Small throwaway allocation used purely as a sanity probe.
        _probe = [*range(1000)]

        elapsed = time.time() - APP_START_TIME
        now_iso = datetime.now(timezone.utc).isoformat()
        return dict(
            status="alive",
            uptime_seconds=round(elapsed, 2),
            timestamp=now_iso,
        )
    except Exception as exc:
        logger.error("liveness_check_failed", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Liveness check failed",
        )
78
+
79
+
80
@router.get("/health/ready", tags=["Health"])
async def readiness_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Readiness probe.

    Runs three probes in order - database (``SELECT 1``), Redis (``PING``)
    and Temporal (``describe_namespace``) - and records each outcome as a
    boolean under "checks". Only the database result decides readiness;
    Redis and Temporal failures are reported under "errors" but do not fail
    the probe.

    Returns:
        The status payload directly when ready, or the same payload wrapped
        in a 503 ``JSONResponse`` when the database probe did not succeed.
    """
    probe_results = dict.fromkeys(("database", "redis", "temporal"), False)
    problems = []

    # Database (required)
    if not db_session:
        problems.append("Database: No session available")
    else:
        try:
            row = await db_session.execute(text("SELECT 1"))
            probe_results["database"] = row.scalar() == 1
        except Exception as exc:
            logger.warning("database_health_check_failed", error=str(exc))
            problems.append(f"Database: {str(exc)}")

    # Redis (optional): stays False when no client is returned.
    try:
        cache = get_redis_client()
        if cache:
            await cache.ping()
            probe_results["redis"] = True
    except Exception as exc:
        logger.warning("redis_health_check_failed", error=str(exc))
        problems.append(f"Redis: {str(exc)}")

    # Temporal (optional): stays False when no client is returned.
    try:
        workflow_client = await get_temporal_client()
        if workflow_client:
            await workflow_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            probe_results["temporal"] = True
    except Exception as exc:
        logger.warning("temporal_health_check_failed", error=str(exc))
        problems.append(f"Temporal: {str(exc)}")

    # Readiness hinges on the database alone.
    ready = probe_results["database"]

    payload = {
        "status": "ready" if ready else "not_ready",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "checks": probe_results,
    }
    if problems:
        payload["errors"] = problems

    if ready:
        return payload

    return JSONResponse(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        content=payload,
    )
156
+
157
+
158
@router.get("/health/detailed", tags=["Health"])
async def detailed_health_check(
    db_session: Optional[AsyncSession] = Depends(get_session),
) -> Dict[str, Any]:
    """
    Detailed health check with system metrics and dependency probes.

    Collects CPU, memory and root-disk usage via ``psutil``, probes the
    database (``SELECT 1``), Redis (``PING``) and Temporal
    (``describe_namespace``) with latency measurements, and - when their base
    URLs are set - issues ``GET <base>/health`` against the Kubiya API and
    the LiteLLM proxy.

    Returns:
        A dict with overall "status" ("healthy" only if every recorded
        dependency is healthy, otherwise "degraded"), "timestamp", "version",
        "environment", "uptime", "system", "dependencies" and
        "external_services" (``None`` when no external URL is configured).
        External service results do not affect the overall status.
    """
    uptime = time.time() - APP_START_TIME

    # cpu_percent(interval=...) sleeps for the sampling interval, so run it
    # in the default executor rather than stalling the event loop.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(None, psutil.cpu_percent, 0.1)
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    # Dependency checks
    dependencies: Dict[str, Any] = {}

    # Database check with latency
    if db_session:
        try:
            start = time.perf_counter()
            result = await db_session.execute(text("SELECT 1"))
            db_latency = (time.perf_counter() - start) * 1000  # Convert to ms
            dependencies["database"] = {
                "healthy": result.scalar() == 1,
                "latency_ms": round(db_latency, 2),
            }
        except Exception as e:
            dependencies["database"] = {
                "healthy": False,
                "error": str(e),
            }
    else:
        # Mirror the readiness probe: a missing session counts as an
        # unhealthy database instead of being silently left out.
        dependencies["database"] = {
            "healthy": False,
            "error": "No session available",
        }

    # Redis check with latency (skipped when no client is returned)
    try:
        redis_client = get_redis_client()
        if redis_client:
            start = time.perf_counter()
            await redis_client.ping()
            redis_latency = (time.perf_counter() - start) * 1000
            dependencies["redis"] = {
                "healthy": True,
                "latency_ms": round(redis_latency, 2),
            }
    except Exception as e:
        dependencies["redis"] = {
            "healthy": False,
            "error": str(e),
        }

    # Temporal check (skipped when no client is returned)
    try:
        temporal_client = await get_temporal_client()
        if temporal_client:
            start = time.perf_counter()
            await temporal_client.service_client.describe_namespace(
                settings.temporal_namespace
            )
            temporal_latency = (time.perf_counter() - start) * 1000
            dependencies["temporal"] = {
                "healthy": True,
                "latency_ms": round(temporal_latency, 2),
                "namespace": settings.temporal_namespace,
            }
    except Exception as e:
        dependencies["temporal"] = {
            "healthy": False,
            "error": str(e),
        }

    # External services check (only those with a configured base URL).
    candidate_urls = {
        "kubiya_api": settings.kubiya_api_base,
        "litellm_proxy": settings.litellm_api_base,
    }
    configured_urls = {name: url for name, url in candidate_urls.items() if url}
    # The probes are independent and never raise, so run them concurrently.
    probe_results = await asyncio.gather(
        *(_probe_http_health(url) for url in configured_urls.values())
    )
    external_services = dict(zip(configured_urls, probe_results))

    # Determine overall health
    all_healthy = all(
        dep.get("healthy", False) for dep in dependencies.values()
    )

    return {
        "status": "healthy" if all_healthy else "degraded",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "version": settings.api_version,
        "environment": settings.environment,
        "uptime": {
            "seconds": round(uptime, 2),
            "human_readable": _format_uptime(uptime),
        },
        "system": {
            "cpu": {
                "percent": cpu_percent,
                "cores": psutil.cpu_count(),
            },
            "memory": {
                "percent": memory.percent,
                "used_gb": round(memory.used / (1024**3), 2),
                "total_gb": round(memory.total / (1024**3), 2),
            },
            "disk": {
                "percent": disk.percent,
                "used_gb": round(disk.used / (1024**3), 2),
                "total_gb": round(disk.total / (1024**3), 2),
            },
        },
        "dependencies": dependencies,
        "external_services": external_services if external_services else None,
    }


async def _probe_http_health(base_url: str, timeout: float = 5.0) -> Dict[str, Any]:
    """GET ``{base_url}/health`` and summarise the outcome; never raises."""
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            start = time.perf_counter()
            response = await client.get(f"{base_url}/health")
            latency = (time.perf_counter() - start) * 1000
        return {
            "healthy": response.status_code == 200,
            "latency_ms": round(latency, 2),
            "status_code": response.status_code,
        }
    except Exception as e:
        return {
            "healthy": False,
            "error": str(e),
        }
309
+
310
+
311
@router.get("/health/temporal-credentials", tags=["Health"])
async def check_temporal_credentials(
    request: Any = Depends(lambda: None),
    organization: dict = Depends(lambda: {"id": "test-org"}),
) -> Dict[str, Any]:
    """
    Diagnose Temporal credential availability.

    When ``request.state.kubiya_token`` is present, credentials are fetched
    for ``organization["id"]`` (without fallback) and summarised: namespace,
    org, TTL, host and whether an API key is present. Otherwise a static
    payload is returned describing local or cloud mode, as decided by
    ``is_local_temporal()``. Any exception is logged and reported as an
    "error" payload.
    """
    from fastapi import Request
    from control_plane_api.app.middleware.auth import get_current_organization
    from control_plane_api.app.lib.temporal_credentials_service import get_temporal_credentials_for_org

    # NOTE(review): both dependencies are placeholder lambdas, so as written
    # `request` resolves to None and the token branch below looks unreachable
    # unless the dependencies are overridden - confirm the intended wiring.
    try:
        from starlette.requests import Request as StarletteRequest

        has_token = hasattr(request, 'state') and hasattr(request.state, 'kubiya_token')

        if not has_token:
            # Unauthenticated callers only get a static mode description.
            from control_plane_api.app.lib.temporal_credentials_service import is_local_temporal
            if not is_local_temporal():
                return {
                    "status": "info",
                    "message": "Authentication required to check cloud Temporal credentials",
                    "mode": "cloud",
                }
            return {
                "status": "ok",
                "mode": "local",
                "message": "Using local Temporal server",
                "namespace": os.getenv("TEMPORAL_NAMESPACE", "default"),
                "host": os.getenv("TEMPORAL_HOST", "localhost:7233"),
            }

        token = request.state.kubiya_token
        org_id = organization.get("id", "unknown")

        creds = await get_temporal_credentials_for_org(
            org_id=org_id,
            token=token,
            use_fallback=False  # Don't fallback for health check
        )

        summary = {"status": "ok"}
        summary["namespace"] = creds.get("namespace")
        summary["org"] = creds.get("org")
        summary["ttl"] = creds.get("ttl")
        # Only the presence of the key is reported, never its value.
        summary["has_api_key"] = bool(creds.get("api_key"))
        summary["host"] = creds.get("host")
        return summary
    except Exception as exc:
        logger.error("temporal_credentials_health_check_failed", error=str(exc))
        return dict(
            status="error",
            error=str(exc),
            message="Failed to fetch Temporal credentials",
        )
377
+
378
+
379
+ def _format_uptime(seconds: float) -> str:
380
+ """Format uptime in human-readable format."""
381
+ days, remainder = divmod(int(seconds), 86400)
382
+ hours, remainder = divmod(remainder, 3600)
383
+ minutes, seconds = divmod(remainder, 60)
384
+
385
+ parts = []
386
+ if days > 0:
387
+ parts.append(f"{days}d")
388
+ if hours > 0:
389
+ parts.append(f"{hours}h")
390
+ if minutes > 0:
391
+ parts.append(f"{minutes}m")
392
+ parts.append(f"{seconds}s")
393
+
394
+ return " ".join(parts)