kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1004 @@
1
+ """Workers endpoint - shows registered Temporal workers and handles worker registration"""
2
+
3
+ from fastapi import APIRouter, Depends, HTTPException, status, Request
4
+ from typing import List, Dict, Any, Optional
5
+ from pydantic import BaseModel
6
+ from datetime import datetime, timezone
7
+ from sqlalchemy.orm import Session, joinedload
8
+ import structlog
9
+ import uuid
10
+ import json
11
+
12
+ from control_plane_api.app.middleware.auth import get_current_organization
13
+ from control_plane_api.app.lib.temporal_client import get_temporal_client
14
+ from control_plane_api.app.database import get_db
15
+ from control_plane_api.app.lib.redis_client import get_redis_client
16
+ from control_plane_api.app.models.worker import WorkerHeartbeat, WorkerQueue
17
+ from control_plane_api.app.models.environment import Environment
18
+ from control_plane_api.app.observability import (
19
+ instrument_endpoint,
20
+ create_span_with_context,
21
+ add_span_event,
22
+ add_span_error,
23
+ )
24
+
25
+ logger = structlog.get_logger()
26
+
27
+ router = APIRouter()
28
+
29
+
30
class WorkerInfo(BaseModel):
    """Worker information for one poller reported on a Temporal task queue."""
    # Poller identity string, copied from the task queue description.
    identity: str
    # ISO-8601 text of the poller's last access time; None when it is unset.
    last_access_time: str | None
    # The poller's rate_per_second attribute; None when it has no such attribute.
    rate_per_second: float | None
35
+
36
+
37
class TaskQueueInfo(BaseModel):
    """Snapshot of one Temporal task queue and the workers polling it."""

    # Full queue name, built by the endpoints as "{organization_id}.{runner_name}".
    task_queue: str
    # Organization that owns the queue.
    organization_id: str
    # Runner part of the queue name.
    runner_name: str
    # One entry per poller found on the queue (empty when none were found).
    workers: list[WorkerInfo]
    # Number of entries in ``workers``.
    worker_count: int
    # Backlog estimate from the queue description; None when unavailable.
    approximate_backlog_count: int | None
45
+
46
+
47
@router.get("", response_model=List[TaskQueueInfo])
@instrument_endpoint("workers.list_workers")
async def list_workers(
    request: Request,
    organization: dict = Depends(get_current_organization),
):
    """
    List registered Temporal workers for the organization.

    The organization's runners are fetched from the Kubiya API; one task
    queue per runner is then described through the Temporal client.

    Task queue naming convention: {organization_id}.{runner_name}

    Args:
        request: Incoming request; ``request.state.kubiya_token`` is forwarded
            to the Kubiya API.
        organization: Authenticated organization; its ``"id"`` is used.

    Returns:
        One TaskQueueInfo per runner. A queue that cannot be described is
        still listed, with no workers and no backlog estimate.

    Raises:
        HTTPException: 500 when the listing fails as a whole.
    """
    try:
        temporal_client = await get_temporal_client()
        org_id = organization["id"]

        # Get runners from Kubiya API to know which task queues to check
        from control_plane_api.app.lib.kubiya_client import get_kubiya_client
        kubiya_client = get_kubiya_client()
        token = request.state.kubiya_token

        try:
            runners = await kubiya_client.get_runners(token, org_id)
        except Exception as e:
            logger.warning(
                "failed_to_fetch_kubiya_runners",
                error=str(e),
                org_id=org_id
            )
            # If we can't get runners from Kubiya, fall back to checking common ones
            runners = [{"name": "default"}]

        task_queues = []

        # ``or []`` keeps a None result from the API from crashing the loop.
        for runner in runners or []:
            # Runner might be a dict or a string. A missing, None or empty
            # name falls back to "default" rather than yielding "org.None".
            if isinstance(runner, dict):
                runner_name = runner.get("name") or "default"
            else:
                runner_name = str(runner) if runner else "default"

            task_queue = f"{org_id}.{runner_name}"

            try:
                # Describe the task queue to get worker information
                desc = await temporal_client.describe_task_queue(
                    task_queue=task_queue,
                    task_queue_type=1,  # TaskQueueType.WORKFLOW
                )

                # One WorkerInfo per poller; optional attributes default to None.
                workers = [
                    WorkerInfo(
                        identity=poller.identity,
                        last_access_time=(
                            poller.last_access_time.isoformat()
                            if poller.last_access_time
                            else None
                        ),
                        rate_per_second=getattr(poller, "rate_per_second", None),
                    )
                    for poller in (desc.pollers or [])
                ]

                task_queue_info = TaskQueueInfo(
                    task_queue=task_queue,
                    organization_id=org_id,
                    runner_name=runner_name,
                    workers=workers,
                    worker_count=len(workers),
                    approximate_backlog_count=getattr(
                        desc, "approximate_backlog_count", None
                    ),
                )

                logger.info(
                    "task_queue_described",
                    task_queue=task_queue,
                    worker_count=len(workers),
                    org_id=org_id,
                )

            except Exception as e:
                # Task queue might not exist yet if no worker has registered
                logger.debug(
                    "task_queue_not_found",
                    task_queue=task_queue,
                    error=str(e),
                    org_id=org_id,
                )
                # Report the queue anyway, just without workers
                task_queue_info = TaskQueueInfo(
                    task_queue=task_queue,
                    organization_id=org_id,
                    runner_name=runner_name,
                    workers=[],
                    worker_count=0,
                    approximate_backlog_count=None,
                )

            task_queues.append(task_queue_info)

        logger.info(
            "workers_listed",
            org_id=org_id,
            task_queue_count=len(task_queues),
            total_workers=sum(tq.worker_count for tq in task_queues),
        )

        return task_queues

    except Exception as e:
        logger.error(
            "workers_list_failed",
            error=str(e),
            # .get() so a missing "id" cannot raise again inside the handler
            org_id=organization.get("id")
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to list workers: {str(e)}"
        )
172
+
173
+
174
@router.get("/{runner_name}", response_model=TaskQueueInfo)
@instrument_endpoint("workers.get_workers_for_runner")
async def get_workers_for_runner(
    runner_name: str,
    request: Request,
    organization: dict = Depends(get_current_organization),
):
    """
    Get worker information for a specific runner.

    Args:
        runner_name: The runner name (e.g., "default", "production-runner")
        request: Incoming request (not read by this handler).
        organization: Authenticated organization; its ``"id"`` is used.

    Returns:
        TaskQueueInfo for the queue "{organization_id}.{runner_name}". When
        the queue cannot be described, an entry with no workers is returned
        instead of an error.

    Raises:
        HTTPException: 500 when the Temporal client cannot be obtained or the
            organization cannot be resolved.
    """
    try:
        temporal_client = await get_temporal_client()
        org_id = organization["id"]
        task_queue = f"{org_id}.{runner_name}"

        try:
            # Describe the task queue
            desc = await temporal_client.describe_task_queue(
                task_queue=task_queue,
                task_queue_type=1,  # TaskQueueType.WORKFLOW
            )

            # One WorkerInfo per poller; optional attributes default to None.
            workers = [
                WorkerInfo(
                    identity=poller.identity,
                    last_access_time=(
                        poller.last_access_time.isoformat()
                        if poller.last_access_time
                        else None
                    ),
                    rate_per_second=getattr(poller, "rate_per_second", None),
                )
                for poller in (desc.pollers or [])
            ]

            task_queue_info = TaskQueueInfo(
                task_queue=task_queue,
                organization_id=org_id,
                runner_name=runner_name,
                workers=workers,
                worker_count=len(workers),
                approximate_backlog_count=getattr(
                    desc, "approximate_backlog_count", None
                ),
            )

            logger.info(
                "workers_fetched_for_runner",
                runner_name=runner_name,
                worker_count=len(workers),
                org_id=org_id,
            )

            return task_queue_info

        except Exception as e:
            logger.warning(
                "task_queue_not_found",
                task_queue=task_queue,
                error=str(e),
                org_id=org_id,
            )
            # Return empty worker info if task queue doesn't exist
            return TaskQueueInfo(
                task_queue=task_queue,
                organization_id=org_id,
                runner_name=runner_name,
                workers=[],
                worker_count=0,
                approximate_backlog_count=None,
            )

    except Exception as e:
        logger.error(
            "workers_fetch_failed",
            error=str(e),
            runner_name=runner_name,
            # .get() so a missing "id" cannot raise again inside the handler
            org_id=organization.get("id")
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to fetch workers: {str(e)}"
        )
261
+
262
+
263
+ # Worker Registration for Decoupled Architecture
264
+
265
+
266
class WorkerRegistrationRequest(BaseModel):
    """Payload a worker sends to the registration endpoint on startup."""

    # Task queue / environment name the worker wants to join.
    environment_name: str
    # Host name reported by the worker; optional.
    hostname: str | None = None
    # Free-form metadata stored on the worker record as-is.
    worker_metadata: dict[str, Any] = {}
271
+
272
+
273
class WorkerRegistrationResponse(BaseModel):
    """Everything a newly registered worker needs in order to operate."""

    # Unique ID generated for this worker instance.
    worker_id: str
    # Worker token taken from the environment record.
    worker_token: str
    # Task queue name (format: org_id.environment).
    environment_name: str
    # Temporal connection settings.
    temporal_namespace: str
    temporal_host: str
    temporal_api_key: str
    # Organization the worker belongs to.
    organization_id: str
    # Base URL of the control plane (used by the worker for heartbeats).
    control_plane_url: str
283
+
284
+
285
class WorkerHeartbeatRequest(BaseModel):
    """Heartbeat payload for the endpoint that takes the worker ID in the body."""

    # ID of the worker sending the heartbeat.
    worker_id: str
    # Environment name the worker reports.
    environment_name: str
    # Reported state: active, idle, busy.
    status: str = "active"
    # Task counter reported by the worker.
    tasks_processed: int = 0
    # Task currently being worked on, if any.
    current_task_id: str | None = None
    # Free-form metadata stored alongside the heartbeat.
    worker_metadata: dict[str, Any] = {}
293
+
294
+
295
@router.post("/register", response_model=WorkerRegistrationResponse)
@instrument_endpoint("workers.register_worker")
async def register_worker(
    registration: WorkerRegistrationRequest,
    request: Request,
    organization: dict = Depends(get_current_organization),
    db: Session = Depends(get_db),
):
    """
    Register a new worker with the control plane.

    This endpoint is called by workers on startup to get their configuration.
    The worker authenticates using KUBIYA_API_KEY (same auth as other API calls).

    The environment named in the request is created on the fly when it does
    not exist yet for the organization.

    Args:
        registration: Environment name, hostname and metadata from the worker.
        request: Incoming request; supplies ``request.state.kubiya_token`` and
            the fallback control plane URL.
        organization: Authenticated organization; its ``"id"`` is used.
        db: Database session.

    Returns:
        All configuration needed for worker to connect to Temporal and operate:
        - worker_id: Unique ID for this worker instance
        - worker_token: Environment's worker token
        - environment_name: Formatted task queue name (org_id.environment)
        - temporal_namespace, temporal_host, temporal_api_key: Temporal config
        - organization_id: Organization ID
        - control_plane_url: URL to send heartbeats

    Raises:
        HTTPException: 400 when the environment is not "ready"/"active";
            500 when credentials cannot be fetched or registration fails.
    """
    try:
        import os
        from control_plane_api.app.lib.temporal_credentials_service import (
            get_temporal_credentials_for_org,
            is_local_temporal
        )

        org_id = organization["id"]

        # Look up the environment by name
        environment = db.query(Environment).filter(
            Environment.organization_id == org_id,
            Environment.name == registration.environment_name
        ).first()

        # If environment doesn't exist, create it
        if not environment:
            logger.info(
                "creating_environment_for_worker",
                environment_name=registration.environment_name,
                org_id=org_id,
            )

            created_at = datetime.now(timezone.utc)
            environment = Environment(
                id=uuid.uuid4(),
                organization_id=org_id,
                name=registration.environment_name,
                # Worker token for this environment (UUID format)
                worker_token=uuid.uuid4(),
                status="active",  # Mark as active immediately
                created_at=created_at,
                updated_at=created_at,
            )

            db.add(environment)
            db.commit()
            db.refresh(environment)

            logger.info(
                "environment_created_for_worker",
                environment_name=registration.environment_name,
                environment_id=str(environment.id),
                org_id=org_id,
            )

        # Check if environment is ready
        if environment.status not in ["ready", "active"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Environment is not ready (status: {environment.status}). "
                       f"Please wait for provisioning to complete."
            )

        token = request.state.kubiya_token

        # Check if local Temporal (for development)
        if is_local_temporal():
            logger.info("using_local_temporal_config", org_id=org_id)
            temporal_credentials = {
                "namespace": os.getenv("TEMPORAL_NAMESPACE", "default"),
                "api_key": "",
                "host": os.getenv("TEMPORAL_HOST", "localhost:7233"),
                "org": org_id,
            }
        else:
            # Fetch org-specific credentials from Kubiya API
            try:
                temporal_credentials = await get_temporal_credentials_for_org(
                    org_id=org_id,
                    token=token,
                    use_fallback=True  # Enable fallback during migration
                )

                logger.info(
                    "temporal_credentials_fetched_for_worker",
                    org_id=org_id,
                    namespace=temporal_credentials["namespace"],
                    source="kubiya_api"
                )
            except Exception as e:
                logger.error(
                    "temporal_credentials_fetch_failed",
                    org_id=org_id,
                    error=str(e)
                )
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail="Failed to fetch Temporal credentials. Please contact support."
                )

        temporal_namespace = temporal_credentials["namespace"]
        # TODO: implement proper decryption; the key is passed through as-is
        temporal_api_key = temporal_credentials["api_key"]

        logger.info(
            "using_org_specific_namespace",
            namespace_name=temporal_namespace,
            org_id=org_id,
        )

        # Generate worker ID
        worker_id = uuid.uuid4()
        registered_at = datetime.now(timezone.utc)

        # Create worker record in database
        worker_heartbeat = WorkerHeartbeat(
            id=worker_id,
            worker_id=str(worker_id),  # Also set worker_id (has NOT NULL constraint)
            organization_id=org_id,
            environment_name=registration.environment_name,
            worker_token=environment.worker_token,
            hostname=registration.hostname,
            worker_metadata=registration.worker_metadata,
            status="active",
            tasks_processed=0,
            registered_at=registered_at,
            last_heartbeat=registered_at,
            updated_at=registered_at,
        )

        db.add(worker_heartbeat)
        db.commit()
        db.refresh(worker_heartbeat)

        # Format task queue name: org_id.environment_name
        task_queue_name = f"{org_id}.{registration.environment_name}"

        # An explicit TEMPORAL_HOST still wins. Otherwise use the host that
        # came with the credentials, so a local-Temporal worker is no longer
        # pointed at the cloud default; the cloud default is the last resort.
        temporal_host = (
            os.getenv("TEMPORAL_HOST")
            or temporal_credentials.get("host")
            or "us-east-1.aws.api.temporal.io:7233"
        )

        # Get control plane URL from environment or construct from request
        control_plane_url = os.getenv("CONTROL_PLANE_URL")
        if not control_plane_url:
            control_plane_url = f"{request.url.scheme}://{request.url.netloc}"

        logger.info(
            "worker_registered",
            worker_id=str(worker_id),
            environment_name=registration.environment_name,
            task_queue=task_queue_name,
            org_id=org_id,
        )

        return WorkerRegistrationResponse(
            worker_id=str(worker_id),
            worker_token=str(environment.worker_token),
            environment_name=task_queue_name,  # Return formatted name
            temporal_namespace=temporal_namespace,
            temporal_host=temporal_host,
            temporal_api_key=temporal_api_key,
            organization_id=org_id,
            control_plane_url=control_plane_url,
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "worker_registration_failed",
            error=str(e),
            environment_name=registration.environment_name,
            # .get() so a missing "id" cannot raise again inside the handler
            org_id=organization.get("id")
        )
        # Discard any half-finished transaction before reporting the failure
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to register worker: {str(e)}"
        )
496
+
497
+
498
@router.post("/heartbeat", status_code=status.HTTP_204_NO_CONTENT)
@instrument_endpoint("workers.worker_heartbeat")
async def worker_heartbeat(
    heartbeat: WorkerHeartbeatRequest,
    request: Request,
    organization: dict = Depends(get_current_organization),
):
    """
    Receive heartbeat from a worker.

    OPTIMIZATION: Uses Redis for scalable heartbeat storage instead of database.

    Workers should call this endpoint periodically to:
    - Confirm they're still alive
    - Update their status (active, idle, busy)
    - Report tasks processed
    - Update metadata

    The heartbeat is stored under ``worker:{worker_id}:heartbeat`` with a
    300-second expiry. This handler never raises: when Redis is missing or the
    write fails, the problem is logged and the worker still gets a 204.

    Args:
        heartbeat: Heartbeat payload, including the worker ID.
        request: Incoming request (not read by this handler).
        organization: Authenticated organization; its ``"id"`` is stored with
            the heartbeat.
    """
    try:
        org_id = organization["id"]
        redis_client = get_redis_client()

        if not redis_client:
            # Redis not available - log warning but don't fail (graceful degradation)
            logger.warning(
                "worker_heartbeat_redis_unavailable",
                worker_id=heartbeat.worker_id,
                org_id=org_id,
            )
            return None

        # Build heartbeat data for Redis
        heartbeat_data = {
            "worker_id": heartbeat.worker_id,
            "organization_id": org_id,
            "environment_name": heartbeat.environment_name,
            "status": heartbeat.status,
            "tasks_processed": heartbeat.tasks_processed,
            "current_task_id": heartbeat.current_task_id,
            "last_heartbeat": datetime.now(timezone.utc).isoformat(),
            "metadata": heartbeat.worker_metadata,
        }

        # Store in Redis with 5-minute TTL (if worker crashes, heartbeat expires)
        redis_key = f"worker:{heartbeat.worker_id}:heartbeat"
        await redis_client.set(redis_key, json.dumps(heartbeat_data), ex=300)

        logger.debug(
            "worker_heartbeat_received",
            worker_id=heartbeat.worker_id,
            status=heartbeat.status,
            environment_name=heartbeat.environment_name,
            org_id=org_id,
        )

        return None

    except Exception as e:
        logger.error(
            "worker_heartbeat_failed",
            error=str(e),
            worker_id=heartbeat.worker_id,
            # .get() so the handler itself cannot raise and break the
            # "never fail the worker" promise below
            org_id=organization.get("id")
        )
        # Don't fail the worker if heartbeat fails - graceful degradation
        return None
565
+
566
+
567
+ # Worker ID-based endpoints (new architecture)
568
+
569
+
570
class WorkerStartRequest(BaseModel):
    """Body of the worker start call, used to fetch the worker's config."""

    # Free-form system details; merged into the worker's stored metadata.
    system_info: dict[str, Any] = {}
573
+
574
+
575
class WorkerConfigResponse(BaseModel):
    """Configuration handed to a worker when it starts."""

    worker_id: str
    worker_queue_name: str
    environment_name: str
    # Full queue name: org.env.worker_queue
    task_queue_name: str

    # Temporal connection settings
    temporal_namespace: str
    temporal_host: str
    temporal_api_key: str

    organization_id: str
    control_plane_url: str
    # Heartbeat interval for the worker; defaults to 60.
    heartbeat_interval: int = 60

    # LiteLLM configuration
    litellm_api_url: str
    litellm_api_key: str

    # OpenTelemetry (OTEL) configuration for distributed tracing
    otel_enabled: bool = True
    otel_exporter_otlp_endpoint: str | None = None
    otel_service_name: str = "agent-control-plane-worker"
    otel_traces_sampler: str = "parentbased_always_on"
    otel_traces_sampler_arg: float | None = None
596
+
597
+
598
class WorkerSystemInfo(BaseModel):
    """System details a worker may attach to a heartbeat; every field is optional."""

    # Host and software versions
    hostname: str | None = None
    platform: str | None = None
    os_name: str | None = None
    os_version: str | None = None
    python_version: str | None = None
    cli_version: str | None = None
    sdk_version: str | None = None  # Worker SDK version

    # Process details
    pid: int | None = None  # Process ID
    cwd: str | None = None  # Current working directory

    # Capabilities
    supported_runtimes: list[str] | None = None  # Available runtimes (e.g., ["agno", "claude_code"])
    llm_gateway_url: str | None = None  # LiteLLM/LLM gateway URL
    docker_available: bool | None = None
    docker_version: str | None = None

    # Resource usage
    cpu_count: int | None = None
    cpu_percent: float | None = None
    memory_total: int | None = None  # bytes
    memory_used: int | None = None  # bytes
    memory_percent: float | None = None
    disk_total: int | None = None  # bytes
    disk_used: int | None = None  # bytes
    disk_percent: float | None = None
    uptime_seconds: float | None = None
622
+
623
+
624
class WorkerHeartbeatSimple(BaseModel):
    """Heartbeat payload for the endpoint that takes the worker ID from the URL."""

    # Reported worker state.
    status: str = "active"
    # Task counter reported by the worker.
    tasks_processed: int = 0
    # Task currently being worked on, if any.
    current_task_id: str | None = None
    # Free-form metadata stored alongside the heartbeat.
    worker_metadata: dict[str, Any] = {}
    # Optional system details; when omitted, the stored ones are kept.
    system_info: WorkerSystemInfo | None = None
    # Recent log lines since last heartbeat.
    logs: list[str] | None = None
632
+
633
+
634
@router.post("/{worker_id}/start", response_model=WorkerConfigResponse)
@instrument_endpoint("workers.start_worker")
async def start_worker(
    worker_id: str,
    start_request: WorkerStartRequest,
    request: Request,
    organization: dict = Depends(get_current_organization),
    db: Session = Depends(get_db),
):
    """
    Start a worker and fetch its configuration.

    This endpoint is called by workers on startup with just worker_id and API key.
    It returns all necessary configuration for the worker to connect to Temporal.

    The worker record is marked "active", its heartbeat timestamp is refreshed
    and the reported system info is merged into its stored metadata.

    Args:
        worker_id: Worker ID (UUID created in UI)
        start_request: System information from worker
        request: Incoming request; supplies the fallback control plane URL.
        organization: Authenticated organization; its ``"id"`` scopes every
            lookup.
        db: Database session.

    Returns:
        Complete worker configuration including Temporal credentials

    Raises:
        HTTPException: 404 when the worker or its queue is not found for the
            organization; 400 when the worker has no queue assigned; 500 on
            any other failure.
    """
    try:
        import os

        org_id = organization["id"]

        # Look up worker in database with eager loading
        worker = db.query(WorkerHeartbeat).options(
            joinedload(WorkerHeartbeat.worker_queue).joinedload(WorkerQueue.environment)
        ).filter(
            WorkerHeartbeat.id == worker_id,
            WorkerHeartbeat.organization_id == org_id
        ).first()

        if not worker:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Worker '{worker_id}' not found"
            )

        if not worker.worker_queue_id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Worker has no queue assigned"
            )

        # Get worker queue separately; the organization filter is kept
        worker_queue = db.query(WorkerQueue).filter(
            WorkerQueue.id == worker.worker_queue_id,
            WorkerQueue.organization_id == org_id
        ).first()

        if not worker_queue:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Worker queue not found"
            )

        worker_queue_name = worker_queue.name

        # Get environment separately; "default" when the queue has none
        environment_name = "default"
        if worker_queue.environment_id:
            environment = db.query(Environment).filter(
                Environment.id == worker_queue.environment_id,
                Environment.organization_id == org_id
            ).first()
            if environment:
                environment_name = environment.name

        # TEMPORARY: Skip database lookup and use fixed namespace + admin API key
        # NOTE(review): SECURITY - every worker of every organization receives
        # the same namespace and the TEMPORAL_CLOUD_ADMIN_TOKEN value. Replace
        # with per-organization credentials before relying on this in production.
        namespace = {
            "namespace_name": "agent-control-plane.lpagu",
            "api_key_encrypted": os.getenv("TEMPORAL_CLOUD_ADMIN_TOKEN", ""),
            "status": "ready"
        }

        logger.info(
            "using_fixed_namespace_for_testing",
            namespace_name=namespace["namespace_name"],
            worker_id=worker_id,
            org_id=org_id,
        )

        # Update worker with system info and mark as starting
        started_at = datetime.now(timezone.utc)
        worker.worker_metadata = {
            **(worker.worker_metadata or {}),
            **start_request.system_info,
            "last_start": started_at.isoformat(),
        }
        worker.status = "active"
        worker.last_heartbeat = started_at
        worker.updated_at = started_at

        db.commit()
        db.refresh(worker)

        # Build full task queue name
        task_queue_name = f"{org_id}.{environment_name}.{worker_queue_name}"

        # Get Temporal Cloud configuration
        temporal_host = os.getenv("TEMPORAL_HOST", "us-east-1.aws.api.temporal.io:7233")
        temporal_api_key = namespace.get("api_key_encrypted", "")

        # Get control plane URL
        control_plane_url = os.getenv("CONTROL_PLANE_URL")
        if not control_plane_url:
            control_plane_url = f"{request.url.scheme}://{request.url.netloc}"

        # Get LiteLLM configuration from environment
        litellm_api_url = os.getenv("LITELLM_API_URL", "https://api.openai.com/v1")
        litellm_api_key = os.getenv("LITELLM_API_KEY", "")

        logger.info(
            "worker_config_fetched",
            worker_id=worker_id,
            task_queue=task_queue_name,
            environment=environment_name,
            worker_queue=worker_queue_name,
            org_id=org_id,
        )

        # Get OTEL configuration from settings (centralized configuration)
        from control_plane_api.app.config import settings as app_settings

        return WorkerConfigResponse(
            worker_id=worker_id,
            worker_queue_name=worker_queue_name,
            environment_name=environment_name,
            task_queue_name=task_queue_name,
            temporal_namespace=namespace.get("namespace_name"),
            temporal_host=temporal_host,
            temporal_api_key=temporal_api_key,
            organization_id=org_id,
            control_plane_url=control_plane_url,
            heartbeat_interval=worker_queue.heartbeat_interval or 60,
            litellm_api_url=litellm_api_url,
            litellm_api_key=litellm_api_key,
            # Pass OTEL configuration to worker (centralized config)
            otel_enabled=app_settings.OTEL_ENABLED,
            otel_exporter_otlp_endpoint=app_settings.OTEL_EXPORTER_OTLP_ENDPOINT,
            otel_service_name="agent-control-plane-worker",
            otel_traces_sampler=app_settings.OTEL_TRACES_SAMPLER,
            otel_traces_sampler_arg=app_settings.OTEL_TRACES_SAMPLER_ARG,
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "worker_start_failed",
            error=str(e),
            worker_id=worker_id,
            org_id=organization.get("id")
        )
        # Discard any half-finished transaction before reporting the failure
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to start worker: {str(e)}"
        )
797
+
798
+
799
@router.post("/{worker_id}/heartbeat", status_code=status.HTTP_204_NO_CONTENT)
@instrument_endpoint("workers.worker_heartbeat_simple")
async def worker_heartbeat_simple(
    worker_id: str,
    heartbeat: WorkerHeartbeatSimple,
    request: Request,
    organization: dict = Depends(get_current_organization),
):
    """
    Receive heartbeat from a worker (simplified version with worker_id in URL).

    OPTIMIZATION: Uses Redis for scalable heartbeat storage instead of database.

    The heartbeat is stored under ``worker:{worker_id}:heartbeat`` with a
    300-second expiry. System info and logs from the previously stored
    heartbeat are carried over when the new one omits them; at most the last
    100 log lines are kept. This handler never raises: failures are logged and
    the worker still gets a 204.

    Args:
        worker_id: Worker ID (UUID)
        heartbeat: Heartbeat data
        request: Incoming request (not read by this handler).
        organization: Authenticated organization; its ``"id"`` is stored with
            the heartbeat.
    """
    try:
        org_id = organization["id"]
        redis_client = get_redis_client()

        if not redis_client:
            # Redis not available - log warning but don't fail (graceful degradation)
            logger.warning(
                "worker_heartbeat_redis_unavailable",
                worker_id=worker_id,
                org_id=org_id,
            )
            return None

        # Build heartbeat data for Redis
        heartbeat_data = {
            "worker_id": worker_id,
            "organization_id": org_id,
            "status": heartbeat.status,
            "tasks_processed": heartbeat.tasks_processed,
            "current_task_id": heartbeat.current_task_id,
            "last_heartbeat": datetime.now(timezone.utc).isoformat(),
            "metadata": heartbeat.worker_metadata,
        }

        # Get existing heartbeat data from Redis (for merging)
        redis_key = f"worker:{worker_id}:heartbeat"
        existing_heartbeat = None
        try:
            existing_data = await redis_client.get(redis_key)
            if existing_data:
                parsed = json.loads(existing_data)
                # Only a JSON object can be merged. Anything else is ignored,
                # so a malformed cache entry cannot make the heartbeat fail.
                if isinstance(parsed, dict):
                    existing_heartbeat = parsed
        except Exception as e:
            logger.warning("heartbeat_redis_get_failed", error=str(e))

        # Handle system_info - preserve from last full heartbeat if not provided (lightweight mode)
        if heartbeat.system_info:
            # Full heartbeat - update system info
            heartbeat_data["system_info"] = heartbeat.system_info.dict(exclude_none=True)
        elif existing_heartbeat and "system_info" in existing_heartbeat:
            # Lightweight heartbeat - preserve existing system info
            heartbeat_data["system_info"] = existing_heartbeat["system_info"]

        # Handle logs - append new lines to the stored ones
        if heartbeat.logs:
            previous_logs = existing_heartbeat.get("logs", []) if existing_heartbeat else []
            if not isinstance(previous_logs, list):
                # Stored logs are unusable; keep only the new lines
                logger.warning(
                    "heartbeat_log_merge_failed",
                    error="stored logs are not a list",
                )
                previous_logs = []
            heartbeat_data["logs"] = (previous_logs + heartbeat.logs)[-100:]  # Keep last 100 lines
        elif existing_heartbeat and "logs" in existing_heartbeat:
            # Preserve existing logs if no new logs provided
            heartbeat_data["logs"] = existing_heartbeat["logs"]

        # Store in Redis with 5-minute TTL (if worker crashes, heartbeat expires)
        # TTL is 5x the default heartbeat interval (60s * 5 = 300s) for safety
        await redis_client.set(redis_key, json.dumps(heartbeat_data), ex=300)

        logger.debug(
            "worker_heartbeat_received",
            worker_id=worker_id,
            status=heartbeat.status,
            org_id=org_id,
        )

        return None

    except Exception as e:
        logger.error(
            "worker_heartbeat_failed",
            error=str(e),
            worker_id=worker_id,
            org_id=organization.get("id")
        )
        # Don't fail the worker if heartbeat fails - graceful degradation
        return None
898
+
899
+
900
class WorkerDisconnectRequest(BaseModel):
    """Details a worker sends when it goes offline."""

    # Why the worker is disconnecting: shutdown, error, crash, etc.
    reason: str = "shutdown"
    # Process exit code, when the worker reports one.
    exit_code: int | None = None
    # Error text, when the worker reports one.
    error_message: str | None = None
905
+
906
+
907
@router.post("/{worker_id}/disconnect", status_code=status.HTTP_204_NO_CONTENT)
@instrument_endpoint("workers.worker_disconnect")
async def worker_disconnect(
    worker_id: str,
    disconnect: WorkerDisconnectRequest,
    request: Request,
    organization: dict = Depends(get_current_organization),
    db: Session = Depends(get_db),
):
    """
    Mark a worker as disconnected/offline.

    This endpoint is called by workers when they:
    - Shut down gracefully (Ctrl+C)
    - Exit due to an error
    - Crash unexpectedly (via atexit handler)

    The worker's Redis heartbeat key is deleted first, then the database
    record is set to "disconnected" and the disconnect details are added to
    its metadata.

    Args:
        worker_id: Worker ID (UUID)
        disconnect: Disconnect details (reason, exit code, error)
        request: Incoming request (not read by this handler).
        organization: Authenticated organization; its ``"id"`` scopes the
            worker lookup.
        db: Database session.

    Raises:
        HTTPException: 404 when the worker is not found for the organization;
            500 on any other failure.
    """
    try:
        org_id = organization["id"]

        # Look up worker in database
        worker = db.query(WorkerHeartbeat).filter(
            WorkerHeartbeat.id == worker_id,
            WorkerHeartbeat.organization_id == org_id
        ).first()

        if not worker:
            logger.warning(
                "worker_disconnect_not_found",
                worker_id=worker_id,
                org_id=org_id,
            )
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Worker not found"
            )

        # IMPORTANT: Delete from Redis FIRST for immediate effect
        # This ensures workers are removed from active lists immediately
        redis_client = get_redis_client()
        if redis_client:
            redis_key = f"worker:{worker_id}:heartbeat"
            try:
                await redis_client.delete(redis_key)
                logger.info(
                    "worker_removed_from_redis",
                    worker_id=worker_id,
                    redis_key=redis_key
                )
            except Exception as redis_error:
                # Log but don't fail the disconnect
                logger.warning(
                    "redis_delete_failed",
                    error=str(redis_error),
                    worker_id=worker_id
                )

        # THEN update worker status to disconnected in database
        disconnected_at = datetime.now(timezone.utc)
        worker.status = "disconnected"
        worker.last_heartbeat = disconnected_at
        # Merge rather than replace, so metadata stored at registration or
        # start is not lost when the worker goes offline.
        worker.worker_metadata = {
            **(worker.worker_metadata or {}),
            "disconnect_reason": disconnect.reason,
            "disconnect_time": disconnected_at.isoformat(),
            "exit_code": disconnect.exit_code,
            "error_message": disconnect.error_message,
        }
        worker.updated_at = disconnected_at

        db.commit()

        logger.info(
            "worker_disconnected",
            worker_id=worker_id,
            reason=disconnect.reason,
            exit_code=disconnect.exit_code,
            org_id=org_id,
        )

        return None

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "worker_disconnect_failed",
            error=str(e),
            worker_id=worker_id,
            org_id=organization.get("id")
        )
        # Discard any half-finished transaction before reporting the failure
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process disconnect: {str(e)}"
        )