kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubiya-control-plane-api might be problematic. Click here for more details.

Files changed (185) hide show
  1. control_plane_api/README.md +266 -0
  2. control_plane_api/__init__.py +0 -0
  3. control_plane_api/__version__.py +1 -0
  4. control_plane_api/alembic/README +1 -0
  5. control_plane_api/alembic/env.py +98 -0
  6. control_plane_api/alembic/script.py.mako +28 -0
  7. control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
  8. control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
  9. control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
  10. control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
  11. control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
  12. control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
  13. control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
  14. control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
  15. control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
  16. control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
  17. control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
  18. control_plane_api/alembic.ini +148 -0
  19. control_plane_api/api/index.py +12 -0
  20. control_plane_api/app/__init__.py +11 -0
  21. control_plane_api/app/activities/__init__.py +20 -0
  22. control_plane_api/app/activities/agent_activities.py +379 -0
  23. control_plane_api/app/activities/team_activities.py +410 -0
  24. control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
  25. control_plane_api/app/config/__init__.py +35 -0
  26. control_plane_api/app/config/api_config.py +354 -0
  27. control_plane_api/app/config/model_pricing.py +318 -0
  28. control_plane_api/app/config.py +95 -0
  29. control_plane_api/app/database.py +135 -0
  30. control_plane_api/app/exceptions.py +408 -0
  31. control_plane_api/app/lib/__init__.py +11 -0
  32. control_plane_api/app/lib/job_executor.py +312 -0
  33. control_plane_api/app/lib/kubiya_client.py +235 -0
  34. control_plane_api/app/lib/litellm_pricing.py +166 -0
  35. control_plane_api/app/lib/planning_tools/__init__.py +22 -0
  36. control_plane_api/app/lib/planning_tools/agents.py +155 -0
  37. control_plane_api/app/lib/planning_tools/base.py +189 -0
  38. control_plane_api/app/lib/planning_tools/environments.py +214 -0
  39. control_plane_api/app/lib/planning_tools/resources.py +240 -0
  40. control_plane_api/app/lib/planning_tools/teams.py +198 -0
  41. control_plane_api/app/lib/policy_enforcer_client.py +939 -0
  42. control_plane_api/app/lib/redis_client.py +436 -0
  43. control_plane_api/app/lib/supabase.py +71 -0
  44. control_plane_api/app/lib/temporal_client.py +138 -0
  45. control_plane_api/app/lib/validation/__init__.py +20 -0
  46. control_plane_api/app/lib/validation/runtime_validation.py +287 -0
  47. control_plane_api/app/main.py +128 -0
  48. control_plane_api/app/middleware/__init__.py +8 -0
  49. control_plane_api/app/middleware/auth.py +513 -0
  50. control_plane_api/app/middleware/exception_handler.py +267 -0
  51. control_plane_api/app/middleware/rate_limiting.py +384 -0
  52. control_plane_api/app/middleware/request_id.py +202 -0
  53. control_plane_api/app/models/__init__.py +27 -0
  54. control_plane_api/app/models/agent.py +79 -0
  55. control_plane_api/app/models/analytics.py +206 -0
  56. control_plane_api/app/models/associations.py +81 -0
  57. control_plane_api/app/models/environment.py +63 -0
  58. control_plane_api/app/models/execution.py +93 -0
  59. control_plane_api/app/models/job.py +179 -0
  60. control_plane_api/app/models/llm_model.py +75 -0
  61. control_plane_api/app/models/presence.py +49 -0
  62. control_plane_api/app/models/project.py +47 -0
  63. control_plane_api/app/models/session.py +38 -0
  64. control_plane_api/app/models/team.py +66 -0
  65. control_plane_api/app/models/workflow.py +55 -0
  66. control_plane_api/app/policies/README.md +121 -0
  67. control_plane_api/app/policies/approved_users.rego +62 -0
  68. control_plane_api/app/policies/business_hours.rego +51 -0
  69. control_plane_api/app/policies/rate_limiting.rego +100 -0
  70. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  71. control_plane_api/app/routers/__init__.py +4 -0
  72. control_plane_api/app/routers/agents.py +364 -0
  73. control_plane_api/app/routers/agents_v2.py +1260 -0
  74. control_plane_api/app/routers/analytics.py +1014 -0
  75. control_plane_api/app/routers/context_manager.py +562 -0
  76. control_plane_api/app/routers/environment_context.py +270 -0
  77. control_plane_api/app/routers/environments.py +715 -0
  78. control_plane_api/app/routers/execution_environment.py +517 -0
  79. control_plane_api/app/routers/executions.py +1911 -0
  80. control_plane_api/app/routers/health.py +92 -0
  81. control_plane_api/app/routers/health_v2.py +326 -0
  82. control_plane_api/app/routers/integrations.py +274 -0
  83. control_plane_api/app/routers/jobs.py +1344 -0
  84. control_plane_api/app/routers/models.py +82 -0
  85. control_plane_api/app/routers/models_v2.py +361 -0
  86. control_plane_api/app/routers/policies.py +639 -0
  87. control_plane_api/app/routers/presence.py +234 -0
  88. control_plane_api/app/routers/projects.py +902 -0
  89. control_plane_api/app/routers/runners.py +379 -0
  90. control_plane_api/app/routers/runtimes.py +172 -0
  91. control_plane_api/app/routers/secrets.py +155 -0
  92. control_plane_api/app/routers/skills.py +1001 -0
  93. control_plane_api/app/routers/skills_definitions.py +140 -0
  94. control_plane_api/app/routers/task_planning.py +1256 -0
  95. control_plane_api/app/routers/task_queues.py +654 -0
  96. control_plane_api/app/routers/team_context.py +270 -0
  97. control_plane_api/app/routers/teams.py +1400 -0
  98. control_plane_api/app/routers/worker_queues.py +1545 -0
  99. control_plane_api/app/routers/workers.py +935 -0
  100. control_plane_api/app/routers/workflows.py +204 -0
  101. control_plane_api/app/runtimes/__init__.py +6 -0
  102. control_plane_api/app/runtimes/validation.py +344 -0
  103. control_plane_api/app/schemas/job_schemas.py +295 -0
  104. control_plane_api/app/services/__init__.py +1 -0
  105. control_plane_api/app/services/agno_service.py +619 -0
  106. control_plane_api/app/services/litellm_service.py +190 -0
  107. control_plane_api/app/services/policy_service.py +525 -0
  108. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  109. control_plane_api/app/skills/__init__.py +44 -0
  110. control_plane_api/app/skills/base.py +229 -0
  111. control_plane_api/app/skills/business_intelligence.py +189 -0
  112. control_plane_api/app/skills/data_visualization.py +154 -0
  113. control_plane_api/app/skills/docker.py +104 -0
  114. control_plane_api/app/skills/file_generation.py +94 -0
  115. control_plane_api/app/skills/file_system.py +110 -0
  116. control_plane_api/app/skills/python.py +92 -0
  117. control_plane_api/app/skills/registry.py +65 -0
  118. control_plane_api/app/skills/shell.py +102 -0
  119. control_plane_api/app/skills/workflow_executor.py +469 -0
  120. control_plane_api/app/utils/workflow_executor.py +354 -0
  121. control_plane_api/app/workflows/__init__.py +11 -0
  122. control_plane_api/app/workflows/agent_execution.py +507 -0
  123. control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
  124. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  125. control_plane_api/app/workflows/team_execution.py +399 -0
  126. control_plane_api/scripts/seed_models.py +239 -0
  127. control_plane_api/worker/__init__.py +0 -0
  128. control_plane_api/worker/activities/__init__.py +0 -0
  129. control_plane_api/worker/activities/agent_activities.py +1241 -0
  130. control_plane_api/worker/activities/approval_activities.py +234 -0
  131. control_plane_api/worker/activities/runtime_activities.py +388 -0
  132. control_plane_api/worker/activities/skill_activities.py +267 -0
  133. control_plane_api/worker/activities/team_activities.py +1217 -0
  134. control_plane_api/worker/config/__init__.py +31 -0
  135. control_plane_api/worker/config/worker_config.py +275 -0
  136. control_plane_api/worker/control_plane_client.py +529 -0
  137. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  138. control_plane_api/worker/models/__init__.py +1 -0
  139. control_plane_api/worker/models/inputs.py +89 -0
  140. control_plane_api/worker/runtimes/__init__.py +31 -0
  141. control_plane_api/worker/runtimes/base.py +789 -0
  142. control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
  143. control_plane_api/worker/runtimes/default_runtime.py +617 -0
  144. control_plane_api/worker/runtimes/factory.py +173 -0
  145. control_plane_api/worker/runtimes/validation.py +93 -0
  146. control_plane_api/worker/services/__init__.py +1 -0
  147. control_plane_api/worker/services/agent_executor.py +422 -0
  148. control_plane_api/worker/services/agent_executor_v2.py +383 -0
  149. control_plane_api/worker/services/analytics_collector.py +457 -0
  150. control_plane_api/worker/services/analytics_service.py +464 -0
  151. control_plane_api/worker/services/approval_tools.py +310 -0
  152. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  153. control_plane_api/worker/services/cancellation_manager.py +177 -0
  154. control_plane_api/worker/services/data_visualization.py +827 -0
  155. control_plane_api/worker/services/jira_tools.py +257 -0
  156. control_plane_api/worker/services/runtime_analytics.py +328 -0
  157. control_plane_api/worker/services/session_service.py +194 -0
  158. control_plane_api/worker/services/skill_factory.py +175 -0
  159. control_plane_api/worker/services/team_executor.py +574 -0
  160. control_plane_api/worker/services/team_executor_v2.py +465 -0
  161. control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
  162. control_plane_api/worker/tests/__init__.py +1 -0
  163. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  164. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  165. control_plane_api/worker/tests/integration/__init__.py +0 -0
  166. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  167. control_plane_api/worker/tests/unit/__init__.py +0 -0
  168. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  169. control_plane_api/worker/utils/__init__.py +1 -0
  170. control_plane_api/worker/utils/chunk_batcher.py +305 -0
  171. control_plane_api/worker/utils/retry_utils.py +60 -0
  172. control_plane_api/worker/utils/streaming_utils.py +373 -0
  173. control_plane_api/worker/worker.py +753 -0
  174. control_plane_api/worker/workflows/__init__.py +0 -0
  175. control_plane_api/worker/workflows/agent_execution.py +589 -0
  176. control_plane_api/worker/workflows/team_execution.py +429 -0
  177. kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
  178. kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
  179. kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
  180. kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
  181. kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
  182. kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
  183. kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
  184. {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
  185. {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,753 @@
1
+ """
2
+ Temporal worker for Agent Control Plane - Decoupled Architecture.
3
+
4
+ This worker:
5
+ 1. Registers with Control Plane API on startup using KUBIYA_API_KEY
6
+ 2. Gets dynamic configuration (Temporal credentials, task queue name, etc.)
7
+ 3. Connects to Temporal Cloud with provided credentials
8
+ 4. Sends periodic heartbeats to Control Plane
9
+ 5. Has NO direct database access - all state managed via Control Plane API
10
+
11
+ Environment variables REQUIRED:
12
+ - KUBIYA_API_KEY: Kubiya API key for authentication (required)
13
+ - CONTROL_PLANE_URL: Control Plane API URL (e.g., https://control-plane.kubiya.ai)
14
+ - QUEUE_ID: Worker queue ID (UUID) to start this worker for (required; ENVIRONMENT_NAME is not read by this module)
15
+
16
+ Environment variables OPTIONAL:
17
+ - WORKER_HOSTNAME: Custom hostname for worker (default: auto-detected)
18
+ - HEARTBEAT_INTERVAL: Seconds between heartbeats (default: 30)
19
+ """
20
+
21
+ import asyncio
22
+ import os
23
+ import sys
24
+ import structlog
25
+ import httpx
26
+ import socket
27
+ import platform
28
+ import psutil
29
+ import time
30
+ from dataclasses import dataclass
31
+ from typing import Optional, List
32
+ from temporalio.worker import Worker
33
+ from temporalio.client import Client, TLSConfig
34
+ from collections import deque
35
+
36
+ # Import workflows and activities from local package
37
+ from control_plane_api.worker.workflows.agent_execution import AgentExecutionWorkflow
38
+ from control_plane_api.worker.workflows.team_execution import TeamExecutionWorkflow
39
+ from control_plane_api.worker.activities.agent_activities import (
40
+ execute_agent_llm,
41
+ update_execution_status,
42
+ update_agent_status,
43
+ persist_conversation_history,
44
+ )
45
+ from control_plane_api.worker.activities.team_activities import (
46
+ get_team_agents,
47
+ execute_team_coordination,
48
+ )
49
+ from control_plane_api.worker.activities.runtime_activities import (
50
+ execute_with_runtime,
51
+ )
52
+
53
+ # Configure structured logging
54
+ import logging
55
+
56
+
57
def pretty_console_renderer(logger, name, event_dict):
    """
    Render a structlog event as one colorized, human-readable line.

    Meant to be the last processor in a structlog chain, in place of a JSON
    renderer.

    Args:
        logger: Wrapped logger object (unused; part of the processor signature).
        name: Name of the log method that was called (unused).
        event_dict: Event dictionary. ``level``, ``event`` and ``timestamp``
            build the main message, ``logger`` is ignored, and every other
            key is appended as ``key=value`` context.

    Returns:
        The formatted line, including ANSI escape codes.
    """
    level = event_dict.get("level", "info").upper()
    event = event_dict.get("event", "")
    timestamp = event_dict.get("timestamp", "")

    # Keep only the HH:MM:SS part of an ISO timestamp
    if timestamp:
        try:
            time_part = timestamp.split("T")[1].split(".")[0]
        except (AttributeError, IndexError, TypeError):
            # Not an ISO string: show it unchanged rather than fail while
            # logging. Narrower than a bare except so that
            # KeyboardInterrupt/SystemExit are never swallowed here.
            time_part = timestamp
    else:
        time_part = time.strftime("%H:%M:%S")

    # ANSI codes actually used in the rendered line
    RESET = "\033[0m"
    GRAY = "\033[90m"

    # Icon per level; unknown levels get a neutral bullet
    level_icons = {
        "INFO": "ℹ️",
        "WARNING": "⚠️",
        "ERROR": "❌",
        "DEBUG": "🔍",
    }
    icon = level_icons.get(level, "•")

    # Format the main message
    message = f"{GRAY}[{time_part}]{RESET} {icon} {event}"

    # Add relevant context (skip keys already rendered or internal)
    skip_keys = {"level", "event", "timestamp", "logger"}
    context_parts = []

    for key, value in event_dict.items():
        if key in skip_keys:
            continue
        if isinstance(value, bool):
            value_str = "✓" if value else "✗"
        elif isinstance(value, str) and len(value) > 60:
            # Truncate long strings to 60 visible characters
            value_str = value[:57] + "..."
        else:
            value_str = str(value)

        context_parts.append(f"{GRAY}{key}={RESET}{value_str}")

    if context_parts:
        message += f" {GRAY}({', '.join(context_parts)}){RESET}"

    return message
118
+
119
+
120
# Logging pipeline: merge context variables, stamp level and ISO time, then
# hand the event to the pretty renderer. Output goes through structlog's
# print logger; the bound logger is built with logging.INFO as its threshold.
structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
    logger_factory=structlog.PrintLoggerFactory(),
    processors=[
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        pretty_console_renderer,
    ],
)

logger = structlog.get_logger()

# Reference point for the "uptime_seconds" value reported in system info
worker_start_time = time.time()

# Lines queued for the next heartbeat; bounded so only the newest 500 are kept
log_buffer = deque(maxlen=500)
136
+
137
+
138
class ProgressUI:
    """Small console helper that prints worker startup progress, minikube style."""

    @staticmethod
    def step(emoji: str, message: str, status: str = ""):
        """Print one progress line; the status is appended only when non-empty."""
        line = f"{emoji} {message} {status}" if status else f"{emoji} {message}"
        print(line)

    @staticmethod
    def success(emoji: str, message: str):
        """Print a line in green."""
        green, reset = "\033[92m", "\033[0m"
        print(f"{green}{emoji} {message}{reset}")

    @staticmethod
    def error(emoji: str, message: str):
        """Print a line in red."""
        red, reset = "\033[91m", "\033[0m"
        print(f"{red}{emoji} {message}{reset}")

    @staticmethod
    def header(text: str):
        """Print a bold cyan section header preceded by a blank line."""
        bold_cyan, reset = "\033[96m" + "\033[1m", "\033[0m"
        print(f"\n{bold_cyan}{text}{reset}")

    @staticmethod
    def banner():
        """Print the startup banner surrounded by blank lines."""
        bold_cyan, reset = "\033[96m" + "\033[1m", "\033[0m"
        print(f"\n{bold_cyan}🚀 Kubiya Agent Worker{reset}\n")
178
+
179
+
180
def _probe_docker():
    """
    Locate a docker binary and ask it for its version.

    Returns:
        Tuple ``(docker_available, docker_version)``. Any failure is logged
        and leaves the values gathered so far (initially ``False``/``None``).
    """
    available, version = False, None
    try:
        import shutil
        import subprocess

        # PATH lookup first
        binary = shutil.which('docker')
        logger.debug("docker_which_result", path=binary)

        # Then a few well-known install locations
        if not binary:
            for candidate in (
                '/usr/local/bin/docker',
                '/usr/bin/docker',
                '/opt/homebrew/bin/docker',
            ):
                present = os.path.exists(candidate)
                logger.debug("docker_checking_path", path=candidate, exists=present)
                if present:
                    binary = candidate
                    break

        if not binary:
            logger.warning("docker_not_found_in_path_or_common_locations")
            return available, version

        logger.debug("docker_running_version_check", path=binary)
        proc = subprocess.run(
            [binary, '--version'],
            capture_output=True,
            text=True,
            timeout=3,
            shell=False
        )
        logger.debug(
            "docker_version_output",
            returncode=proc.returncode,
            stdout=proc.stdout[:200],
            stderr=proc.stderr[:200] if proc.stderr else None
        )

        if proc.returncode != 0:
            logger.warning("docker_version_check_failed", returncode=proc.returncode)
            return available, version

        available = True
        # Example output: "Docker version 28.1.1, build 4eba377"
        banner = proc.stdout.strip()
        head = banner.split(',')[0] if ',' in banner else banner
        version = head.replace('Docker version', '').strip()
        logger.debug("docker_detected", version=version, path=binary)
    except Exception as e:
        # Detection is best-effort: report and carry on
        logger.warning("docker_detection_failed", error=str(e), error_type=type(e).__name__)
        import traceback
        logger.debug("docker_detection_traceback", traceback=traceback.format_exc())
    return available, version


def collect_system_info() -> dict:
    """
    Gather a snapshot of host metrics and environment details.

    Returns:
        Dict with hostname, platform/OS details, Python version, Docker
        availability, CPU/memory/disk figures and worker uptime. If
        collection fails, only ``hostname`` and ``platform`` are returned.
    """
    try:
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        # CLI version lookup is skipped for now
        cli_version = None

        docker_available, docker_version = _probe_docker()

        os_name = platform.system()  # Darwin, Linux, Windows
        os_version = platform.release()

        snapshot = {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "os_name": os_name,
            "os_version": os_version,
            "python_version": platform.python_version(),
            "cli_version": cli_version,
            "docker_available": docker_available,
            "docker_version": docker_version,
            "cpu_count": psutil.cpu_count(),
            "cpu_percent": cpu_percent,
            "memory_total": memory.total,
            "memory_used": memory.used,
            "memory_percent": memory.percent,
            "disk_total": disk.total,
            "disk_used": disk.used,
            "disk_percent": disk.percent,
            "uptime_seconds": time.time() - worker_start_time,
        }
        return snapshot
    except Exception as e:
        logger.warning("failed_to_collect_system_info", error=str(e))
        return {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
        }
279
+
280
+
281
def get_recent_logs() -> List[str]:
    """
    Drain the shared log buffer.

    Returns:
        The buffered lines, oldest first; the buffer is empty afterwards.
    """
    drained = [*log_buffer]
    log_buffer.clear()
    return drained
288
+
289
+
290
def log_to_buffer(message: str):
    """
    Queue a line in the shared log buffer so it is included in the next heartbeat.

    Args:
        message: Text to queue.
    """
    log_buffer.append(message)
295
+
296
+
297
@dataclass
class WorkerConfig:
    """Settings returned by the Control Plane when a worker is started for a queue."""

    # Identifier assigned to this worker
    worker_id: str
    # Temporal task queue name the worker polls
    environment_name: str
    # Temporal connection details
    temporal_namespace: str
    temporal_host: str
    temporal_api_key: str
    organization_id: str
    # Base URL used for heartbeat and disconnect calls
    control_plane_url: str
    # LLM proxy endpoint and key
    litellm_api_url: str = "https://llm-proxy.kubiya.ai"
    litellm_api_key: str = ""
309
+
310
+
311
async def start_worker_for_queue(
    control_plane_url: str,
    kubiya_api_key: str,
    queue_id: str,
) -> WorkerConfig:
    """
    Start a worker for a specific queue ID.

    POSTs to ``/api/v1/worker-queues/{queue_id}/start`` on the Control Plane
    and turns a 200 response into a WorkerConfig.

    Args:
        control_plane_url: Control Plane API URL
        kubiya_api_key: Kubiya API key for authentication
        queue_id: Worker queue ID (UUID)

    Returns:
        WorkerConfig with all necessary configuration

    Raises:
        SystemExit: With code 1 when the request fails (httpx.RequestError)
            or the Control Plane answers with a non-200 status.
        KeyError: When a 200 response lacks a required field.
    """
    logger.info(
        "starting_worker_for_queue",
        queue_id=queue_id,
        control_plane_url=control_plane_url,
    )

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{control_plane_url}/api/v1/worker-queues/{queue_id}/start",
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )
    except httpx.RequestError as e:
        ProgressUI.error("✗", f"Connection failed: {control_plane_url}")
        print(f" {str(e)}\n")
        logger.error("control_plane_connection_failed", error=str(e))
        sys.exit(1)

    if response.status_code != 200:
        # Prefer the API's "detail" field; fall back to the raw body
        error_message = response.text
        try:
            error_message = response.json().get("detail", response.text)
        except (ValueError, AttributeError):
            # Body is not JSON, or is JSON but not an object
            pass

        ProgressUI.error("✗", "Worker registration failed")
        print(f" {error_message}\n")

        logger.error(
            "worker_start_failed",
            status_code=response.status_code,
            queue_id=queue_id,
        )
        sys.exit(1)

    data = response.json()

    # Build the config before announcing success, so a malformed response
    # fails with a KeyError naming the missing field instead of a TypeError
    # from slicing a missing worker_id.
    config = WorkerConfig(
        worker_id=data["worker_id"],
        environment_name=data["task_queue_name"],  # This is now the queue UUID
        temporal_namespace=data["temporal_namespace"],
        temporal_host=data["temporal_host"],
        temporal_api_key=data["temporal_api_key"],
        organization_id=data["organization_id"],
        control_plane_url=data["control_plane_url"],
        litellm_api_url=data.get("litellm_api_url", "https://llm-proxy.kubiya.ai"),
        litellm_api_key=data.get("litellm_api_key", ""),
    )

    ProgressUI.success("✓", "Registered with control plane")
    logger.info(
        "worker_registered",
        worker_id=config.worker_id[:8],
        queue_name=data.get("queue_name"),
    )
    return config
392
+
393
+
394
async def send_heartbeat(
    config: WorkerConfig,
    kubiya_api_key: str,
    status: str = "active",
    tasks_processed: int = 0,
    current_task_id: Optional[str] = None
) -> bool:
    """
    Send heartbeat to Control Plane with system info and logs.

    The log buffer is drained before the request is sent, so the drained
    lines are not re-queued if the request fails.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        status: Worker status (active, idle, busy)
        tasks_processed: Number of tasks processed
        current_task_id: Currently executing task ID

    Returns:
        True if the Control Plane answered 200 or 204, False otherwise
    """
    # collect_system_info() samples CPU for 0.1s and may run a
    # `docker --version` subprocess with a 3s timeout. Run it in the default
    # executor so it does not stall the event loop.
    loop = asyncio.get_running_loop()
    system_info = await loop.run_in_executor(None, collect_system_info)

    # Get logs since last heartbeat
    logs = get_recent_logs()

    heartbeat_data = {
        "status": status,
        "tasks_processed": tasks_processed,
        "current_task_id": current_task_id,
        "worker_metadata": {},
        "system_info": system_info,
        "logs": logs or None,  # send null rather than an empty list
    }

    try:
        url = f"{config.control_plane_url}/api/v1/workers/{config.worker_id}/heartbeat"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(
                url,
                json=heartbeat_data,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

        if response.status_code in (200, 204):
            logger.debug("heartbeat_sent", worker_id=config.worker_id)
            log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Heartbeat sent successfully")
            return True

        logger.warning(
            "heartbeat_failed",
            status_code=response.status_code,
            response=response.text[:200]
        )
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Heartbeat failed: HTTP {response.status_code}")
        return False

    except Exception as e:
        # A heartbeat failure must not take the worker down
        error_msg = f"{type(e).__name__}: {str(e)}" if str(e) else f"{type(e).__name__} (no message)"
        logger.warning(
            "heartbeat_error",
            error=error_msg,
            error_type=type(e).__name__,
            worker_id=config.worker_id[:8] if config.worker_id else "unknown"
        )
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Heartbeat error: {error_msg[:150]}")
        return False
462
+
463
+
464
async def create_temporal_client(config: WorkerConfig) -> Client:
    """
    Open a Temporal connection using the settings from the Control Plane.

    Args:
        config: Worker configuration; the Temporal host, namespace and API
            key are read from it

    Returns:
        The connected Temporal client

    Raises:
        Exception: Any connection error is logged, printed and re-raised.
    """
    try:
        # TLS is enabled; the API key is sent as a bearer token in RPC metadata
        temporal_client = await Client.connect(
            config.temporal_host,
            namespace=config.temporal_namespace,
            tls=TLSConfig(),
            rpc_metadata={"authorization": f"Bearer {config.temporal_api_key}"},
        )
    except Exception as exc:
        logger.error("connection_failed", error=str(exc))
        ProgressUI.error("✗", f"Temporal connection failed: {str(exc)}")
        raise
    else:
        return temporal_client
489
+
490
+
491
async def send_disconnect(
    config: WorkerConfig,
    kubiya_api_key: str,
    reason: str = "shutdown",
    exit_code: Optional[int] = None,
    error_message: Optional[str] = None
) -> bool:
    """
    Tell the Control Plane that this worker is going away.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        reason: Why the worker is disconnecting (shutdown, error, crash, etc.)
        exit_code: Process exit code, when there is one
        error_message: Error description, when there is one

    Returns:
        True when the Control Plane answered 200 or 204; False on any other
        status or on any exception (which is logged, not raised)
    """
    payload = {
        "reason": reason,
        "exit_code": exit_code,
        "error_message": error_message
    }

    try:
        endpoint = f"{config.control_plane_url}/api/v1/workers/{config.worker_id}/disconnect"
        async with httpx.AsyncClient(timeout=10.0) as http:
            reply = await http.post(
                endpoint,
                json=payload,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

        if reply.status_code not in (200, 204):
            logger.warning(
                "disconnect_notification_failed",
                status_code=reply.status_code,
                response=reply.text[:200]
            )
            return False

        logger.info(
            "worker_disconnected",
            worker_id=config.worker_id,
            reason=reason,
            exit_code=exit_code
        )
        return True

    except Exception as exc:
        logger.warning("disconnect_notification_error", error=str(exc))
        return False
544
+
545
+
546
async def heartbeat_loop(config: WorkerConfig, kubiya_api_key: str, interval: int = 30):
    """
    Send a heartbeat every ``interval`` seconds until the task is cancelled.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        interval: Seconds to sleep before each heartbeat
    """
    # Always reported as 0: nothing in this loop updates the counter
    processed_count = 0

    while True:
        try:
            await asyncio.sleep(interval)
            await send_heartbeat(
                config=config,
                kubiya_api_key=kubiya_api_key,
                status="active",
                tasks_processed=processed_count
            )
        except asyncio.CancelledError:
            # Cancellation ends the loop; the coroutine then returns normally
            logger.info("heartbeat_loop_cancelled")
            return
        except Exception as exc:
            # Keep looping after any other failure
            logger.warning("heartbeat_loop_error", error=str(exc))
571
+
572
+
573
async def _stop_heartbeat(heartbeat_task) -> None:
    """Cancel the background heartbeat task, if one is running, and wait for it to end."""
    if heartbeat_task is None or heartbeat_task.done():
        return
    heartbeat_task.cancel()
    try:
        await heartbeat_task
    except asyncio.CancelledError:
        pass


async def run_worker():
    """
    Run the Temporal worker with decoupled architecture.

    The worker:
    1. Registers with Control Plane API
    2. Gets dynamic configuration (Temporal credentials, task queue, etc.)
    3. Connects to Temporal Cloud
    4. Starts heartbeat loop
    5. Registers workflows and activities
    6. Polls for tasks and executes them

    Reads KUBIYA_API_KEY, CONTROL_PLANE_URL and QUEUE_ID (all required) and
    HEARTBEAT_INTERVAL (optional, default 30) from the environment.

    Raises:
        SystemExit: With code 1 when a required environment variable is missing.
        asyncio.CancelledError: Re-raised after the disconnect notification
            when the task is cancelled.
        Exception: Any other failure is logged, reported to the Control Plane
            once registered, and re-raised.
    """
    # Get configuration from environment
    kubiya_api_key = os.environ.get("KUBIYA_API_KEY")
    control_plane_url = os.environ.get("CONTROL_PLANE_URL")
    queue_id = os.environ.get("QUEUE_ID")
    heartbeat_interval = int(os.environ.get("HEARTBEAT_INTERVAL", "30"))

    # Validate required configuration
    required_settings = (
        ("KUBIYA_API_KEY", kubiya_api_key),
        ("CONTROL_PLANE_URL", control_plane_url),
        ("QUEUE_ID", queue_id),
    )
    for var_name, var_value in required_settings:
        if not var_value:
            logger.error(
                "configuration_error",
                message=f"{var_name} environment variable is required"
            )
            sys.exit(1)

    log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker starting for queue {queue_id}")

    # Both stay None until the matching startup step succeeds, so the
    # handlers below can tell how far startup got.
    config: Optional[WorkerConfig] = None
    heartbeat_task = None

    try:
        # Print banner
        ProgressUI.banner()

        # Step 1: Register with control plane
        ProgressUI.step("⏳", "Registering with control plane...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Registering with control plane...")
        config = await start_worker_for_queue(
            control_plane_url=control_plane_url,
            kubiya_api_key=kubiya_api_key,
            queue_id=queue_id,
        )
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker registered: {config.worker_id}")

        # Set environment variables for activities to use
        os.environ["CONTROL_PLANE_URL"] = config.control_plane_url
        os.environ["KUBIYA_API_KEY"] = kubiya_api_key
        os.environ["WORKER_ID"] = config.worker_id
        os.environ["LITELLM_API_BASE"] = config.litellm_api_url
        os.environ["LITELLM_API_KEY"] = config.litellm_api_key

        # Step 2: Connect to Temporal
        ProgressUI.step("⏳", "Connecting to Temporal...")
        client = await create_temporal_client(config)
        ProgressUI.success("✓", "Connected to Temporal")

        # Step 3: Send initial heartbeat
        ProgressUI.step("⏳", "Sending heartbeat...")
        await send_heartbeat(
            config=config,
            kubiya_api_key=kubiya_api_key,
            status="active",
            tasks_processed=0
        )
        ProgressUI.success("✓", "Worker visible in UI")

        # Start heartbeat loop in background
        heartbeat_task = asyncio.create_task(
            heartbeat_loop(config, kubiya_api_key, heartbeat_interval)
        )

        # Step 4: Create worker
        ProgressUI.step("⏳", "Starting worker...")
        worker = Worker(
            client,
            task_queue=config.environment_name,
            workflows=[
                AgentExecutionWorkflow,
                TeamExecutionWorkflow,
            ],
            activities=[
                execute_agent_llm,
                update_execution_status,
                update_agent_status,
                persist_conversation_history,  # Conversation persistence
                get_team_agents,
                execute_team_coordination,
                execute_with_runtime,  # RuntimeFactory-based execution
            ],
            max_concurrent_activities=10,
            max_concurrent_workflow_tasks=10,
        )

        ProgressUI.success("✓", "Worker ready")
        ProgressUI.header("📡 Listening for tasks... (Ctrl+C to stop)")

        logger.info(
            "worker_ready",
            worker_id=config.worker_id[:8],
        )

        # Run worker (blocks until interrupted)
        await worker.run()

        # Stop heartbeats before announcing the disconnect
        await _stop_heartbeat(heartbeat_task)

        # Notify control plane of graceful shutdown
        print()
        ProgressUI.step("⏳", "Shutting down gracefully...")
        await send_disconnect(
            config=config,
            kubiya_api_key=kubiya_api_key,
            reason="shutdown",
            exit_code=0
        )
        ProgressUI.success("✓", "Worker stopped")
        print()

    except (KeyboardInterrupt, asyncio.CancelledError) as stop_signal:
        # Under asyncio.run on Python 3.11+, Ctrl+C cancels the main task, so
        # it arrives here as CancelledError rather than KeyboardInterrupt.
        print()
        ProgressUI.step("⏳", "Shutting down...")
        await _stop_heartbeat(heartbeat_task)
        # Only a registered worker has anything to disconnect
        if config is not None:
            try:
                await send_disconnect(
                    config=config,
                    kubiya_api_key=kubiya_api_key,
                    reason="shutdown",
                    exit_code=0
                )
                ProgressUI.success("✓", "Worker stopped")
            except Exception as e:
                logger.warning("disconnect_on_interrupt_failed", error=str(e))
        if isinstance(stop_signal, asyncio.CancelledError):
            # Cancellation must keep propagating to whoever requested it
            raise
    except Exception as e:
        import traceback
        logger.error("temporal_worker_error", error=str(e), traceback=traceback.format_exc())
        await _stop_heartbeat(heartbeat_task)
        # Notify control plane of error (only possible once registered)
        if config is not None:
            try:
                await send_disconnect(
                    config=config,
                    kubiya_api_key=kubiya_api_key,
                    reason="error",
                    exit_code=1,
                    error_message=str(e)[:500]
                )
            except Exception as disconnect_error:
                logger.warning("disconnect_on_error_failed", error=str(disconnect_error))
        raise
737
+
738
+
739
def main():
    """
    Synchronous entry point: run the worker coroutine to completion.

    A KeyboardInterrupt is logged as a normal stop. Any other exception is
    logged and the process exits with status 1.
    """
    logger.info("worker_starting")

    worker_coro = run_worker()
    try:
        asyncio.run(worker_coro)
    except KeyboardInterrupt:
        logger.info("worker_stopped")
    except Exception as exc:
        logger.error("worker_failed", error=str(exc))
        sys.exit(1)


if __name__ == "__main__":
    main()