kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubiya-control-plane-api might be problematic. Click here for more details.

Files changed (185) hide show
  1. control_plane_api/README.md +266 -0
  2. control_plane_api/__init__.py +0 -0
  3. control_plane_api/__version__.py +1 -0
  4. control_plane_api/alembic/README +1 -0
  5. control_plane_api/alembic/env.py +98 -0
  6. control_plane_api/alembic/script.py.mako +28 -0
  7. control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
  8. control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
  9. control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
  10. control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
  11. control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
  12. control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
  13. control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
  14. control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
  15. control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
  16. control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
  17. control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
  18. control_plane_api/alembic.ini +148 -0
  19. control_plane_api/api/index.py +12 -0
  20. control_plane_api/app/__init__.py +11 -0
  21. control_plane_api/app/activities/__init__.py +20 -0
  22. control_plane_api/app/activities/agent_activities.py +379 -0
  23. control_plane_api/app/activities/team_activities.py +410 -0
  24. control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
  25. control_plane_api/app/config/__init__.py +35 -0
  26. control_plane_api/app/config/api_config.py +354 -0
  27. control_plane_api/app/config/model_pricing.py +318 -0
  28. control_plane_api/app/config.py +95 -0
  29. control_plane_api/app/database.py +135 -0
  30. control_plane_api/app/exceptions.py +408 -0
  31. control_plane_api/app/lib/__init__.py +11 -0
  32. control_plane_api/app/lib/job_executor.py +312 -0
  33. control_plane_api/app/lib/kubiya_client.py +235 -0
  34. control_plane_api/app/lib/litellm_pricing.py +166 -0
  35. control_plane_api/app/lib/planning_tools/__init__.py +22 -0
  36. control_plane_api/app/lib/planning_tools/agents.py +155 -0
  37. control_plane_api/app/lib/planning_tools/base.py +189 -0
  38. control_plane_api/app/lib/planning_tools/environments.py +214 -0
  39. control_plane_api/app/lib/planning_tools/resources.py +240 -0
  40. control_plane_api/app/lib/planning_tools/teams.py +198 -0
  41. control_plane_api/app/lib/policy_enforcer_client.py +939 -0
  42. control_plane_api/app/lib/redis_client.py +436 -0
  43. control_plane_api/app/lib/supabase.py +71 -0
  44. control_plane_api/app/lib/temporal_client.py +138 -0
  45. control_plane_api/app/lib/validation/__init__.py +20 -0
  46. control_plane_api/app/lib/validation/runtime_validation.py +287 -0
  47. control_plane_api/app/main.py +128 -0
  48. control_plane_api/app/middleware/__init__.py +8 -0
  49. control_plane_api/app/middleware/auth.py +513 -0
  50. control_plane_api/app/middleware/exception_handler.py +267 -0
  51. control_plane_api/app/middleware/rate_limiting.py +384 -0
  52. control_plane_api/app/middleware/request_id.py +202 -0
  53. control_plane_api/app/models/__init__.py +27 -0
  54. control_plane_api/app/models/agent.py +79 -0
  55. control_plane_api/app/models/analytics.py +206 -0
  56. control_plane_api/app/models/associations.py +81 -0
  57. control_plane_api/app/models/environment.py +63 -0
  58. control_plane_api/app/models/execution.py +93 -0
  59. control_plane_api/app/models/job.py +179 -0
  60. control_plane_api/app/models/llm_model.py +75 -0
  61. control_plane_api/app/models/presence.py +49 -0
  62. control_plane_api/app/models/project.py +47 -0
  63. control_plane_api/app/models/session.py +38 -0
  64. control_plane_api/app/models/team.py +66 -0
  65. control_plane_api/app/models/workflow.py +55 -0
  66. control_plane_api/app/policies/README.md +121 -0
  67. control_plane_api/app/policies/approved_users.rego +62 -0
  68. control_plane_api/app/policies/business_hours.rego +51 -0
  69. control_plane_api/app/policies/rate_limiting.rego +100 -0
  70. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  71. control_plane_api/app/routers/__init__.py +4 -0
  72. control_plane_api/app/routers/agents.py +364 -0
  73. control_plane_api/app/routers/agents_v2.py +1260 -0
  74. control_plane_api/app/routers/analytics.py +1014 -0
  75. control_plane_api/app/routers/context_manager.py +562 -0
  76. control_plane_api/app/routers/environment_context.py +270 -0
  77. control_plane_api/app/routers/environments.py +715 -0
  78. control_plane_api/app/routers/execution_environment.py +517 -0
  79. control_plane_api/app/routers/executions.py +1911 -0
  80. control_plane_api/app/routers/health.py +92 -0
  81. control_plane_api/app/routers/health_v2.py +326 -0
  82. control_plane_api/app/routers/integrations.py +274 -0
  83. control_plane_api/app/routers/jobs.py +1344 -0
  84. control_plane_api/app/routers/models.py +82 -0
  85. control_plane_api/app/routers/models_v2.py +361 -0
  86. control_plane_api/app/routers/policies.py +639 -0
  87. control_plane_api/app/routers/presence.py +234 -0
  88. control_plane_api/app/routers/projects.py +902 -0
  89. control_plane_api/app/routers/runners.py +379 -0
  90. control_plane_api/app/routers/runtimes.py +172 -0
  91. control_plane_api/app/routers/secrets.py +155 -0
  92. control_plane_api/app/routers/skills.py +1001 -0
  93. control_plane_api/app/routers/skills_definitions.py +140 -0
  94. control_plane_api/app/routers/task_planning.py +1256 -0
  95. control_plane_api/app/routers/task_queues.py +654 -0
  96. control_plane_api/app/routers/team_context.py +270 -0
  97. control_plane_api/app/routers/teams.py +1400 -0
  98. control_plane_api/app/routers/worker_queues.py +1545 -0
  99. control_plane_api/app/routers/workers.py +935 -0
  100. control_plane_api/app/routers/workflows.py +204 -0
  101. control_plane_api/app/runtimes/__init__.py +6 -0
  102. control_plane_api/app/runtimes/validation.py +344 -0
  103. control_plane_api/app/schemas/job_schemas.py +295 -0
  104. control_plane_api/app/services/__init__.py +1 -0
  105. control_plane_api/app/services/agno_service.py +619 -0
  106. control_plane_api/app/services/litellm_service.py +190 -0
  107. control_plane_api/app/services/policy_service.py +525 -0
  108. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  109. control_plane_api/app/skills/__init__.py +44 -0
  110. control_plane_api/app/skills/base.py +229 -0
  111. control_plane_api/app/skills/business_intelligence.py +189 -0
  112. control_plane_api/app/skills/data_visualization.py +154 -0
  113. control_plane_api/app/skills/docker.py +104 -0
  114. control_plane_api/app/skills/file_generation.py +94 -0
  115. control_plane_api/app/skills/file_system.py +110 -0
  116. control_plane_api/app/skills/python.py +92 -0
  117. control_plane_api/app/skills/registry.py +65 -0
  118. control_plane_api/app/skills/shell.py +102 -0
  119. control_plane_api/app/skills/workflow_executor.py +469 -0
  120. control_plane_api/app/utils/workflow_executor.py +354 -0
  121. control_plane_api/app/workflows/__init__.py +11 -0
  122. control_plane_api/app/workflows/agent_execution.py +507 -0
  123. control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
  124. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  125. control_plane_api/app/workflows/team_execution.py +399 -0
  126. control_plane_api/scripts/seed_models.py +239 -0
  127. control_plane_api/worker/__init__.py +0 -0
  128. control_plane_api/worker/activities/__init__.py +0 -0
  129. control_plane_api/worker/activities/agent_activities.py +1241 -0
  130. control_plane_api/worker/activities/approval_activities.py +234 -0
  131. control_plane_api/worker/activities/runtime_activities.py +388 -0
  132. control_plane_api/worker/activities/skill_activities.py +267 -0
  133. control_plane_api/worker/activities/team_activities.py +1217 -0
  134. control_plane_api/worker/config/__init__.py +31 -0
  135. control_plane_api/worker/config/worker_config.py +275 -0
  136. control_plane_api/worker/control_plane_client.py +529 -0
  137. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  138. control_plane_api/worker/models/__init__.py +1 -0
  139. control_plane_api/worker/models/inputs.py +89 -0
  140. control_plane_api/worker/runtimes/__init__.py +31 -0
  141. control_plane_api/worker/runtimes/base.py +789 -0
  142. control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
  143. control_plane_api/worker/runtimes/default_runtime.py +617 -0
  144. control_plane_api/worker/runtimes/factory.py +173 -0
  145. control_plane_api/worker/runtimes/validation.py +93 -0
  146. control_plane_api/worker/services/__init__.py +1 -0
  147. control_plane_api/worker/services/agent_executor.py +422 -0
  148. control_plane_api/worker/services/agent_executor_v2.py +383 -0
  149. control_plane_api/worker/services/analytics_collector.py +457 -0
  150. control_plane_api/worker/services/analytics_service.py +464 -0
  151. control_plane_api/worker/services/approval_tools.py +310 -0
  152. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  153. control_plane_api/worker/services/cancellation_manager.py +177 -0
  154. control_plane_api/worker/services/data_visualization.py +827 -0
  155. control_plane_api/worker/services/jira_tools.py +257 -0
  156. control_plane_api/worker/services/runtime_analytics.py +328 -0
  157. control_plane_api/worker/services/session_service.py +194 -0
  158. control_plane_api/worker/services/skill_factory.py +175 -0
  159. control_plane_api/worker/services/team_executor.py +574 -0
  160. control_plane_api/worker/services/team_executor_v2.py +465 -0
  161. control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
  162. control_plane_api/worker/tests/__init__.py +1 -0
  163. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  164. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  165. control_plane_api/worker/tests/integration/__init__.py +0 -0
  166. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  167. control_plane_api/worker/tests/unit/__init__.py +0 -0
  168. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  169. control_plane_api/worker/utils/__init__.py +1 -0
  170. control_plane_api/worker/utils/chunk_batcher.py +305 -0
  171. control_plane_api/worker/utils/retry_utils.py +60 -0
  172. control_plane_api/worker/utils/streaming_utils.py +373 -0
  173. control_plane_api/worker/worker.py +753 -0
  174. control_plane_api/worker/workflows/__init__.py +0 -0
  175. control_plane_api/worker/workflows/agent_execution.py +589 -0
  176. control_plane_api/worker/workflows/team_execution.py +429 -0
  177. kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
  178. kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
  179. kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
  180. kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
  181. kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
  182. kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
  183. kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
  184. {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
  185. {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1241 @@
1
+ """Agent-related Temporal activities"""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Any, List, Dict
5
+ from datetime import datetime, timezone
6
+ from temporalio import activity
7
+ import structlog
8
+ import os
9
+ import httpx
10
+ from pathlib import Path
11
+
12
+ from agno.tools.shell import ShellTools
13
+ from agno.tools.python import PythonTools
14
+ from agno.tools.file import FileTools
15
+ from control_plane_api.worker.control_plane_client import get_control_plane_client
16
+
17
+ logger = structlog.get_logger()
18
+
19
+ # Global registry for active Agent/Team instances to support cancellation
20
+ # Key: execution_id, Value: {"agent": Agent, "run_id": Optional[str] (None until the run starts), "started_at": ISO-8601 UTC timestamp str}
21
+ _active_agents: Dict[str, Dict[str, Any]] = {}
22
+
23
+
24
def instantiate_skill(skill_data: dict) -> Optional[Any]:
    """
    Instantiate an Agno toolkit based on skill configuration from Control Plane.

    Args:
        skill_data: Skill data from Control Plane API containing:
            - type: Skill type (file_system, shell, python, docker, etc.)
            - name: Skill name
            - configuration: Dict with skill-specific config
            - enabled: Whether skill is enabled

    Returns:
        Instantiated Agno toolkit, or None when the skill is disabled, its
        type is missing/unsupported, a required dependency is unavailable,
        or instantiation fails for any reason. This function never raises
        for a dict input; failures are printed/logged and reported as None
        so the caller can continue with the remaining skills.
    """
    if not skill_data.get("enabled", True):
        print(f" ⊗ Skipping disabled skill: {skill_data.get('name')}")
        return None

    # `or` guards against explicit nulls in the payload: a JSON `null` for
    # "type" or "configuration" would otherwise bypass the .get() default and
    # blow up on `.lower()` / `config.get(...)`.
    skill_type = (skill_data.get("type") or "").lower()
    config = skill_data.get("configuration") or {}
    name = skill_data.get("name", "Unknown")

    try:
        # Map Control Plane skill types to Agno toolkit classes
        if skill_type in ["file_system", "file", "file_generation"]:
            # FileTools: file operations (read, write, list, search)
            # Note: file_generation is mapped to FileTools (save_file functionality)
            base_dir = config.get("base_dir")
            toolkit = FileTools(
                base_dir=Path(base_dir) if base_dir else None,
                enable_save_file=config.get("enable_save_file", True),
                enable_read_file=config.get("enable_read_file", True),
                enable_list_files=config.get("enable_list_files", True),
                enable_search_files=config.get("enable_search_files", True),
            )
            print(f" ✓ Instantiated FileTools: {name}")
            if skill_type == "file_generation":
                print(" - Type: File Generation (using FileTools.save_file)")
            print(f" - Base Dir: {base_dir or 'Current directory'}")
            print(f" - Read: {config.get('enable_read_file', True)}, Write: {config.get('enable_save_file', True)}")
            return toolkit

        elif skill_type in ["shell", "bash"]:
            # ShellTools: shell command execution
            base_dir = config.get("base_dir")
            toolkit = ShellTools(
                base_dir=Path(base_dir) if base_dir else None,
                enable_run_shell_command=config.get("enable_run_shell_command", True),
            )
            print(f" ✓ Instantiated ShellTools: {name}")
            print(f" - Base Dir: {base_dir or 'Current directory'}")
            print(f" - Run Commands: {config.get('enable_run_shell_command', True)}")
            return toolkit

        elif skill_type == "python":
            # PythonTools: Python code execution
            base_dir = config.get("base_dir")
            toolkit = PythonTools(
                base_dir=Path(base_dir) if base_dir else None,
                safe_globals=config.get("safe_globals"),
                safe_locals=config.get("safe_locals"),
            )
            print(f" ✓ Instantiated PythonTools: {name}")
            print(f" - Base Dir: {base_dir or 'Current directory'}")
            return toolkit

        elif skill_type == "docker":
            # DockerTools requires docker package and running Docker daemon
            try:
                from agno.tools.docker import DockerTools
                import docker
            except ImportError:
                print(f" ⚠ Docker skill requires 'docker' package - skipping: {name}")
                print(" Install with: pip install docker")
                return None

            # Check if Docker daemon is accessible before handing out the toolkit
            docker_client = None
            try:
                docker_client = docker.from_env()
                docker_client.ping()

                # Docker is available, instantiate toolkit
                toolkit = DockerTools()
                print(f" ✓ Instantiated DockerTools: {name}")
                print(" - Docker daemon: Connected")
                return toolkit
            except Exception as docker_error:
                print(f" ⚠ Docker daemon not available - skipping: {name}")
                print(f" Error: {str(docker_error)}")
                return None
            finally:
                # Release the probe client on every path (it used to leak when
                # ping() or DockerTools() raised).
                if docker_client is not None:
                    try:
                        docker_client.close()
                    except Exception:
                        # Best-effort cleanup; a close failure must not mask
                        # the outcome decided above.
                        pass

        elif skill_type in ["data_visualization", "diagramming", "visualization"]:
            # DataVisualizationTools: Create diagrams using Mermaid syntax
            # This is a custom implementation that uses streaming to send diagram data
            # NOTE(review): this import is not package-qualified, unlike the
            # `control_plane_api.worker.control_plane_client` import used by
            # this module - confirm the worker's sys.path makes `services`
            # importable. A failure here is caught below and returns None.
            from services.data_visualization import DataVisualizationTools

            toolkit = DataVisualizationTools(
                max_diagram_size=config.get("max_diagram_size", 50000),
                enable_flowchart=config.get("enable_flowchart", True),
                enable_sequence=config.get("enable_sequence", True),
                enable_class_diagram=config.get("enable_class_diagram", True),
                enable_er_diagram=config.get("enable_er_diagram", True),
                enable_gantt=config.get("enable_gantt", True),
                enable_pie_chart=config.get("enable_pie_chart", True),
                enable_state_diagram=config.get("enable_state_diagram", True),
                enable_git_graph=config.get("enable_git_graph", True),
                enable_user_journey=config.get("enable_user_journey", True),
                enable_quadrant_chart=config.get("enable_quadrant_chart", True),
            )
            print(f" ✓ Instantiated DataVisualizationTools: {name}")
            print(f" - Max diagram size: {config.get('max_diagram_size', 50000)} chars")
            print(" - Supported: Mermaid diagrams (flowchart, sequence, class, ER, etc.)")
            return toolkit

        else:
            print(f" ⚠ Unsupported skill type '{skill_type}': {name}")
            return None

    except Exception as e:
        # Deliberately broad: one misconfigured skill must not abort the
        # whole agent execution, so the failure is reported and skipped.
        print(f" ❌ Error instantiating skill '{name}' (type: {skill_type}): {str(e)}")
        logger.error(
            "Error instantiating skill",
            extra={
                "skill_name": name,
                "skill_type": skill_type,
                "error": str(e)
            }
        )
        return None
156
+
157
+
158
@dataclass
class ActivityExecuteAgentInput:
    """Payload for the execute_agent_llm activity.

    The Control Plane URL and API key are intentionally not fields here; per
    the original note they come from the worker environment
    (CONTROL_PLANE_URL, KUBIYA_API_KEY).
    """
    execution_id: str
    agent_id: str
    organization_id: str
    prompt: str
    system_prompt: Optional[str] = None
    model_id: Optional[str] = None
    model_config: dict = None  # normalized to {} in __post_init__
    mcp_servers: dict = None  # MCP servers configuration; normalized to {} in __post_init__
    session_id: Optional[str] = None  # Session ID for Agno session management (use execution_id)
    user_id: Optional[str] = None  # User ID for multi-user support

    def __post_init__(self):
        # Swap a None placeholder for a fresh dict per instance, so no dict
        # is ever shared between instances.
        for attr in ("model_config", "mcp_servers"):
            if getattr(self, attr) is None:
                setattr(self, attr, {})
178
+
179
+
180
@dataclass
class ActivityUpdateExecutionInput:
    """Payload for the update_execution_status activity."""
    execution_id: str
    status: str
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
    response: Optional[str] = None
    error_message: Optional[str] = None
    usage: dict = None  # normalized to {} in __post_init__
    execution_metadata: dict = None  # normalized to {} in __post_init__

    def __post_init__(self):
        # Give each instance its own empty dict when the caller left the
        # field unset (or passed None explicitly).
        for attr in ("usage", "execution_metadata"):
            if getattr(self, attr) is None:
                setattr(self, attr, {})
197
+
198
+
199
@dataclass
class ActivityUpdateAgentInput:
    """Payload for the update_agent_status activity."""
    agent_id: str
    organization_id: str
    status: str
    last_active_at: str
    error_message: Optional[str] = None
    state: dict = None  # normalized to {} in __post_init__

    def __post_init__(self):
        # A None state becomes a fresh dict; any supplied dict is kept as-is.
        self.state = {} if self.state is None else self.state
212
+
213
+
214
+ @activity.defn
215
+ async def execute_agent_llm(input: ActivityExecuteAgentInput) -> dict:
216
+ """
217
+ Execute an agent's LLM call with Agno Teams and session management.
218
+
219
+ This activity uses Agno Teams with session support for persistent conversation history.
220
+ The session_id should be set to execution_id for 1:1 mapping.
221
+
222
+ Args:
223
+ input: Activity input with execution details
224
+
225
+ Returns:
226
+ Dict with response, usage, success flag, session messages, etc.
227
+ """
228
+ print("\n" + "="*80)
229
+ print("🤖 AGENT EXECUTION START")
230
+ print("="*80)
231
+ print(f"Execution ID: {input.execution_id}")
232
+ print(f"Agent ID: {input.agent_id}")
233
+ print(f"Organization: {input.organization_id}")
234
+ print(f"Model: {input.model_id or 'default'}")
235
+ print(f"Session ID: {input.session_id}")
236
+ print(f"MCP Servers: {len(input.mcp_servers)} configured" if input.mcp_servers else "MCP Servers: None")
237
+ print(f"Prompt: {input.prompt[:100]}..." if len(input.prompt) > 100 else f"Prompt: {input.prompt}")
238
+ print("="*80 + "\n")
239
+
240
+ activity.logger.info(
241
+ f"Executing agent LLM call with Agno Sessions",
242
+ extra={
243
+ "execution_id": input.execution_id,
244
+ "agent_id": input.agent_id,
245
+ "organization_id": input.organization_id,
246
+ "model_id": input.model_id,
247
+ "has_mcp_servers": bool(input.mcp_servers),
248
+ "mcp_server_count": len(input.mcp_servers) if input.mcp_servers else 0,
249
+ "mcp_server_ids": list(input.mcp_servers.keys()) if input.mcp_servers else [],
250
+ "session_id": input.session_id,
251
+ }
252
+ )
253
+
254
+ try:
255
+ # Get Control Plane client for all communication with Control Plane
256
+ control_plane = get_control_plane_client()
257
+
258
+ # STEP 1: Load existing session history from Control Plane (if this is a continuation)
259
+ # This enables conversation continuity across multiple execution turns
260
+ # IMPORTANT: This must be non-blocking and have proper timeout/retry
261
+ session_history = []
262
+ if input.session_id:
263
+ print(f"\n📥 Loading session history from Control Plane...")
264
+
265
+ # Try up to 3 times with exponential backoff for transient failures
266
+ max_retries = 3
267
+ for attempt in range(max_retries):
268
+ try:
269
+ if attempt > 0:
270
+ print(f" 🔄 Retry attempt {attempt + 1}/{max_retries}...")
271
+
272
+ session_data = control_plane.get_session(
273
+ execution_id=input.execution_id,
274
+ session_id=input.session_id
275
+ )
276
+ if session_data and session_data.get("messages"):
277
+ session_history = session_data["messages"]
278
+ print(f" ✅ Loaded {len(session_history)} messages from previous turns")
279
+
280
+ activity.logger.info(
281
+ "Session history loaded from Control Plane",
282
+ extra={
283
+ "execution_id": input.execution_id,
284
+ "session_id": input.session_id,
285
+ "message_count": len(session_history),
286
+ "attempt": attempt + 1,
287
+ }
288
+ )
289
+ break # Success - exit retry loop
290
+ else:
291
+ print(f" ℹ️ No previous session found - starting new conversation")
292
+ break # No session exists - not an error
293
+
294
+ except httpx.TimeoutException as e:
295
+ print(f" ⏱️ Timeout loading session (attempt {attempt + 1}/{max_retries})")
296
+ activity.logger.warning(
297
+ "Session load timeout",
298
+ extra={"error": str(e), "execution_id": input.execution_id, "attempt": attempt + 1}
299
+ )
300
+ if attempt < max_retries - 1:
301
+ import time
302
+ time.sleep(2 ** attempt) # Exponential backoff: 1s, 2s, 4s
303
+ continue
304
+ else:
305
+ print(f" ⚠️ Session load failed after {max_retries} attempts - continuing without history")
306
+
307
+ except Exception as e:
308
+ error_type = type(e).__name__
309
+ print(f" ⚠️ Failed to load session history ({error_type}): {str(e)[:100]}")
310
+ activity.logger.warning(
311
+ "Failed to load session history from Control Plane",
312
+ extra={
313
+ "error": str(e),
314
+ "error_type": error_type,
315
+ "execution_id": input.execution_id,
316
+ "attempt": attempt + 1
317
+ }
318
+ )
319
+ # For non-timeout errors, don't retry - likely invalid session
320
+ break
321
+
322
+ # Always continue execution even if session loading fails
323
+ print(f" → Continuing with {len(session_history)} messages in context\n")
324
+
325
+ # Get LiteLLM credentials from environment (set by worker from registration)
326
+ litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
327
+ litellm_api_key = os.getenv("LITELLM_API_KEY")
328
+
329
+ if not litellm_api_key:
330
+ raise ValueError("LITELLM_API_KEY environment variable not set")
331
+
332
+ # Get model from input or use default
333
+ model = input.model_id or os.environ.get("LITELLM_DEFAULT_MODEL", "kubiya/claude-sonnet-4")
334
+
335
+ # Fetch resolved skills from Control Plane if available
336
+ skills = []
337
+ if input.agent_id:
338
+ print(f"🔧 Fetching skills from Control Plane...")
339
+ try:
340
+ skills = control_plane.get_skills(input.agent_id)
341
+ if skills:
342
+ print(f"✅ Resolved {len(skills)} skills from Control Plane")
343
+ print(f" Skill Types: {[t.get('type') for t in skills]}")
344
+ print(f" Skill Sources: {[t.get('source') for t in skills]}")
345
+ print(f" Skill Names: {[t.get('name') for t in skills]}\n")
346
+
347
+ activity.logger.info(
348
+ f"Resolved skills from Control Plane",
349
+ extra={
350
+ "agent_id": input.agent_id,
351
+ "skill_count": len(skills),
352
+ "skill_types": [t.get("type") for t in skills],
353
+ "skill_sources": [t.get("source") for t in skills],
354
+ "skill_names": [t.get("name") for t in skills],
355
+ }
356
+ )
357
+ else:
358
+ print(f"⚠️ No skills found for agent\n")
359
+ except Exception as e:
360
+ print(f"❌ Error fetching skills: {str(e)}\n")
361
+ activity.logger.error(
362
+ f"Error fetching skills from Control Plane: {str(e)}",
363
+ extra={"error": str(e)}
364
+ )
365
+ # Continue execution without skills
366
+ else:
367
+ print(f"ℹ️ No agent_id provided - skipping skill resolution\n")
368
+
369
+ # Instantiate Agno toolkits from Control Plane skills
370
+ print(f"\n🔧 Instantiating Skills:")
371
+ agno_toolkits = []
372
+ if skills:
373
+ for skill in skills:
374
+ toolkit = instantiate_skill(skill)
375
+ if toolkit:
376
+ agno_toolkits.append(toolkit)
377
+
378
+ if agno_toolkits:
379
+ print(f"\n✅ Successfully instantiated {len(agno_toolkits)} skill(s)")
380
+ else:
381
+ print(f"\nℹ️ No skills instantiated\n")
382
+
383
+ print(f"📦 Total Tools Available:")
384
+ print(f" MCP Servers: {len(input.mcp_servers)}")
385
+ print(f" OS-Level Skills: {len(agno_toolkits)}\n")
386
+
387
+ activity.logger.info(
388
+ f"Using Agno Agent with sessions and skills",
389
+ extra={
390
+ "execution_id": input.execution_id,
391
+ "session_id": input.session_id,
392
+ "has_mcp_servers": bool(input.mcp_servers),
393
+ "mcp_server_count": len(input.mcp_servers) if input.mcp_servers else 0,
394
+ "mcp_servers": list(input.mcp_servers.keys()) if input.mcp_servers else [],
395
+ "skill_count": len(agno_toolkits),
396
+ "model": model,
397
+ }
398
+ )
399
+
400
+ # Import Agno libraries
401
+ from agno.agent import Agent
402
+ from agno.models.litellm import LiteLLM
403
+
404
+ print(f"\n🤖 Creating Agno Agent:")
405
+ print(f" Model: {model}")
406
+ print(f" Skills: {len(agno_toolkits)}")
407
+
408
+ # Send heartbeat: Creating agent
409
+ activity.heartbeat({"status": "Creating agent with skills..."})
410
+
411
+ # Track tool executions for real-time streaming
412
+ tool_execution_messages = []
413
+
414
+ # Create tool hook to capture tool execution for real-time streaming
415
+ # Agno inspects the signature and passes matching parameters
416
+ def tool_hook(name: str = None, function_name: str = None, function=None, arguments: dict = None, **kwargs):
417
+ """Hook to capture tool execution and add to messages for streaming
418
+
419
+ Agno passes these parameters based on our signature:
420
+ - name or function_name: The tool function name
421
+ - function: The callable being executed (this is the NEXT function in the chain)
422
+ - arguments: Dict of arguments passed to the tool
423
+
424
+ The hook must CALL the function and return its result.
425
+ """
426
+ # Get tool name from Agno's parameters
427
+ tool_name = name or function_name or "unknown"
428
+ tool_args = arguments or {}
429
+
430
+ # Generate unique tool execution ID (tool_name + timestamp)
431
+ import time
432
+ tool_execution_id = f"{tool_name}_{int(time.time() * 1000000)}"
433
+
434
+ print(f" 🔧 Tool Starting: {tool_name} (ID: {tool_execution_id})")
435
+ if tool_args:
436
+ args_preview = str(tool_args)[:200]
437
+ print(f" Args: {args_preview}{'...' if len(str(tool_args)) > 200 else ''}")
438
+
439
+ # Publish streaming event to Control Plane (real-time UI update)
440
+ control_plane.publish_event(
441
+ execution_id=input.execution_id,
442
+ event_type="tool_started",
443
+ data={
444
+ "tool_name": tool_name,
445
+ "tool_execution_id": tool_execution_id, # Unique ID for this execution
446
+ "tool_arguments": tool_args,
447
+ "message": f"🔧 Executing tool: {tool_name}",
448
+ }
449
+ )
450
+
451
+ tool_execution_messages.append({
452
+ "role": "system",
453
+ "content": f"🔧 Executing tool: **{tool_name}**",
454
+ "tool_name": tool_name,
455
+ "tool_event": "started",
456
+ "timestamp": datetime.now(timezone.utc).isoformat(),
457
+ })
458
+
459
+ # CRITICAL: Actually call the function and handle completion
460
+ result = None
461
+ error = None
462
+ try:
463
+ # Call the actual function (next in the hook chain)
464
+ if function and callable(function):
465
+ result = function(**tool_args) if tool_args else function()
466
+ else:
467
+ raise ValueError(f"Function not callable: {function}")
468
+
469
+ status = "success"
470
+ icon = "✅"
471
+ print(f" {icon} Tool Success: {tool_name}")
472
+
473
+ except Exception as e:
474
+ error = e
475
+ status = "failed"
476
+ icon = "❌"
477
+ print(f" {icon} Tool Failed: {tool_name} - {str(e)}")
478
+
479
+ # Publish completion event to Control Plane (real-time UI update)
480
+ control_plane.publish_event(
481
+ execution_id=input.execution_id,
482
+ event_type="tool_completed",
483
+ data={
484
+ "tool_name": tool_name,
485
+ "tool_execution_id": tool_execution_id, # Same ID to match the started event
486
+ "status": status,
487
+ "error": str(error) if error else None,
488
+ "tool_output": result if result is not None else None, # Include tool output for UI display
489
+ "message": f"{icon} Tool {status}: {tool_name}",
490
+ }
491
+ )
492
+
493
+ tool_execution_messages.append({
494
+ "role": "system",
495
+ "content": f"{icon} Tool {status}: **{tool_name}**",
496
+ "tool_name": tool_name,
497
+ "tool_event": "completed",
498
+ "tool_status": status,
499
+ "timestamp": datetime.now(timezone.utc).isoformat(),
500
+ })
501
+
502
+ # If there was an error, re-raise it so Agno knows the tool failed
503
+ if error:
504
+ raise error
505
+
506
+ # Return the result to continue the chain
507
+ return result
508
+
509
+ # Build conversation context from session history for manual session management
510
+ # Workers don't have database access, so we manage sessions via Control Plane API
511
+ conversation_context = []
512
+ if session_history:
513
+ print(f"\n📝 Building conversation context from {len(session_history)} previous messages...")
514
+ for msg in session_history:
515
+ # Convert Control Plane message format to Agno format
516
+ conversation_context.append({
517
+ "role": msg.get("role", "user"),
518
+ "content": msg.get("content", ""),
519
+ })
520
+ print(f" ✅ Conversation context ready\n")
521
+
522
+ # Create Agno Agent with LiteLLM configuration
523
+ # Note: NO database - workers use Control Plane API for session management
524
+ # Use openai/ prefix for custom proxy compatibility
525
+ agent = Agent(
526
+ name=f"Agent {input.agent_id}",
527
+ role=input.system_prompt or "You are a helpful AI assistant",
528
+ model=LiteLLM(
529
+ id=f"openai/{model}",
530
+ api_base=litellm_api_base,
531
+ api_key=litellm_api_key,
532
+ ),
533
+ tools=agno_toolkits if agno_toolkits else None, # Add skills to agent
534
+ tool_hooks=[tool_hook], # Add hook for real-time tool updates
535
+ # NO db parameter - session management via Control Plane API
536
+ )
537
+
538
+ # Register agent for cancellation support
539
+ _active_agents[input.execution_id] = {
540
+ "agent": agent,
541
+ "run_id": None, # Will be set when run starts
542
+ "started_at": datetime.now(timezone.utc).isoformat(),
543
+ }
544
+ print(f"✅ Agent registered for cancellation support (execution_id: {input.execution_id})\n")
545
+
546
+ # Cache execution metadata in Redis for fast SSE lookups (avoid DB queries)
547
+ control_plane.cache_metadata(input.execution_id, "AGENT")
548
+
549
+ # Execute agent run with streaming
550
+ print("⚡ Executing Agent Run with Streaming...\n")
551
+
552
+ # Send heartbeat: Starting execution
553
+ activity.heartbeat({"status": "Agent is processing your request..."})
554
+
555
+ import asyncio
556
+
557
+ # Stream the response and collect chunks
558
+ response_chunks = []
559
+ full_response = ""
560
+
561
+ # Generate unique message ID for this turn (execution_id + timestamp)
562
+ import time
563
+ message_id = f"{input.execution_id}_{int(time.time() * 1000000)}"
564
+
565
+ def stream_agent_run():
566
+ """Run agent with streaming and collect response"""
567
+ nonlocal full_response
568
+ run_id_published = False
569
+
570
+ try:
571
+ # Build full prompt with conversation history for context
572
+ # Since worker has no database, we manually prepend history
573
+ if conversation_context:
574
+ # Agno Agent supports passing messages parameter for conversation context
575
+ run_response = agent.run(
576
+ input.prompt,
577
+ stream=True,
578
+ messages=conversation_context, # Pass previous conversation
579
+ )
580
+ else:
581
+ # First turn - no history
582
+ run_response = agent.run(input.prompt, stream=True)
583
+
584
+ # Iterate over streaming chunks
585
+ for chunk in run_response:
586
+ # Capture and publish run_id from first chunk for cancellation support
587
+ if not run_id_published and hasattr(chunk, 'run_id') and chunk.run_id:
588
+ agno_run_id = chunk.run_id
589
+ print(f"\n🆔 Agno run_id: {agno_run_id}")
590
+
591
+ # Store run_id in registry for cancellation
592
+ if input.execution_id in _active_agents:
593
+ _active_agents[input.execution_id]["run_id"] = agno_run_id
594
+
595
+ # Publish run_id to Redis for Control Plane cancellation access
596
+ # This allows users to cancel via STOP button in UI
597
+ control_plane.publish_event(
598
+ execution_id=input.execution_id,
599
+ event_type="run_started",
600
+ data={
601
+ "run_id": agno_run_id,
602
+ "agent_id": input.agent_id,
603
+ "cancellable": True,
604
+ }
605
+ )
606
+ run_id_published = True
607
+
608
+ if hasattr(chunk, 'content') and chunk.content:
609
+ content = str(chunk.content)
610
+ full_response += content
611
+ response_chunks.append(content)
612
+ print(content, end='', flush=True)
613
+
614
+ # Stream chunk to Control Plane for real-time UI updates
615
+ # Include message_id so UI knows which message these chunks belong to
616
+ control_plane.publish_event(
617
+ execution_id=input.execution_id,
618
+ event_type="message_chunk",
619
+ data={
620
+ "role": "assistant",
621
+ "content": content,
622
+ "is_chunk": True,
623
+ "message_id": message_id, # Unique ID for this turn
624
+ }
625
+ )
626
+
627
+ # Note: Cannot send heartbeat from sync context (thread pool)
628
+
629
+ print() # New line after streaming
630
+
631
+ # Return the iterator's final result
632
+ return run_response
633
+ except Exception as e:
634
+ print(f"\n❌ Streaming error: {str(e)}")
635
+ # Fall back to non-streaming
636
+ if conversation_context:
637
+ return agent.run(input.prompt, stream=False, messages=conversation_context)
638
+ else:
639
+ return agent.run(input.prompt, stream=False)
640
+
641
+ # Execute in thread pool (NO TIMEOUT - tasks can run as long as needed)
642
+ # Control Plane can cancel via Agno's cancel_run API if user requests it
643
+ result = await asyncio.to_thread(stream_agent_run)
644
+
645
+ # Send heartbeat: Completed
646
+ activity.heartbeat({"status": "Agent execution completed, preparing response..."})
647
+
648
+ print("✅ Agent Execution Completed!")
649
+ print(f" Response Length: {len(full_response)} chars\n")
650
+
651
+ activity.logger.info(
652
+ f"Agent LLM call completed",
653
+ extra={
654
+ "execution_id": input.execution_id,
655
+ "has_content": bool(full_response),
656
+ }
657
+ )
658
+
659
+ # Use the streamed response content
660
+ response_content = full_response if full_response else (result.content if hasattr(result, "content") else str(result))
661
+
662
+ # Extract tool call messages for UI streaming
663
+ tool_messages = []
664
+ if hasattr(result, "messages") and result.messages:
665
+ for msg in result.messages:
666
+ # Check if message has tool calls
667
+ if hasattr(msg, "tool_calls") and msg.tool_calls:
668
+ for tool_call in msg.tool_calls:
669
+ tool_name = getattr(tool_call, "function", {}).get("name") if hasattr(tool_call, "function") else str(tool_call)
670
+ tool_args = getattr(tool_call, "function", {}).get("arguments") if hasattr(tool_call, "function") else {}
671
+
672
+ print(f" 🔧 Tool Call: {tool_name}")
673
+
674
+ tool_messages.append({
675
+ "role": "tool",
676
+ "content": f"Executing {tool_name}...",
677
+ "tool_name": tool_name,
678
+ "tool_input": tool_args,
679
+ "timestamp": datetime.now(timezone.utc).isoformat(),
680
+ })
681
+
682
+ if tool_messages:
683
+ print(f"\n🔧 Tool Calls Captured: {len(tool_messages)}")
684
+
685
+ # Extract usage metrics if available
686
+ usage = {}
687
+ if hasattr(result, "metrics") and result.metrics:
688
+ metrics = result.metrics
689
+ usage = {
690
+ "prompt_tokens": getattr(metrics, "input_tokens", 0),
691
+ "completion_tokens": getattr(metrics, "output_tokens", 0),
692
+ "total_tokens": getattr(metrics, "total_tokens", 0),
693
+ }
694
+ print(f"📊 Token Usage:")
695
+ print(f" Input Tokens: {usage.get('prompt_tokens', 0)}")
696
+ print(f" Output Tokens: {usage.get('completion_tokens', 0)}")
697
+ print(f" Total Tokens: {usage.get('total_tokens', 0)}\n")
698
+
699
+ print(f"📝 Response Preview:")
700
+ print(f" {response_content[:200]}..." if len(response_content) > 200 else f" {response_content}")
701
+
702
+ # CRITICAL: Persist COMPLETE session history to Control Plane API
703
+ # This includes previous history + current turn for conversation continuity
704
+ # IMPORTANT: Use retry logic - persistence failures shouldn't break execution
705
+ print("\n💾 Persisting session history to Control Plane...")
706
+
707
+ # Build complete session: previous history + current turn's messages
708
+ updated_session_messages = list(session_history) # Start with loaded history
709
+
710
+ # Add current turn messages (user prompt + assistant response)
711
+ # Streaming results don't have result.messages, so we manually build them
712
+ current_turn_messages = [
713
+ {
714
+ "role": "user",
715
+ "content": input.prompt,
716
+ "timestamp": datetime.now(timezone.utc).isoformat(),
717
+ "user_id": input.user_id,
718
+ "user_name": getattr(input, "user_name", None),
719
+ "user_email": getattr(input, "user_email", None),
720
+ },
721
+ {
722
+ "role": "assistant",
723
+ "content": response_content,
724
+ "timestamp": datetime.now(timezone.utc).isoformat(),
725
+ }
726
+ ]
727
+
728
+ print(f" 📝 Adding {len(current_turn_messages)} messages from current turn (user + assistant)...")
729
+ updated_session_messages.extend(current_turn_messages)
730
+
731
+ if updated_session_messages:
732
+ # Try up to 3 times to persist session history
733
+ max_retries = 3
734
+ persisted = False
735
+
736
+ for attempt in range(max_retries):
737
+ try:
738
+ if attempt > 0:
739
+ print(f" 🔄 Retry persistence attempt {attempt + 1}/{max_retries}...")
740
+
741
+ success = control_plane.persist_session(
742
+ execution_id=input.execution_id,
743
+ session_id=input.session_id or input.execution_id,
744
+ user_id=input.user_id,
745
+ messages=updated_session_messages, # Complete conversation history
746
+ metadata={
747
+ "agent_id": input.agent_id,
748
+ "organization_id": input.organization_id,
749
+ "turn_count": len(updated_session_messages),
750
+ }
751
+ )
752
+
753
+ if success:
754
+ print(f" ✅ Complete session history persisted ({len(updated_session_messages)} total messages)")
755
+ persisted = True
756
+ break
757
+ else:
758
+ print(f" ⚠️ Persistence failed (attempt {attempt + 1}/{max_retries})")
759
+ if attempt < max_retries - 1:
760
+ import time
761
+ time.sleep(2 ** attempt) # Exponential backoff
762
+
763
+ except Exception as session_error:
764
+ error_type = type(session_error).__name__
765
+ print(f" ⚠️ Persistence error ({error_type}, attempt {attempt + 1}/{max_retries})")
766
+ logger.warning(
767
+ "session_persistence_error",
768
+ extra={
769
+ "error": str(session_error),
770
+ "error_type": error_type,
771
+ "execution_id": input.execution_id,
772
+ "attempt": attempt + 1
773
+ }
774
+ )
775
+ if attempt < max_retries - 1:
776
+ import time
777
+ time.sleep(2 ** attempt) # Exponential backoff
778
+
779
+ if not persisted:
780
+ print(f" ⚠️ Session persistence failed after {max_retries} attempts")
781
+ logger.error(
782
+ "session_persistence_failed_all_retries",
783
+ extra={
784
+ "execution_id": input.execution_id,
785
+ "message_count": len(updated_session_messages)
786
+ }
787
+ )
788
+ # Don't fail execution - session loss is better than execution failure
789
+ else:
790
+ print(" ℹ️ No messages - skipping session persistence")
791
+
792
+ print("\n" + "="*80)
793
+ print("🏁 AGENT EXECUTION END")
794
+ print("="*80 + "\n")
795
+
796
+ # Cleanup: Remove agent from registry
797
+ if input.execution_id in _active_agents:
798
+ del _active_agents[input.execution_id]
799
+ print(f"✅ Agent unregistered (execution_id: {input.execution_id})\n")
800
+
801
+ return {
802
+ "success": True,
803
+ "response": response_content,
804
+ "usage": usage,
805
+ "model": model,
806
+ "finish_reason": "stop",
807
+ "mcp_tools_used": 0, # TODO: Track MCP tool usage
808
+ "tool_messages": tool_messages, # Include tool call messages for UI
809
+ "tool_execution_messages": tool_execution_messages, # Include real-time tool execution status
810
+ }
811
+
812
+ except Exception as e:
813
+ # Cleanup on error
814
+ if input.execution_id in _active_agents:
815
+ del _active_agents[input.execution_id]
816
+ print("\n" + "="*80)
817
+ print("❌ AGENT EXECUTION FAILED")
818
+ print("="*80)
819
+ print(f"Error: {str(e)}")
820
+ print("="*80 + "\n")
821
+
822
+ activity.logger.error(
823
+ f"Agent LLM call failed",
824
+ extra={
825
+ "execution_id": input.execution_id,
826
+ "error": str(e),
827
+ }
828
+ )
829
+ return {
830
+ "success": False,
831
+ "error": str(e),
832
+ "model": input.model_id,
833
+ "usage": None,
834
+ "finish_reason": "error",
835
+ }
836
+
837
+
838
@activity.defn
async def update_execution_status(input: ActivityUpdateExecutionInput) -> dict:
    """
    Update execution status in database via Control Plane API.

    Sends a PATCH request to
    ``{CONTROL_PLANE_URL}/api/v1/executions/{execution_id}`` carrying the
    fields that are set on ``input``. Also records which worker processed
    this execution under ``execution_metadata["worker_info"]``.

    Args:
        input: Activity input with update details

    Returns:
        Dict with success flag

    Raises:
        ValueError: If CONTROL_PLANE_URL or KUBIYA_API_KEY is not set.
        Exception: If the API answers 404 or any other non-200 status.
            Every failure is logged and then re-raised unchanged.
    """
    print(f"🔄 Updating execution status: {input.status} (execution_id: {input.execution_id[:8]}...)")

    activity.logger.info(
        "Updating execution status via Control Plane API",
        extra={
            "execution_id": input.execution_id,
            "status": input.status,
        }
    )

    try:
        # Get Control Plane URL and Kubiya API key from environment
        control_plane_url = os.getenv("CONTROL_PLANE_URL")
        kubiya_api_key = os.getenv("KUBIYA_API_KEY")
        worker_id = os.getenv("WORKER_ID", "unknown")

        if not control_plane_url:
            raise ValueError("CONTROL_PLANE_URL environment variable not set")
        if not kubiya_api_key:
            raise ValueError("KUBIYA_API_KEY environment variable not set")

        # Collect worker system information
        import socket
        import platform
        worker_info = {
            "worker_id": worker_id,
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "python_version": platform.python_version(),
        }

        # Build update payload: only fields that carry a value are sent.
        # `response` and `error_message` are sent even when empty (checked
        # against None); the others are skipped when falsy.
        update_payload = {}

        if input.status:
            update_payload["status"] = input.status

        if input.started_at:
            update_payload["started_at"] = input.started_at

        if input.completed_at:
            update_payload["completed_at"] = input.completed_at

        if input.response is not None:
            update_payload["response"] = input.response

        if input.error_message is not None:
            update_payload["error_message"] = input.error_message

        if input.usage:
            update_payload["usage"] = input.usage

        # Merge worker info into execution_metadata.
        # Work on a shallow copy so the caller's input object is not mutated
        # as a side effect of building the request body.
        execution_metadata = dict(input.execution_metadata or {})
        if not execution_metadata.get("worker_info"):
            execution_metadata["worker_info"] = worker_info
        update_payload["execution_metadata"] = execution_metadata

        # Call Control Plane API
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.patch(
                f"{control_plane_url}/api/v1/executions/{input.execution_id}",
                json=update_payload,
                headers={
                    "Authorization": f"Bearer {kubiya_api_key}",
                    "Content-Type": "application/json",
                }
            )

        if response.status_code == 404:
            raise Exception(f"Execution not found: {input.execution_id}")
        elif response.status_code != 200:
            raise Exception(f"Failed to update execution: {response.status_code} - {response.text}")

        print(f"✅ Status updated successfully: {input.status}\n")

        activity.logger.info(
            "Execution status updated via API",
            extra={
                "execution_id": input.execution_id,
                "status": input.status,
            }
        )

        return {"success": True}

    except Exception as e:
        print(f"❌ Failed to update status: {str(e)}\n")

        activity.logger.error(
            "Failed to update execution status",
            extra={
                "execution_id": input.execution_id,
                "error": str(e),
            }
        )
        # Re-raise so the caller sees the failure instead of a silent success.
        raise
949
+
950
+
951
@activity.defn
async def update_agent_status(input: ActivityUpdateAgentInput) -> dict:
    """
    Push an agent's status to the Control Plane API.

    Issues a PATCH to ``{CONTROL_PLANE_URL}/api/v1/agents/{agent_id}`` with
    the status and last-active timestamp from ``input``, plus the error
    message and state when they are provided.

    Args:
        input: Activity input with update details

    Returns:
        ``{"success": True}`` on a 200 response, or
        ``{"success": True, "skipped": True}`` when the API answers 404.

    Raises:
        ValueError: If CONTROL_PLANE_URL or KUBIYA_API_KEY is not set.
        Exception: If the API answers with a status other than 200 or 404.
            Every failure is logged and then re-raised unchanged.
    """
    activity.logger.info(
        "Updating agent status via Control Plane API",
        extra={"agent_id": input.agent_id, "status": input.status},
    )

    try:
        # Connection settings come from the worker's environment.
        base_url = os.getenv("CONTROL_PLANE_URL")
        api_key = os.getenv("KUBIYA_API_KEY")

        required = (
            ("CONTROL_PLANE_URL", base_url),
            ("KUBIYA_API_KEY", api_key),
        )
        for env_name, env_value in required:
            if not env_value:
                raise ValueError(f"{env_name} environment variable not set")

        # Status and last_active_at are always sent; the rest are optional.
        payload = {
            "status": input.status,
            "last_active_at": input.last_active_at,
        }
        if input.error_message is not None:
            payload["error_message"] = input.error_message
        if input.state:
            payload["state"] = input.state

        request_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        endpoint = f"{base_url}/api/v1/agents/{input.agent_id}"

        async with httpx.AsyncClient(timeout=30.0) as http:
            reply = await http.patch(endpoint, json=payload, headers=request_headers)

        # For team executions the "agent_id" is actually a team_id, so the
        # agents endpoint answers 404. That is expected: log it and report
        # success with a "skipped" marker.
        if reply.status_code == 404:
            activity.logger.info(
                "Agent not found (likely a team execution) - skipping agent status update",
                extra={"agent_id": input.agent_id, "status": input.status},
            )
            return {"success": True, "skipped": True}

        if reply.status_code != 200:
            raise Exception(f"Failed to update agent: {reply.status_code} - {reply.text}")

        activity.logger.info(
            "Agent status updated via API",
            extra={"agent_id": input.agent_id, "status": input.status},
        )
        return {"success": True}

    except Exception as exc:
        activity.logger.error(
            "Failed to update agent status",
            extra={"agent_id": input.agent_id, "error": str(exc)},
        )
        raise
1038
+
1039
+
1040
@dataclass
class ActivityCancelAgentInput:
    """Input for the ``cancel_agent_run`` activity."""

    # Execution whose run should be cancelled; this is the key used to look
    # the agent up in the active-agent registry.
    execution_id: str
1043
+
1044
+
1045
@activity.defn(name="cancel_agent_run")
async def cancel_agent_run(input: ActivityCancelAgentInput) -> dict:
    """
    Cancel an active agent/team run using Agno's cancel_run API.

    This is called when a user clicks the STOP button in the UI.
    Uses the global registry to find the Agent instance and run_id,
    then calls agent.cancel_run(run_id) to stop execution.

    Args:
        input: Contains execution_id to identify which run to cancel

    Returns:
        Dict with success status and cancellation details. On success it
        holds ``execution_id``, ``run_id`` and ``cancelled_at``; otherwise
        ``success`` is False and ``error`` explains why. This activity does
        not raise: unexpected exceptions are logged and returned as a
        failure result.
    """
    print("\n" + "="*80)
    print("🛑 CANCEL AGENT RUN")
    print("="*80)
    print(f"Execution ID: {input.execution_id}\n")

    try:
        # Look up agent in registry with a single access, so an entry that
        # disappears between a membership test and the read cannot surface
        # as a KeyError.
        agent_info = _active_agents.get(input.execution_id)
        if agent_info is None:
            print(f"⚠️ Agent not found in registry - may have already completed")
            activity.logger.warning(
                "cancel_agent_not_found",
                extra={"execution_id": input.execution_id}
            )
            return {
                "success": False,
                "error": "Agent not found or already completed",
                "execution_id": input.execution_id,
            }

        agent = agent_info["agent"]
        run_id = agent_info.get("run_id")

        if not run_id:
            print(f"⚠️ No run_id found - execution may not have started yet")
            return {
                "success": False,
                "error": "Execution not started yet",
                "execution_id": input.execution_id,
            }

        print(f"🆔 Found run_id: {run_id}")
        print(f"🛑 Calling agent.cancel_run()...")

        # Call Agno's cancel_run API
        success = agent.cancel_run(run_id)

        if success:
            print(f"✅ Agent run cancelled successfully!\n")
            activity.logger.info(
                "agent_run_cancelled",
                extra={
                    "execution_id": input.execution_id,
                    "run_id": run_id,
                }
            )

            # Clean up registry. pop() with a default tolerates the entry
            # having been removed already, so a successful cancellation is
            # never reported as a failure because of a missing key.
            _active_agents.pop(input.execution_id, None)

            return {
                "success": True,
                "execution_id": input.execution_id,
                "run_id": run_id,
                "cancelled_at": datetime.now(timezone.utc).isoformat(),
            }
        else:
            print(f"⚠️ Cancel failed - run may have already completed\n")
            return {
                "success": False,
                "error": "Cancel failed - run may be completed",
                "execution_id": input.execution_id,
                "run_id": run_id,
            }

    except Exception as e:
        print(f"❌ Error cancelling run: {str(e)}\n")
        activity.logger.error(
            "cancel_agent_error",
            extra={
                "execution_id": input.execution_id,
                "error": str(e),
            }
        )
        return {
            "success": False,
            "error": str(e),
            "execution_id": input.execution_id,
        }
1139
+
1140
+
1141
@dataclass
class ActivityPersistConversationInput:
    """Payload for the ``persist_conversation_history`` activity."""

    # Execution the conversation belongs to.
    execution_id: str
    # Session to store the messages under; the activity falls back to
    # execution_id when this is empty.
    session_id: str
    # Messages to persist, each a free-form dict.
    messages: List[Dict[str, Any]]
    # Optional owner of the session.
    user_id: Optional[str] = None
    # Optional extra data; the activity sends an empty dict when omitted.
    metadata: Optional[Dict[str, Any]] = None
1149
+
1150
+
1151
@activity.defn(name="persist_conversation_history")
async def persist_conversation_history(input: ActivityPersistConversationInput) -> dict:
    """
    Save a conversation's messages through the Control Plane client.

    Obtains the client via ``get_control_plane_client()`` and hands it the
    messages, keyed by ``session_id`` (or ``execution_id`` when no session id
    is given).

    Args:
        input: Contains execution_id, session_id, messages, and optional metadata

    Returns:
        Dict with success status and persistence details. On success it holds
        ``execution_id``, ``message_count`` and ``persisted_at``; on failure
        ``success`` is False and ``error`` describes the problem. Exceptions
        raised while persisting are logged and returned as a failure result
        rather than propagated.
    """
    # Log entries carry only the first 8 characters of the execution id.
    short_id = input.execution_id[:8] if input.execution_id else "unknown"
    short_session = input.session_id[:8] if input.session_id else "none"

    activity.logger.info(
        "persisting_conversation",
        extra={
            "execution_id": short_id,
            "session_id": short_session,
            "message_count": len(input.messages),
        },
    )

    try:
        client = get_control_plane_client()

        persisted = client.persist_session(
            execution_id=input.execution_id,
            session_id=input.session_id or input.execution_id,
            user_id=input.user_id,
            messages=input.messages,
            metadata=input.metadata or {},
        )

        # Guard clause: the client reported failure without raising.
        if not persisted:
            activity.logger.warning(
                "conversation_persistence_failed",
                extra={"execution_id": short_id},
            )
            return {
                "success": False,
                "error": "Control Plane API returned failure",
                "execution_id": input.execution_id,
            }

        activity.logger.info(
            "conversation_persisted",
            extra={
                "execution_id": short_id,
                "message_count": len(input.messages),
            },
        )
        return {
            "success": True,
            "execution_id": input.execution_id,
            "message_count": len(input.messages),
            "persisted_at": datetime.now(timezone.utc).isoformat(),
        }

    except Exception as exc:
        exc_name = type(exc).__name__
        exc_text = str(exc) or "No error message provided"

        activity.logger.error(
            "conversation_persistence_error",
            extra={
                "execution_id": short_id,
                "error_type": exc_name,
                "error": exc_text[:500],  # Truncate very long errors
                "message_count": len(input.messages),
            },
            exc_info=True,
        )
        return {
            "success": False,
            "error": f"{exc_name}: {exc_text}",
            "error_type": exc_name,
            "execution_id": input.execution_id,
        }