kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubiya-control-plane-api might be problematic. Click here for more details.

Files changed (185) hide show
  1. control_plane_api/README.md +266 -0
  2. control_plane_api/__init__.py +0 -0
  3. control_plane_api/__version__.py +1 -0
  4. control_plane_api/alembic/README +1 -0
  5. control_plane_api/alembic/env.py +98 -0
  6. control_plane_api/alembic/script.py.mako +28 -0
  7. control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
  8. control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
  9. control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
  10. control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
  11. control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
  12. control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
  13. control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
  14. control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
  15. control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
  16. control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
  17. control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
  18. control_plane_api/alembic.ini +148 -0
  19. control_plane_api/api/index.py +12 -0
  20. control_plane_api/app/__init__.py +11 -0
  21. control_plane_api/app/activities/__init__.py +20 -0
  22. control_plane_api/app/activities/agent_activities.py +379 -0
  23. control_plane_api/app/activities/team_activities.py +410 -0
  24. control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
  25. control_plane_api/app/config/__init__.py +35 -0
  26. control_plane_api/app/config/api_config.py +354 -0
  27. control_plane_api/app/config/model_pricing.py +318 -0
  28. control_plane_api/app/config.py +95 -0
  29. control_plane_api/app/database.py +135 -0
  30. control_plane_api/app/exceptions.py +408 -0
  31. control_plane_api/app/lib/__init__.py +11 -0
  32. control_plane_api/app/lib/job_executor.py +312 -0
  33. control_plane_api/app/lib/kubiya_client.py +235 -0
  34. control_plane_api/app/lib/litellm_pricing.py +166 -0
  35. control_plane_api/app/lib/planning_tools/__init__.py +22 -0
  36. control_plane_api/app/lib/planning_tools/agents.py +155 -0
  37. control_plane_api/app/lib/planning_tools/base.py +189 -0
  38. control_plane_api/app/lib/planning_tools/environments.py +214 -0
  39. control_plane_api/app/lib/planning_tools/resources.py +240 -0
  40. control_plane_api/app/lib/planning_tools/teams.py +198 -0
  41. control_plane_api/app/lib/policy_enforcer_client.py +939 -0
  42. control_plane_api/app/lib/redis_client.py +436 -0
  43. control_plane_api/app/lib/supabase.py +71 -0
  44. control_plane_api/app/lib/temporal_client.py +138 -0
  45. control_plane_api/app/lib/validation/__init__.py +20 -0
  46. control_plane_api/app/lib/validation/runtime_validation.py +287 -0
  47. control_plane_api/app/main.py +128 -0
  48. control_plane_api/app/middleware/__init__.py +8 -0
  49. control_plane_api/app/middleware/auth.py +513 -0
  50. control_plane_api/app/middleware/exception_handler.py +267 -0
  51. control_plane_api/app/middleware/rate_limiting.py +384 -0
  52. control_plane_api/app/middleware/request_id.py +202 -0
  53. control_plane_api/app/models/__init__.py +27 -0
  54. control_plane_api/app/models/agent.py +79 -0
  55. control_plane_api/app/models/analytics.py +206 -0
  56. control_plane_api/app/models/associations.py +81 -0
  57. control_plane_api/app/models/environment.py +63 -0
  58. control_plane_api/app/models/execution.py +93 -0
  59. control_plane_api/app/models/job.py +179 -0
  60. control_plane_api/app/models/llm_model.py +75 -0
  61. control_plane_api/app/models/presence.py +49 -0
  62. control_plane_api/app/models/project.py +47 -0
  63. control_plane_api/app/models/session.py +38 -0
  64. control_plane_api/app/models/team.py +66 -0
  65. control_plane_api/app/models/workflow.py +55 -0
  66. control_plane_api/app/policies/README.md +121 -0
  67. control_plane_api/app/policies/approved_users.rego +62 -0
  68. control_plane_api/app/policies/business_hours.rego +51 -0
  69. control_plane_api/app/policies/rate_limiting.rego +100 -0
  70. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  71. control_plane_api/app/routers/__init__.py +4 -0
  72. control_plane_api/app/routers/agents.py +364 -0
  73. control_plane_api/app/routers/agents_v2.py +1260 -0
  74. control_plane_api/app/routers/analytics.py +1014 -0
  75. control_plane_api/app/routers/context_manager.py +562 -0
  76. control_plane_api/app/routers/environment_context.py +270 -0
  77. control_plane_api/app/routers/environments.py +715 -0
  78. control_plane_api/app/routers/execution_environment.py +517 -0
  79. control_plane_api/app/routers/executions.py +1911 -0
  80. control_plane_api/app/routers/health.py +92 -0
  81. control_plane_api/app/routers/health_v2.py +326 -0
  82. control_plane_api/app/routers/integrations.py +274 -0
  83. control_plane_api/app/routers/jobs.py +1344 -0
  84. control_plane_api/app/routers/models.py +82 -0
  85. control_plane_api/app/routers/models_v2.py +361 -0
  86. control_plane_api/app/routers/policies.py +639 -0
  87. control_plane_api/app/routers/presence.py +234 -0
  88. control_plane_api/app/routers/projects.py +902 -0
  89. control_plane_api/app/routers/runners.py +379 -0
  90. control_plane_api/app/routers/runtimes.py +172 -0
  91. control_plane_api/app/routers/secrets.py +155 -0
  92. control_plane_api/app/routers/skills.py +1001 -0
  93. control_plane_api/app/routers/skills_definitions.py +140 -0
  94. control_plane_api/app/routers/task_planning.py +1256 -0
  95. control_plane_api/app/routers/task_queues.py +654 -0
  96. control_plane_api/app/routers/team_context.py +270 -0
  97. control_plane_api/app/routers/teams.py +1400 -0
  98. control_plane_api/app/routers/worker_queues.py +1545 -0
  99. control_plane_api/app/routers/workers.py +935 -0
  100. control_plane_api/app/routers/workflows.py +204 -0
  101. control_plane_api/app/runtimes/__init__.py +6 -0
  102. control_plane_api/app/runtimes/validation.py +344 -0
  103. control_plane_api/app/schemas/job_schemas.py +295 -0
  104. control_plane_api/app/services/__init__.py +1 -0
  105. control_plane_api/app/services/agno_service.py +619 -0
  106. control_plane_api/app/services/litellm_service.py +190 -0
  107. control_plane_api/app/services/policy_service.py +525 -0
  108. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  109. control_plane_api/app/skills/__init__.py +44 -0
  110. control_plane_api/app/skills/base.py +229 -0
  111. control_plane_api/app/skills/business_intelligence.py +189 -0
  112. control_plane_api/app/skills/data_visualization.py +154 -0
  113. control_plane_api/app/skills/docker.py +104 -0
  114. control_plane_api/app/skills/file_generation.py +94 -0
  115. control_plane_api/app/skills/file_system.py +110 -0
  116. control_plane_api/app/skills/python.py +92 -0
  117. control_plane_api/app/skills/registry.py +65 -0
  118. control_plane_api/app/skills/shell.py +102 -0
  119. control_plane_api/app/skills/workflow_executor.py +469 -0
  120. control_plane_api/app/utils/workflow_executor.py +354 -0
  121. control_plane_api/app/workflows/__init__.py +11 -0
  122. control_plane_api/app/workflows/agent_execution.py +507 -0
  123. control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
  124. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  125. control_plane_api/app/workflows/team_execution.py +399 -0
  126. control_plane_api/scripts/seed_models.py +239 -0
  127. control_plane_api/worker/__init__.py +0 -0
  128. control_plane_api/worker/activities/__init__.py +0 -0
  129. control_plane_api/worker/activities/agent_activities.py +1241 -0
  130. control_plane_api/worker/activities/approval_activities.py +234 -0
  131. control_plane_api/worker/activities/runtime_activities.py +388 -0
  132. control_plane_api/worker/activities/skill_activities.py +267 -0
  133. control_plane_api/worker/activities/team_activities.py +1217 -0
  134. control_plane_api/worker/config/__init__.py +31 -0
  135. control_plane_api/worker/config/worker_config.py +275 -0
  136. control_plane_api/worker/control_plane_client.py +529 -0
  137. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  138. control_plane_api/worker/models/__init__.py +1 -0
  139. control_plane_api/worker/models/inputs.py +89 -0
  140. control_plane_api/worker/runtimes/__init__.py +31 -0
  141. control_plane_api/worker/runtimes/base.py +789 -0
  142. control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
  143. control_plane_api/worker/runtimes/default_runtime.py +617 -0
  144. control_plane_api/worker/runtimes/factory.py +173 -0
  145. control_plane_api/worker/runtimes/validation.py +93 -0
  146. control_plane_api/worker/services/__init__.py +1 -0
  147. control_plane_api/worker/services/agent_executor.py +422 -0
  148. control_plane_api/worker/services/agent_executor_v2.py +383 -0
  149. control_plane_api/worker/services/analytics_collector.py +457 -0
  150. control_plane_api/worker/services/analytics_service.py +464 -0
  151. control_plane_api/worker/services/approval_tools.py +310 -0
  152. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  153. control_plane_api/worker/services/cancellation_manager.py +177 -0
  154. control_plane_api/worker/services/data_visualization.py +827 -0
  155. control_plane_api/worker/services/jira_tools.py +257 -0
  156. control_plane_api/worker/services/runtime_analytics.py +328 -0
  157. control_plane_api/worker/services/session_service.py +194 -0
  158. control_plane_api/worker/services/skill_factory.py +175 -0
  159. control_plane_api/worker/services/team_executor.py +574 -0
  160. control_plane_api/worker/services/team_executor_v2.py +465 -0
  161. control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
  162. control_plane_api/worker/tests/__init__.py +1 -0
  163. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  164. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  165. control_plane_api/worker/tests/integration/__init__.py +0 -0
  166. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  167. control_plane_api/worker/tests/unit/__init__.py +0 -0
  168. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  169. control_plane_api/worker/utils/__init__.py +1 -0
  170. control_plane_api/worker/utils/chunk_batcher.py +305 -0
  171. control_plane_api/worker/utils/retry_utils.py +60 -0
  172. control_plane_api/worker/utils/streaming_utils.py +373 -0
  173. control_plane_api/worker/worker.py +753 -0
  174. control_plane_api/worker/workflows/__init__.py +0 -0
  175. control_plane_api/worker/workflows/agent_execution.py +589 -0
  176. control_plane_api/worker/workflows/team_execution.py +429 -0
  177. kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
  178. kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
  179. kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
  180. kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
  181. kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
  182. kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
  183. kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
  184. {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
  185. {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1256 @@
1
+ """
2
+ Task Planning Router - AI-powered task analysis and planning using Agno
3
+ """
4
+
5
+ from fastapi import APIRouter, HTTPException, status, Depends
6
+ from fastapi.responses import StreamingResponse
7
+ from pydantic import BaseModel, Field, field_validator
8
+ from typing import List, Dict, Optional, Literal, AsyncIterator
9
+ import structlog
10
+ from agno.agent import Agent
11
+ from agno.models.litellm import LiteLLM
12
+ from agno.tools.reasoning import ReasoningTools
13
+ from agno.workflow import Workflow, Step
14
+ from agno.run.workflow import WorkflowRunOutput
15
+ import os
16
+ import traceback
17
+ import json
18
+ import asyncio
19
+
20
+ from control_plane_api.app.lib.litellm_pricing import get_litellm_pricing, get_model_display_name
21
+ from control_plane_api.app.lib.planning_tools import (
22
+ AgentsContextTools,
23
+ TeamsContextTools,
24
+ EnvironmentsContextTools,
25
+ ResourcesContextTools,
26
+ )
27
+
28
# Module-level FastAPI router and structured logger shared by all
# task-planning endpoints in this module.
router = APIRouter()
logger = structlog.get_logger()
30
+
31
+
32
+ # Request/Response Models
33
class AgentInfo(BaseModel):
    """Describes a single agent available for task planning.

    Normalization performed before validation:
    - an empty-string ``description`` becomes ``None``;
    - a missing/empty ``model_id`` falls back to ``'claude-sonnet-4'``.
    """
    id: str
    name: str
    model_id: str
    description: Optional[str] = None

    @field_validator('description', mode='before')
    @classmethod
    def empty_str_to_none(cls, v):
        """Treat an empty-string description as absent."""
        return None if v == '' else v

    @field_validator('model_id', mode='before')
    @classmethod
    def default_model(cls, v):
        """Substitute the default model when none is supplied."""
        # Any falsy value (None, '') triggers the fallback, matching the
        # original `not v or v == ''` check.
        return v if v else 'claude-sonnet-4'
55
+
56
+
57
class TeamInfo(BaseModel):
    """Describes a team of agents available for task planning.

    An empty-string ``description`` is normalized to ``None`` before
    validation; ``agents`` defaults to an empty list.
    """
    id: str
    name: str
    agents: List[Dict] = []
    description: Optional[str] = None

    @field_validator('description', mode='before')
    @classmethod
    def empty_str_to_none(cls, v):
        """Treat an empty-string description as absent."""
        return None if v == '' else v
71
+
72
+
73
class EnvironmentInfo(BaseModel):
    """Information about an execution environment"""
    id: str
    name: str
    # NOTE(review): defaults assume a production/active environment when the
    # caller omits these fields — confirm that matches upstream callers.
    type: Optional[str] = "production"
    status: Optional[str] = "active"
79
+
80
+
81
class WorkerQueueInfo(BaseModel):
    """Information about a worker queue"""
    id: str
    name: str
    # Environment this queue serves; may be unset.
    environment_id: Optional[str] = None
    # Number of workers currently polling this queue (0 = no capacity).
    active_workers: int = 0
    status: Optional[str] = "active"
88
+
89
+
90
class TaskPlanRequest(BaseModel):
    """Request to plan a task"""
    description: str = Field(..., description="Task description")
    priority: Literal['low', 'medium', 'high', 'critical'] = Field('medium', description="Task priority")
    project_id: Optional[str] = Field(None, description="Associated project ID")
    # Inventory of resources the planner may choose from.
    agents: List[AgentInfo] = Field([], description="Available agents")
    teams: List[TeamInfo] = Field([], description="Available teams")
    environments: List[EnvironmentInfo] = Field([], description="Available execution environments")
    worker_queues: List[WorkerQueueInfo] = Field([], description="Available worker queues")
    # Refinement loop: these fields carry state into iterations > 1.
    refinement_feedback: Optional[str] = Field(None, description="User feedback for plan refinement")
    conversation_context: Optional[str] = Field(None, description="Conversation history for context")
    previous_plan: Optional[Dict] = Field(None, description="Previous plan for refinement")
    iteration: int = Field(1, description="Planning iteration number")
103
+
104
+
105
class ComplexityInfo(BaseModel):
    """Task complexity assessment"""
    # Fibonacci-style estimate bounded to 1..21 by the field constraints.
    story_points: int = Field(..., ge=1, le=21, description="Story points (1-21)")
    confidence: Literal['low', 'medium', 'high'] = Field(..., description="Confidence level")
    reasoning: str = Field(..., description="Reasoning for complexity assessment")
110
+
111
+
112
class AgentModelInfo(BaseModel):
    """Information about the model an agent will use"""
    model_id: str  # e.g., "claude-sonnet-4", "gpt-4o"
    estimated_input_tokens: int
    estimated_output_tokens: int
    # Pricing is per 1K tokens; total_model_cost is the combined USD estimate.
    cost_per_1k_input_tokens: float
    cost_per_1k_output_tokens: float
    total_model_cost: float
120
+
121
+
122
class ToolUsageInfo(BaseModel):
    """Expected tool usage for an agent"""
    tool_name: str  # e.g., "aws_s3", "kubectl", "bash"
    estimated_calls: int
    # total_tool_cost is presumably estimated_calls * cost_per_call — the
    # value is produced by the planning LLM, not computed here.
    cost_per_call: float
    total_tool_cost: float
128
+
129
+
130
class TeamBreakdownItem(BaseModel):
    """Breakdown of work for a specific team/agent"""
    # Either team_* or agent_* is populated depending on who owns the work.
    team_id: Optional[str] = None
    team_name: str
    agent_id: Optional[str] = None
    agent_name: Optional[str] = None
    responsibilities: List[str]
    estimated_time_hours: float
    # Optional cost detail for this slice of the work.
    model_info: Optional[AgentModelInfo] = None
    expected_tools: List[ToolUsageInfo] = []
    agent_cost: float = 0.0  # Total cost for this agent (model + tools)
141
+
142
+
143
class RecommendedExecution(BaseModel):
    """AI recommendation for which entity should execute the task"""
    entity_type: Literal['agent', 'team']
    entity_id: str
    entity_name: str
    reasoning: str
    # Optional placement recommendation: where (environment) and on what
    # capacity (worker queue) the task should run.
    recommended_environment_id: Optional[str] = None
    recommended_environment_name: Optional[str] = None
    recommended_worker_queue_id: Optional[str] = None
    recommended_worker_queue_name: Optional[str] = None
    execution_reasoning: Optional[str] = None
154
+
155
+
156
class LLMCostBreakdown(BaseModel):
    """Detailed LLM cost breakdown by model"""
    model_id: str
    estimated_input_tokens: int
    estimated_output_tokens: int
    # Pricing per 1K tokens; total_cost is the combined USD estimate.
    cost_per_1k_input_tokens: float
    cost_per_1k_output_tokens: float
    total_cost: float
164
+
165
+
166
class ToolCostBreakdown(BaseModel):
    """Tool execution cost breakdown"""
    category: str  # e.g., "AWS APIs", "Database Queries", "External APIs"
    tools: List[ToolUsageInfo]
    # Sum of total_tool_cost across `tools` (produced by the planning LLM).
    category_total: float
171
+
172
+
173
class RuntimeCostBreakdown(BaseModel):
    """Runtime and compute costs"""
    worker_execution_hours: float
    cost_per_hour: float
    total_cost: float
178
+
179
+
180
class CostBreakdownItem(BaseModel):
    """Individual cost breakdown item (legacy, kept for backwards compatibility)"""
    item: str
    cost: float
184
+
185
+
186
class HumanResourceCost(BaseModel):
    """Human resource cost breakdown by role"""
    role: str  # e.g., "Senior DevOps Engineer", "Security Engineer"
    hourly_rate: float  # e.g., 150.00
    estimated_hours: float  # e.g., 8.0
    total_cost: float  # e.g., 1200.00
192
+
193
+
194
class CostEstimate(BaseModel):
    """Enhanced cost estimation for the task"""
    # Grand total in USD across all breakdowns below.
    estimated_cost_usd: float
    # Legacy breakdown (keep for backwards compatibility)
    breakdown: List[CostBreakdownItem] = []
    # New detailed breakdowns
    llm_costs: List[LLMCostBreakdown] = []
    tool_costs: List[ToolCostBreakdown] = []
    runtime_cost: Optional[RuntimeCostBreakdown] = None
203
+
204
+
205
class RealizedSavings(BaseModel):
    """Realized savings by using Kubiya orchestration platform.

    Compares a manual-execution baseline ("without Kubiya") against the
    AI-orchestrated estimate ("with Kubiya"); all values are produced by
    the planning LLM, not computed locally.
    """
    # Without Kubiya (manual execution)
    without_kubiya_cost: float  # Total cost if done manually
    without_kubiya_hours: float  # Total time if done manually
    without_kubiya_resources: List[HumanResourceCost]  # Resource breakdown

    # With Kubiya (AI orchestration)
    with_kubiya_cost: float  # AI execution cost
    with_kubiya_hours: float  # AI execution time

    # Realized Savings
    money_saved: float  # Dollars saved
    time_saved_hours: float  # Hours saved
    time_saved_percentage: int  # Percentage of time saved

    # Summary
    savings_summary: str  # Compelling savings narrative
223
+
224
+
225
class TaskPlanResponse(BaseModel):
    """AI-generated task plan.

    This is also the `output_schema` handed to the planning agent, so the
    LLM's structured output must conform to it.
    """
    title: str
    summary: str
    complexity: ComplexityInfo
    team_breakdown: List[TeamBreakdownItem]
    recommended_execution: RecommendedExecution
    cost_estimate: CostEstimate
    realized_savings: RealizedSavings
    risks: List[str] = []
    prerequisites: List[str] = []
    success_criteria: List[str] = []
    # Optional fields for when AI needs clarification
    has_questions: bool = False
    questions: Optional[str] = None
240
+
241
+
242
+ def _infer_agent_specialty(name: str, description: Optional[str]) -> str:
243
+ """
244
+ Infer agent specialty from name and description for better context.
245
+ """
246
+ name_lower = name.lower()
247
+ desc_lower = (description or "").lower()
248
+
249
+ # Check for specific specialties
250
+ if "devops" in name_lower or "devops" in desc_lower:
251
+ return "Infrastructure, deployments, cloud operations, monitoring"
252
+ elif "security" in name_lower or "ciso" in name_lower or "security" in desc_lower:
253
+ return "Security audits, compliance, vulnerability scanning, IAM"
254
+ elif "data" in name_lower or "analytics" in desc_lower:
255
+ return "Data analysis, ETL, reporting, database operations"
256
+ elif "backend" in name_lower or "api" in desc_lower:
257
+ return "API development, backend services, database integration"
258
+ elif "frontend" in name_lower or "ui" in desc_lower:
259
+ return "UI development, React/Vue/Angular, responsive design"
260
+ elif "full" in name_lower or "fullstack" in name_lower:
261
+ return "End-to-end development, frontend + backend + infrastructure"
262
+ elif "test" in name_lower or "qa" in desc_lower:
263
+ return "Testing, quality assurance, test automation"
264
+ else:
265
+ return "General automation, scripting, API integration, cloud operations"
266
+
267
+
268
def create_planning_agent(organization_id: Optional[str] = None) -> Agent:
    """
    Create an Agno agent for task planning using LiteLLM with context tools.

    Args:
        organization_id: Optional organization ID for filtering resources

    Returns:
        A configured Agno Agent that emits structured ``TaskPlanResponse`` output.

    Raises:
        ValueError: If LITELLM_API_KEY is not set (or is blank).
    """
    # Resolve the LiteLLM endpoint. FIX: strip each candidate BEFORE the
    # truthiness check — previously a whitespace-only LITELLM_API_URL /
    # LITELLM_API_BASE survived the `or` chain and was then stripped to "",
    # producing an empty api_base instead of the default.
    litellm_api_url = (
        os.getenv("LITELLM_API_URL", "").strip() or
        os.getenv("LITELLM_API_BASE", "").strip() or
        "https://llm-proxy.kubiya.ai"
    )

    litellm_api_key = os.getenv("LITELLM_API_KEY", "").strip()

    if not litellm_api_key:
        raise ValueError("LITELLM_API_KEY environment variable not set")

    # Same fix for the model: a blank LITELLM_DEFAULT_MODEL now falls back
    # to the default instead of yielding an empty model id.
    model = os.getenv("LITELLM_DEFAULT_MODEL", "").strip() or "kubiya/claude-sonnet-4"

    # Base URL of this control plane, consumed by the context tools below.
    control_plane_url = os.getenv("CONTROL_PLANE_API_URL", "http://localhost:8000")

    logger.info(
        "creating_agno_planning_agent_with_tools",
        litellm_api_url=litellm_api_url,
        model=model,
        has_api_key=bool(litellm_api_key),
        control_plane_url=control_plane_url,
        organization_id=organization_id,
    )

    # Context tools let the agent query live platform state (agents, teams,
    # environments, worker queues), scoped to the organization when given.
    agents_tools = AgentsContextTools(base_url=control_plane_url, organization_id=organization_id)
    teams_tools = TeamsContextTools(base_url=control_plane_url, organization_id=organization_id)
    environments_tools = EnvironmentsContextTools(base_url=control_plane_url, organization_id=organization_id)
    resources_tools = ResourcesContextTools(base_url=control_plane_url, organization_id=organization_id)

    # Create fast planning agent optimized for speed: structured output,
    # minimal toolset, no conversation history, reduced retries.
    planning_agent = Agent(
        name="Task Planning Agent",
        role="Expert project manager and task planner",
        model=LiteLLM(
            id=f"openai/{model}",
            api_base=litellm_api_url,
            api_key=litellm_api_key,
        ),
        output_schema=TaskPlanResponse,  # Use Pydantic model for structured output
        tools=[
            # Only essential context tools - no ReasoningTools for speed
            agents_tools,
            teams_tools,
            environments_tools,
            resources_tools,
        ],
        instructions=[
            "You are a fast, efficient task planning agent.",
            "",
            "**Use Tools:**",
            "- Call list_agents() for available agents",
            "- Call list_teams() for available teams",
            "- Call list_environments() for environments",
            "- Call list_worker_queues() for worker capacity",
            "",
            "**Plan Requirements:**",
            "- Choose the best agent/team based on capabilities",
            "- Consider resource availability and capacity",
            "- Provide realistic time and cost estimates",
            "- Match worker queues to environments when possible",
            "- Select queues with available capacity (active_workers > 0)",
        ],
        description="Fast task planner for AI agent teams",
        markdown=False,
        add_history_to_context=False,  # Disable for speed
        retries=2,  # Reduced retries
    )

    return planning_agent
347
+
348
+
349
+ @router.post("/tasks/plan")
350
+ async def plan_task(request: TaskPlanRequest):
351
+ """
352
+ Generate an AI-powered task plan using Agno workflow
353
+
354
+ This endpoint:
355
+ 1. Analyzes the task description and context
356
+ 2. Assesses complexity (story points)
357
+ 3. Recommends which agent/team should execute
358
+ 4. Breaks down work by team
359
+ 5. Estimates costs and time savings
360
+ 6. Identifies risks and prerequisites
361
+ """
362
+ try:
363
+ logger.info(
364
+ "task_planning_requested",
365
+ description=request.description[:100],
366
+ priority=request.priority,
367
+ agents_count=len(request.agents),
368
+ teams_count=len(request.teams),
369
+ iteration=request.iteration,
370
+ has_conversation_context=bool(request.conversation_context and request.conversation_context.strip()),
371
+ has_refinement_feedback=bool(request.refinement_feedback),
372
+ )
373
+
374
+ # Validate we have agents or teams
375
+ if not request.agents and not request.teams:
376
+ raise HTTPException(
377
+ status_code=status.HTTP_400_BAD_REQUEST,
378
+ detail="At least one agent or team must be provided"
379
+ )
380
+
381
+ # Fetch LiteLLM pricing data for accurate cost estimation
382
+ logger.info("fetching_litellm_pricing_data")
383
+ pricing_data = await get_litellm_pricing()
384
+ logger.info("litellm_pricing_data_fetched", models_available=len(pricing_data))
385
+
386
+ # Create enhanced context for the AI
387
+ agents_context = "\n".join([
388
+ f"- **{a.name}** (ID: `{a.id}`)\n"
389
+ f" - **Model**: {a.model_id}\n"
390
+ f" - **Capabilities**: {a.description or 'General-purpose AI agent with code execution, API calls, and automation capabilities'}\n"
391
+ f" - **Best For**: {_infer_agent_specialty(a.name, a.description)}"
392
+ for a in request.agents
393
+ ])
394
+
395
+ teams_context = "\n".join([
396
+ f"- **{t.name}** (ID: `{t.id}`)\n"
397
+ f" - **Team Size**: {len(t.agents)} agents\n"
398
+ f" - **Description**: {t.description or 'Cross-functional team capable of handling complex multi-step tasks'}\n"
399
+ f" - **Team Members**: {', '.join([agent.get('name', 'Agent') for agent in t.agents[:3]])}{'...' if len(t.agents) > 3 else ''}\n"
400
+ f" - **Best For**: Multi-domain tasks requiring coordination, full-stack development, complex workflows"
401
+ for t in request.teams
402
+ ])
403
+
404
+ # Add execution environments context
405
+ environments_context = "\n".join([
406
+ f"- **{e.name}** (ID: `{e.id}`)\n"
407
+ f" - **Type**: {e.type}\n"
408
+ f" - **Status**: {e.status}"
409
+ for e in request.environments
410
+ ]) if request.environments else "No execution environments specified"
411
+
412
+ # Add worker queues context
413
+ worker_queues_context = "\n".join([
414
+ f"- **{q.name}** (ID: `{q.id}`)\n"
415
+ f" - **Environment**: {q.environment_id or 'Not specified'}\n"
416
+ f" - **Active Workers**: {q.active_workers}\n"
417
+ f" - **Status**: {q.status}\n"
418
+ f" - **Capacity**: {'Available' if q.active_workers > 0 and q.status == 'active' else 'Limited or Inactive'}"
419
+ for q in request.worker_queues
420
+ ]) if request.worker_queues else "No worker queues specified"
421
+
422
+ # Add system capabilities context
423
+ system_capabilities = """
424
+ **Available System Capabilities:**
425
+ - **Code Execution**: Python, Bash, JavaScript, and other languages
426
+ - **Cloud Integrations**: AWS (S3, EC2, Lambda, RDS, CloudWatch), Azure, GCP
427
+ - **APIs & Tools**: REST APIs, GraphQL, Kubernetes, Docker, Terraform
428
+ - **Databases**: PostgreSQL, MySQL, MongoDB, Redis
429
+ - **Monitoring**: Datadog, Prometheus, Grafana, CloudWatch
430
+ - **Security**: IAM policies, security scanning, compliance checks
431
+ - **DevOps**: CI/CD pipelines, Infrastructure as Code, automation scripts
432
+ """
433
+
434
+ # Build pricing context from LiteLLM data for common models
435
+ pricing_context = """
436
+ **Model Pricing Reference** (use these for accurate cost estimates):
437
+ - **Claude Sonnet 4**: $0.003/1K input, $0.015/1K output tokens
438
+ - **Claude 3.5 Sonnet**: $0.003/1K input, $0.015/1K output tokens
439
+ - **Claude 3 Opus**: $0.015/1K input, $0.075/1K output tokens
440
+ - **Claude 3 Haiku**: $0.00025/1K input, $0.00125/1K output tokens
441
+ - **GPT-4o**: $0.0025/1K input, $0.01/1K output tokens
442
+ - **GPT-4o Mini**: $0.00015/1K input, $0.0006/1K output tokens
443
+ - **GPT-4 Turbo**: $0.01/1K input, $0.03/1K output tokens
444
+ - **Gemini 2.0 Flash**: $0.0001/1K input, $0.0003/1K output tokens
445
+ - **Gemini 1.5 Pro**: $0.00125/1K input, $0.005/1K output tokens
446
+
447
+ **Tool Cost Estimates:**
448
+ - AWS API calls: $0.0004-0.001 per call
449
+ - Database queries: $0.0001 per query
450
+ - Kubernetes operations: Free (compute cost only)
451
+ - Bash/shell commands: Free (compute cost only)
452
+
453
+ **Runtime Costs:**
454
+ - Worker execution: $0.10/hour typical
455
+ """
456
+
457
+ # Check if this is a refinement or subsequent iteration
458
+ is_refinement = request.iteration > 1 and request.refinement_feedback
459
+ has_conversation_history = bool(request.conversation_context and request.conversation_context.strip())
460
+
461
+ # After iteration 1, or if there's conversation history, be decisive
462
+ should_be_decisive = request.iteration > 1 or has_conversation_history
463
+
464
+ # Build the planning prompt
465
+ planning_prompt = f"""
466
+ # Task Planning Request - Iteration #{request.iteration}
467
+
468
+ ## Task Description
469
+ {request.description}
470
+
471
+ ## Priority
472
+ {request.priority.upper()}
473
+
474
+ {"## Previous Conversation (USE THIS CONTEXT)" if has_conversation_history else ""}
475
+ {request.conversation_context if has_conversation_history else ""}
476
+
477
+ {"## User Feedback for Refinement" if request.refinement_feedback else ""}
478
+ {request.refinement_feedback if request.refinement_feedback else ""}
479
+
480
+ {"## Previous Plan (to be refined)" if request.previous_plan else ""}
481
+ {json.dumps(request.previous_plan, indent=2) if request.previous_plan else ""}
482
+
483
+ ## Available Resources
484
+
485
+ ### Agents
486
+ {agents_context if agents_context else "No individual agents available"}
487
+
488
+ ### Teams
489
+ {teams_context if teams_context else "No teams available"}
490
+
491
+ ### Execution Environments
492
+ {environments_context}
493
+
494
+ ### Worker Queues
495
+ {worker_queues_context}
496
+
497
+ {system_capabilities}
498
+
499
+ {pricing_context}
500
+
501
+ ## Your Task
502
+
503
+ {'**BE DECISIVE**: You have conversation history showing the user has already provided context. DO NOT ask more questions. Use the information provided in the conversation history above to create a reasonable plan. Make sensible assumptions where needed and proceed with planning.' if should_be_decisive else '**FIRST ITERATION**: Review if you have enough context. ONLY ask questions if you are missing CRITICAL information that makes planning impossible (like completely unknown technology stack or domain). If the task is reasonably clear, proceed with planning and make reasonable assumptions.'}
504
+
505
+ {'**IMPORTANT**: DO NOT ask questions. The user wants a plan now. Use the conversation history above.' if should_be_decisive else 'If you need CRITICAL information to proceed, respond with:'}
506
+ {'```json' if not should_be_decisive else ''}
507
+ {'{' if not should_be_decisive else ''}
508
+ {' "has_questions": true,' if not should_be_decisive else ''}
509
+ {' "questions": "List 1-2 CRITICAL questions (not nice-to-haves). Be very selective."' if not should_be_decisive else ''}
510
+ {'}' if not should_be_decisive else ''}
511
+ {'```' if not should_be_decisive else ''}
512
+
513
+ Otherwise, analyze this task and provide a comprehensive plan in the following JSON format:
514
+
515
+ {{
516
+ "title": "Concise task title",
517
+ "summary": "2-3 sentence summary of what needs to be done",
518
+ "complexity": {{
519
+ "story_points": 5,
520
+ "confidence": "medium",
521
+ "reasoning": "Explanation of complexity assessment"
522
+ }},
523
+ "team_breakdown": [
524
+ {{
525
+ "team_id": "team-uuid-or-null",
526
+ "team_name": "Team Name or Agent Name",
527
+ "agent_id": "agent-uuid-or-null",
528
+ "agent_name": "Agent Name if individual agent",
529
+ "responsibilities": ["Task 1", "Task 2"],
530
+ "estimated_time_hours": 2.5
531
+ }}
532
+ ],
533
+ "recommended_execution": {{
534
+ "entity_type": "agent or team",
535
+ "entity_id": "uuid-of-recommended-agent-or-team",
536
+ "entity_name": "Name",
537
+ "reasoning": "Why this entity is best suited for this task",
538
+ "recommended_environment_id": "uuid-of-best-environment-or-null",
539
+ "recommended_environment_name": "Environment Name or null",
540
+ "recommended_worker_queue_id": "uuid-of-best-worker-queue-or-null",
541
+ "recommended_worker_queue_name": "Worker Queue Name or null",
542
+ "execution_reasoning": "Why this environment/queue is optimal for execution (consider capacity, type, status)"
543
+ }},
544
+ "cost_estimate": {{
545
+ "estimated_cost_usd": 1.50,
546
+ "breakdown": [
547
+ {{"item": "API calls", "cost": 1.00}},
548
+ {{"item": "Processing", "cost": 0.50}}
549
+ ]
550
+ }},
551
+ "realized_savings": {{
552
+ "without_kubiya_cost": 1440.0,
553
+ "without_kubiya_hours": 10.0,
554
+ "without_kubiya_resources": [
555
+ {{
556
+ "role": "Senior DevOps Engineer",
557
+ "hourly_rate": 150.0,
558
+ "estimated_hours": 8.0,
559
+ "total_cost": 1200.0
560
+ }},
561
+ {{
562
+ "role": "Security Engineer",
563
+ "hourly_rate": 120.0,
564
+ "estimated_hours": 2.0,
565
+ "total_cost": 240.0
566
+ }}
567
+ ],
568
+ "with_kubiya_cost": 3.75,
569
+ "with_kubiya_hours": 4.0,
570
+ "money_saved": 1436.25,
571
+ "time_saved_hours": 6.0,
572
+ "time_saved_percentage": 60,
573
+ "savings_summary": "By using Kubiya's AI orchestration, you saved $1,436 and 6 hours. Manual execution would require 10 hours of skilled engineers ($1,440), but Kubiya completes it in 4 hours for just $3.75."
574
+ }},
575
+ "risks": ["Risk 1", "Risk 2"],
576
+ "prerequisites": ["Prerequisite 1", "Prerequisite 2"],
577
+ "success_criteria": ["Criterion 1", "Criterion 2"]
578
+ }}
579
+
580
+ **Important Guidelines:**
581
+ 1. For `recommended_execution`, choose the MOST CAPABLE entity (agent or team) based on:
582
+ - Task complexity
583
+ - Agent/team capabilities and model
584
+ - Description fit
585
+ - Whether multiple agents are needed (prefer team) or single agent is sufficient
586
+ 2. The recommended entity MUST be from the available agents/teams list above
587
+ 3. For `recommended_environment_id` and `recommended_worker_queue_id`:
588
+ - Choose the BEST environment based on task requirements (production vs staging vs development)
589
+ - Choose a worker queue with AVAILABLE CAPACITY (active_workers > 0 and status = 'active')
590
+ - Match worker queue to the selected environment if possible
591
+ - Provide clear `execution_reasoning` explaining your environment/queue selection
592
+ - If no suitable queue is available, still recommend one and note the capacity concern in reasoning
593
+ 4. Use IDs exactly as provided from the lists above
594
+ 4. **CRITICAL - Enhanced Cost Breakdown**:
595
+ - **Team Breakdown**: For each agent/team member, include:
596
+ - `model_info`: Specify the model they'll use (use the model_id from agent info)
597
+ - Estimate input/output tokens based on task complexity
598
+ - Use realistic pricing: Claude Sonnet 4 ($0.003/1K in, $0.015/1K out), GPT-4o ($0.0025/1K in, $0.01/1K out)
599
+ - Calculate total_model_cost accurately
600
+ - `expected_tools`: List tools they'll use with estimated call counts
601
+ - AWS APIs: $0.0004-0.001 per call
602
+ - Database queries: $0.0001 per query
603
+ - Free tools (kubectl, bash): $0.0 per call
604
+ - `agent_cost`: Sum of model_cost + tool_costs
605
+ - **Cost Estimate**: Provide detailed breakdown:
606
+ - `llm_costs`: Array of LLM costs by model (aggregate from team breakdown)
607
+ - `tool_costs`: Categorized tool costs (AWS APIs, Database Queries, External APIs)
608
+ - `runtime_cost`: Worker execution time × cost per hour ($0.10/hr typical)
609
+ - Ensure `estimated_cost_usd` = sum of all LLM + tool + runtime costs
610
+ - Legacy `breakdown` still required for backwards compatibility
611
+ 5. **Realistic Token Estimates**:
612
+ - Simple tasks (story points 1-3): 2-5K input, 1-2K output tokens per agent
613
+ - Medium tasks (story points 5-8): 5-10K input, 2-5K output tokens per agent
614
+ - Complex tasks (story points 13-21): 10-20K input, 5-10K output tokens per agent
615
+ 6. **Tool Call Estimates**:
616
+ - Consider what APIs/tools the agent will actually use for this specific task
617
+ - Be realistic: Simple tasks might only need 5-10 API calls total
618
+ - Complex deployments might need 50+ API calls across multiple tools
619
+ 7. **CRITICAL - Realized Savings Calculation** (keep for backwards compatibility):
620
+ - **WITHOUT KUBIYA**: Calculate what it would cost using manual human execution
621
+ - Break down by SPECIFIC ROLES (e.g., "Senior DevOps Engineer", "Security Engineer")
622
+ - Use realistic hourly rates: Senior ($120-200/hr), Mid-level ($80-120/hr), Junior ($50-80/hr)
623
+ - Calculate without_kubiya_cost = sum of all human resource costs
624
+ - Estimate without_kubiya_hours = total time if done manually
625
+ - **WITH KUBIYA**: Calculate AI orchestration costs and time
626
+ - with_kubiya_cost = estimated AI execution cost (API calls, compute)
627
+ - with_kubiya_hours = estimated time for AI agents to complete
628
+ - **REALIZED SAVINGS**:
629
+ - money_saved = without_kubiya_cost - with_kubiya_cost
630
+ - time_saved_hours = without_kubiya_hours - with_kubiya_hours
631
+ - time_saved_percentage = (time_saved_hours / without_kubiya_hours) * 100
632
+ - **COMPELLING NARRATIVE**: Create savings_summary that emphasizes the concrete savings:
633
+ - "By using Kubiya, you saved $X and Y hours"
634
+ - Show the contrast: "Without Kubiya: $X (Y hours)" vs "With Kubiya: $X (Y hours)"
635
+ 6. Be specific and actionable in all fields
636
+ 7. Output ONLY valid JSON, no markdown formatting
637
+ """
638
+
639
+ # Get organization ID from agents/teams if available
640
+ organization_id = None
641
+ if request.agents and len(request.agents) > 0:
642
+ # Try to infer organization from first agent (you may want to pass this explicitly)
643
+ organization_id = getattr(request.agents[0], "organization_id", None)
644
+ elif request.teams and len(request.teams) > 0:
645
+ organization_id = getattr(request.teams[0], "organization_id", None)
646
+
647
+ # Create planning agent using LiteLLM with tools
648
+ logger.info("creating_planning_agent_with_tools", organization_id=organization_id)
649
+ planning_agent = create_planning_agent(organization_id=organization_id)
650
+ logger.info("planning_agent_created", agent_name=planning_agent.name)
651
+
652
+ # Run the agent with the planning prompt
653
+ logger.info("executing_agent_run", prompt_length=len(planning_prompt))
654
+ response = planning_agent.run(planning_prompt)
655
+ logger.info("agent_run_completed", has_content=hasattr(response, 'content'))
656
+
657
+ # With output_schema, response.content is already a TaskPlanResponse object
658
+ if not isinstance(response.content, TaskPlanResponse):
659
+ logger.error("unexpected_response_type", response_type=type(response.content).__name__)
660
+ raise HTTPException(
661
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
662
+ detail=f"Agent returned unexpected response type: {type(response.content).__name__}"
663
+ )
664
+
665
+ plan = response.content
666
+
667
+ # Check if AI is asking questions (for first iteration)
668
+ if plan.has_questions:
669
+ logger.info("task_planner_asking_questions", iteration=request.iteration)
670
+ return {
671
+ "plan": plan,
672
+ "has_questions": True,
673
+ "questions": plan.questions
674
+ }
675
+
676
+ logger.info(
677
+ "task_plan_generated",
678
+ title=plan.title,
679
+ complexity=plan.complexity.story_points,
680
+ recommended_entity=plan.recommended_execution.entity_name,
681
+ iteration=request.iteration,
682
+ is_refinement=is_refinement,
683
+ )
684
+
685
+ return {"plan": plan}
686
+
687
+ except json.JSONDecodeError as e:
688
+ logger.error("json_parse_error", error=str(e), traceback=traceback.format_exc())
689
+ raise HTTPException(
690
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
691
+ detail=f"Failed to parse AI response: {str(e)}"
692
+ )
693
+ except ValueError as e:
694
+ # Catch missing API key or other config errors
695
+ logger.error("configuration_error", error=str(e), traceback=traceback.format_exc())
696
+ raise HTTPException(
697
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
698
+ detail=f"Configuration error: {str(e)}"
699
+ )
700
+ except Exception as e:
701
+ logger.error("task_planning_error", error=str(e), error_type=type(e).__name__, traceback=traceback.format_exc())
702
+ raise HTTPException(
703
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
704
+ detail=f"Task planning failed: {str(e)}"
705
+ )
706
+
707
+
708
def format_sse_message(event: str, data: dict) -> str:
    """Serialize *data* as one Server-Sent Event frame for the given *event* name.

    The frame follows the SSE wire format: an ``event:`` line, a ``data:``
    line carrying the JSON-encoded payload, and a blank line terminating
    the message.
    """
    payload = json.dumps(data)
    return "event: {}\ndata: {}\n\n".format(event, payload)
711
+
712
+
713
async def generate_task_plan_stream(request: TaskPlanRequest) -> AsyncIterator[str]:
    """
    Async generator that yields SSE events during task planning.

    Yields ``progress`` events at fixed stages, ``reasoning`` / ``tool_call``
    events while the planning agent streams, then a final ``complete`` event
    carrying either clarifying questions or the full plan. On failure an
    ``error`` event is yielded instead of raising, so the SSE stream always
    terminates cleanly for the client.
    """
    try:
        # Yield initial progress
        yield format_sse_message("progress", {
            "stage": "initializing",
            "message": "🚀 Initializing AI Task Planner...",
            "progress": 10
        })

        await asyncio.sleep(0.1)  # Small delay for UX

        # Validate we have agents or teams; without either there is nothing to plan against.
        if not request.agents and not request.teams:
            yield format_sse_message("error", {
                "message": "At least one agent or team must be provided"
            })
            return

        logger.info(
            "task_planning_requested_stream",
            description=request.description[:100],
            priority=request.priority,
            agents_count=len(request.agents),
            teams_count=len(request.teams),
            iteration=request.iteration,
        )

        # Fetch LiteLLM pricing data for accurate cost estimation.
        # NOTE(review): pricing_data is only used for the log line below; the prompt
        # uses the hard-coded pricing_context string instead — confirm this is intended.
        logger.info("fetching_litellm_pricing_data_stream")
        pricing_data = await get_litellm_pricing()
        logger.info("litellm_pricing_data_fetched_stream", models_available=len(pricing_data))

        # Yield context gathering progress
        yield format_sse_message("progress", {
            "stage": "context",
            "message": "🌐 Gathering organizational context...",
            "progress": 20
        })

        await asyncio.sleep(0.2)

        # Create enhanced context for the AI (same as original endpoint):
        # one markdown bullet per agent, with model and inferred specialty.
        agents_context = "\n".join([
            f"- **{a.name}** (ID: `{a.id}`)\n"
            f" - **Model**: {a.model_id}\n"
            f" - **Capabilities**: {a.description or 'General-purpose AI agent with code execution, API calls, and automation capabilities'}\n"
            f" - **Best For**: {_infer_agent_specialty(a.name, a.description)}"
            for a in request.agents
        ])

        # One markdown bullet per team; only the first three member names are listed.
        teams_context = "\n".join([
            f"- **{t.name}** (ID: `{t.id}`)\n"
            f" - **Team Size**: {len(t.agents)} agents\n"
            f" - **Description**: {t.description or 'Cross-functional team capable of handling complex multi-step tasks'}\n"
            f" - **Team Members**: {', '.join([agent.get('name', 'Agent') for agent in t.agents[:3]])}{'...' if len(t.agents) > 3 else ''}\n"
            f" - **Best For**: Multi-domain tasks requiring coordination, full-stack development, complex workflows"
            for t in request.teams
        ])

        # Add execution environments context
        environments_context = "\n".join([
            f"- **{e.name}** (ID: `{e.id}`)\n"
            f" - **Type**: {e.type}\n"
            f" - **Status**: {e.status}"
            for e in request.environments
        ]) if request.environments else "No execution environments specified"

        # Add worker queues context; "Capacity" summarizes whether the queue can take work.
        worker_queues_context = "\n".join([
            f"- **{q.name}** (ID: `{q.id}`)\n"
            f" - **Environment**: {q.environment_id or 'Not specified'}\n"
            f" - **Active Workers**: {q.active_workers}\n"
            f" - **Status**: {q.status}\n"
            f" - **Capacity**: {'Available' if q.active_workers > 0 and q.status == 'active' else 'Limited or Inactive'}"
            for q in request.worker_queues
        ]) if request.worker_queues else "No worker queues specified"

        # Add system capabilities context (static marketing-style capability list for the prompt)
        system_capabilities = """
**Available System Capabilities:**
- **Code Execution**: Python, Bash, JavaScript, and other languages
- **Cloud Integrations**: AWS (S3, EC2, Lambda, RDS, CloudWatch), Azure, GCP
- **APIs & Tools**: REST APIs, GraphQL, Kubernetes, Docker, Terraform
- **Databases**: PostgreSQL, MySQL, MongoDB, Redis
- **Monitoring**: Datadog, Prometheus, Grafana, CloudWatch
- **Security**: IAM policies, security scanning, compliance checks
- **DevOps**: CI/CD pipelines, Infrastructure as Code, automation scripts
"""

        # Build pricing context from LiteLLM data for common models
        # (static reference table embedded in the prompt).
        pricing_context = """
**Model Pricing Reference** (use these for accurate cost estimates):
- **Claude Sonnet 4**: $0.003/1K input, $0.015/1K output tokens
- **Claude 3.5 Sonnet**: $0.003/1K input, $0.015/1K output tokens
- **Claude 3 Opus**: $0.015/1K input, $0.075/1K output tokens
- **Claude 3 Haiku**: $0.00025/1K input, $0.00125/1K output tokens
- **GPT-4o**: $0.0025/1K input, $0.01/1K output tokens
- **GPT-4o Mini**: $0.00015/1K input, $0.0006/1K output tokens
- **GPT-4 Turbo**: $0.01/1K input, $0.03/1K output tokens
- **Gemini 2.0 Flash**: $0.0001/1K input, $0.0003/1K output tokens
- **Gemini 1.5 Pro**: $0.00125/1K input, $0.005/1K output tokens

**Tool Cost Estimates:**
- AWS API calls: $0.0004-0.001 per call
- Database queries: $0.0001 per query
- Kubernetes operations: Free (compute cost only)
- Bash/shell commands: Free (compute cost only)

**Runtime Costs:**
- Worker execution: $0.10/hour typical
"""

        # Yield team analysis progress
        yield format_sse_message("progress", {
            "stage": "analyzing_teams",
            "message": f"👥 Finding best teams ({len(request.agents)} agents, {len(request.teams)} teams)...",
            "progress": 35
        })

        await asyncio.sleep(0.2)

        # Check if this is a refinement or subsequent iteration.
        # should_be_decisive suppresses clarifying questions once there is any
        # prior context (iteration > 1 or non-empty conversation history).
        is_refinement = request.iteration > 1 and request.refinement_feedback
        has_conversation_history = bool(request.conversation_context and request.conversation_context.strip())
        should_be_decisive = request.iteration > 1 or has_conversation_history

        # Yield complexity analysis progress
        yield format_sse_message("progress", {
            "stage": "complexity",
            "message": "🔍 Analyzing task complexity...",
            "progress": 50
        })

        await asyncio.sleep(0.2)

        # Build the planning prompt (same as original, non-streaming endpoint).
        # NOTE(review): the "Important Guidelines" list inside this prompt has
        # duplicated numbering ("4." twice, and trailing "6."/"7." repeating
        # earlier numbers) — kept verbatim here; confirm whether it should be fixed.
        planning_prompt = f"""
# Task Planning Request - Iteration #{request.iteration}

## Task Description
{request.description}

## Priority
{request.priority.upper()}

{"## Previous Conversation (USE THIS CONTEXT)" if has_conversation_history else ""}
{request.conversation_context if has_conversation_history else ""}

{"## User Feedback for Refinement" if request.refinement_feedback else ""}
{request.refinement_feedback if request.refinement_feedback else ""}

{"## Previous Plan (to be refined)" if request.previous_plan else ""}
{json.dumps(request.previous_plan, indent=2) if request.previous_plan else ""}

## Available Resources

### Agents
{agents_context if agents_context else "No individual agents available"}

### Teams
{teams_context if teams_context else "No teams available"}

### Execution Environments
{environments_context}

### Worker Queues
{worker_queues_context}

{system_capabilities}

{pricing_context}

## Your Task

{'**BE DECISIVE**: You have conversation history showing the user has already provided context. DO NOT ask more questions. Use the information provided in the conversation history above to create a reasonable plan. Make sensible assumptions where needed and proceed with planning.' if should_be_decisive else '**FIRST ITERATION**: Review if you have enough context. ONLY ask questions if you are missing CRITICAL information that makes planning impossible (like completely unknown technology stack or domain). If the task is reasonably clear, proceed with planning and make reasonable assumptions.'}

{'**IMPORTANT**: DO NOT ask questions. The user wants a plan now. Use the conversation history above.' if should_be_decisive else 'If you need CRITICAL information to proceed, respond with:'}
{'```json' if not should_be_decisive else ''}
{'{' if not should_be_decisive else ''}
{' "has_questions": true,' if not should_be_decisive else ''}
{' "questions": "List 1-2 CRITICAL questions (not nice-to-haves). Be very selective."' if not should_be_decisive else ''}
{'}' if not should_be_decisive else ''}
{'```' if not should_be_decisive else ''}

Otherwise, analyze this task and provide a comprehensive plan in the following JSON format:

{{
    "title": "Concise task title",
    "summary": "2-3 sentence summary of what needs to be done",
    "complexity": {{
        "story_points": 5,
        "confidence": "medium",
        "reasoning": "Explanation of complexity assessment"
    }},
    "team_breakdown": [
        {{
            "team_id": "team-uuid-or-null",
            "team_name": "Team Name or Agent Name",
            "agent_id": "agent-uuid-or-null",
            "agent_name": "Agent Name if individual agent",
            "responsibilities": ["Task 1", "Task 2"],
            "estimated_time_hours": 2.5
        }}
    ],
    "recommended_execution": {{
        "entity_type": "agent or team",
        "entity_id": "uuid-of-recommended-agent-or-team",
        "entity_name": "Name",
        "reasoning": "Why this entity is best suited for this task",
        "recommended_environment_id": "uuid-of-best-environment-or-null",
        "recommended_environment_name": "Environment Name or null",
        "recommended_worker_queue_id": "uuid-of-best-worker-queue-or-null",
        "recommended_worker_queue_name": "Worker Queue Name or null",
        "execution_reasoning": "Why this environment/queue is optimal for execution (consider capacity, type, status)"
    }},
    "cost_estimate": {{
        "estimated_cost_usd": 1.50,
        "breakdown": [
            {{"item": "API calls", "cost": 1.00}},
            {{"item": "Processing", "cost": 0.50}}
        ]
    }},
    "realized_savings": {{
        "without_kubiya_cost": 1440.0,
        "without_kubiya_hours": 10.0,
        "without_kubiya_resources": [
            {{
                "role": "Senior DevOps Engineer",
                "hourly_rate": 150.0,
                "estimated_hours": 8.0,
                "total_cost": 1200.0
            }},
            {{
                "role": "Security Engineer",
                "hourly_rate": 120.0,
                "estimated_hours": 2.0,
                "total_cost": 240.0
            }}
        ],
        "with_kubiya_cost": 3.75,
        "with_kubiya_hours": 4.0,
        "money_saved": 1436.25,
        "time_saved_hours": 6.0,
        "time_saved_percentage": 60,
        "savings_summary": "By using Kubiya's AI orchestration, you saved $1,436 and 6 hours. Manual execution would require 10 hours of skilled engineers ($1,440), but Kubiya completes it in 4 hours for just $3.75."
    }},
    "risks": ["Risk 1", "Risk 2"],
    "prerequisites": ["Prerequisite 1", "Prerequisite 2"],
    "success_criteria": ["Criterion 1", "Criterion 2"]
}}

**Important Guidelines:**
1. For `recommended_execution`, choose the MOST CAPABLE entity (agent or team) based on:
   - Task complexity
   - Agent/team capabilities and model
   - Description fit
   - Whether multiple agents are needed (prefer team) or single agent is sufficient
2. The recommended entity MUST be from the available agents/teams list above
3. For `recommended_environment_id` and `recommended_worker_queue_id`:
   - Choose the BEST environment based on task requirements (production vs staging vs development)
   - Choose a worker queue with AVAILABLE CAPACITY (active_workers > 0 and status = 'active')
   - Match worker queue to the selected environment if possible
   - Provide clear `execution_reasoning` explaining your environment/queue selection
   - If no suitable queue is available, still recommend one and note the capacity concern in reasoning
4. Use IDs exactly as provided from the lists above
4. **CRITICAL - Enhanced Cost Breakdown**:
   - **Team Breakdown**: For each agent/team member, include:
     - `model_info`: Specify the model they'll use (use the model_id from agent info)
       - Estimate input/output tokens based on task complexity
       - Use realistic pricing: Claude Sonnet 4 ($0.003/1K in, $0.015/1K out), GPT-4o ($0.0025/1K in, $0.01/1K out)
       - Calculate total_model_cost accurately
     - `expected_tools`: List tools they'll use with estimated call counts
       - AWS APIs: $0.0004-0.001 per call
       - Database queries: $0.0001 per query
       - Free tools (kubectl, bash): $0.0 per call
     - `agent_cost`: Sum of model_cost + tool_costs
   - **Cost Estimate**: Provide detailed breakdown:
     - `llm_costs`: Array of LLM costs by model (aggregate from team breakdown)
     - `tool_costs`: Categorized tool costs (AWS APIs, Database Queries, External APIs)
     - `runtime_cost`: Worker execution time × cost per hour ($0.10/hr typical)
     - Ensure `estimated_cost_usd` = sum of all LLM + tool + runtime costs
     - Legacy `breakdown` still required for backwards compatibility
5. **Realistic Token Estimates**:
   - Simple tasks (story points 1-3): 2-5K input, 1-2K output tokens per agent
   - Medium tasks (story points 5-8): 5-10K input, 2-5K output tokens per agent
   - Complex tasks (story points 13-21): 10-20K input, 5-10K output tokens per agent
6. **Tool Call Estimates**:
   - Consider what APIs/tools the agent will actually use for this specific task
   - Be realistic: Simple tasks might only need 5-10 API calls total
   - Complex deployments might need 50+ API calls across multiple tools
7. **CRITICAL - Realized Savings Calculation** (keep for backwards compatibility):
   - **WITHOUT KUBIYA**: Calculate what it would cost using manual human execution
     - Break down by SPECIFIC ROLES (e.g., "Senior DevOps Engineer", "Security Engineer")
     - Use realistic hourly rates: Senior ($120-200/hr), Mid-level ($80-120/hr), Junior ($50-80/hr)
     - Calculate without_kubiya_cost = sum of all human resource costs
     - Estimate without_kubiya_hours = total time if done manually
   - **WITH KUBIYA**: Calculate AI orchestration costs and time
     - with_kubiya_cost = estimated AI execution cost (API calls, compute)
     - with_kubiya_hours = estimated time for AI agents to complete
   - **REALIZED SAVINGS**:
     - money_saved = without_kubiya_cost - with_kubiya_cost
     - time_saved_hours = without_kubiya_hours - with_kubiya_hours
     - time_saved_percentage = (time_saved_hours / without_kubiya_hours) * 100
   - **COMPELLING NARRATIVE**: Create savings_summary that emphasizes the concrete savings:
     - "By using Kubiya, you saved $X and Y hours"
     - Show the contrast: "Without Kubiya: $X (Y hours)" vs "With Kubiya: $X (Y hours)"
6. Be specific and actionable in all fields
7. Output ONLY valid JSON, no markdown formatting
"""

        # Yield AI agent creation progress
        yield format_sse_message("progress", {
            "stage": "creating_agent",
            "message": "🤖 Creating AI planning agent...",
            "progress": 60
        })

        await asyncio.sleep(0.2)

        # Get organization ID from agents/teams if available.
        # getattr is used defensively because the request models may not carry it.
        organization_id = None
        if request.agents and len(request.agents) > 0:
            organization_id = getattr(request.agents[0], "organization_id", None)
        elif request.teams and len(request.teams) > 0:
            organization_id = getattr(request.teams[0], "organization_id", None)

        # Create planning agent with tools
        logger.info("creating_planning_agent_stream", organization_id=organization_id)
        planning_agent = create_planning_agent(organization_id=organization_id)

        # Yield generating plan progress
        yield format_sse_message("progress", {
            "stage": "generating",
            "message": "✨ Generating comprehensive plan...",
            "progress": 75
        })

        # Run the agent with streaming to capture reasoning
        # The actual reasoning will be streamed in real-time (no need for generic "thinking" message)
        logger.info("executing_agent_run_stream", prompt_length=len(planning_prompt))

        # Use streaming to capture reasoning content as it comes in
        # ReasoningTools provide structured reasoning capabilities
        reasoning_chunks = []
        final_response = None

        # Set timeout for agent run to prevent hanging (2 minutes max)
        agent_timeout = 120  # seconds
        start_time = asyncio.get_event_loop().time()

        # Stream with ReasoningTools - it handles reasoning display automatically.
        # NOTE(review): planning_agent.run(..., stream=True) is a synchronous iterator
        # consumed inside an async generator — each chunk blocks the event loop until
        # it arrives; consider offloading to a thread if this becomes a problem.
        for chunk in planning_agent.run(planning_prompt, stream=True):
            # Check for timeout (best-effort: only detected between chunks,
            # not while blocked waiting for one).
            if asyncio.get_event_loop().time() - start_time > agent_timeout:
                logger.error("agent_run_timeout", elapsed=agent_timeout)
                raise TimeoutError(f"Agent run exceeded {agent_timeout}s timeout")
            # Log chunk attributes for debugging
            logger.info("streaming_chunk_received",
                        has_content=hasattr(chunk, 'content'),
                        has_reasoning=hasattr(chunk, 'reasoning_content'),
                        has_tool_calls=hasattr(chunk, 'tool_calls'),
                        content_type=type(chunk.content).__name__ if hasattr(chunk, 'content') else None)

            # Check for reasoning content (Agno's reasoning agent output)
            if hasattr(chunk, 'reasoning_content') and chunk.reasoning_content:
                reasoning_text = str(chunk.reasoning_content)
                reasoning_chunks.append(reasoning_text)

                # Stream reasoning to frontend in real-time
                yield format_sse_message("reasoning", {
                    "content": reasoning_text,
                    "is_complete": False
                })

            # Check for regular content chunks (chain-of-thought reasoning before structured output)
            elif hasattr(chunk, 'content') and chunk.content and not hasattr(chunk, 'tool_calls'):
                # This might be reasoning content - stream it to frontend
                reasoning_text = str(chunk.content)

                # Filter out the final structured response (which will be a dict/object)
                if not isinstance(chunk.content, (dict, TaskPlanResponse)):
                    reasoning_chunks.append(reasoning_text)

                    # Stream reasoning to frontend in real-time
                    yield format_sse_message("reasoning", {
                        "content": reasoning_text,
                        "is_complete": False
                    })
                    logger.info("streaming_reasoning_chunk", length=len(reasoning_text))
                else:
                    # This is the final response with structured output
                    final_response = chunk
                    logger.info("received_final_structured_response")

            # Check for tool calls (when agent uses planning tools)
            if hasattr(chunk, 'tool_calls') and chunk.tool_calls:
                for tool_call in chunk.tool_calls:
                    tool_name = tool_call.function.name if hasattr(tool_call, 'function') else str(tool_call)
                    yield format_sse_message("tool_call", {
                        "tool_name": tool_name,
                        "message": f"🔧 Calling tool: {tool_name}"
                    })
                    logger.info("streaming_tool_call", tool_name=tool_name)

            # Capture the final response if we haven't yet
            # In streaming mode, Agno returns the full structured response at the end
            if hasattr(chunk, 'content') and isinstance(chunk.content, (dict, TaskPlanResponse)):
                final_response = chunk

        # Signal reasoning is complete (only if any reasoning was streamed)
        if reasoning_chunks:
            full_reasoning = ''.join(reasoning_chunks)
            yield format_sse_message("reasoning", {
                "content": "",
                "is_complete": True,
                "full_reasoning": full_reasoning,
                # rough whitespace-token count, not model tokens
                "token_count": len(full_reasoning.split())
            })

        logger.info("agent_run_completed_stream", has_final_response=final_response is not None, reasoning_length=len(reasoning_chunks))

        # Yield calculating savings progress
        yield format_sse_message("progress", {
            "stage": "calculating",
            "message": "💰 Calculating cost savings...",
            "progress": 90
        })

        await asyncio.sleep(0.2)

        # With output_schema, the final response.content should be a TaskPlanResponse object
        if not final_response or not hasattr(final_response, 'content'):
            logger.error("no_final_response_from_agent")
            yield format_sse_message("error", {
                "message": "Agent did not return a final response. Please try again."
            })
            return

        # Validate that we got the correct type
        if not isinstance(final_response.content, TaskPlanResponse):
            logger.error(
                "unexpected_response_type",
                type_received=type(final_response.content).__name__,
                content_preview=str(final_response.content)[:200]
            )
            yield format_sse_message("error", {
                "message": f"Agent returned unexpected response type: {type(final_response.content).__name__}"
            })
            return

        plan = final_response.content

        # Check if AI is asking questions — still a "complete" event so the
        # client stops listening, but with questions instead of a plan.
        if plan.has_questions:
            logger.info("task_planner_asking_questions_stream", iteration=request.iteration)
            yield format_sse_message("complete", {
                "has_questions": True,
                "questions": plan.questions,
                "progress": 100
            })
            return

        logger.info(
            "task_plan_generated_stream",
            title=plan.title,
            complexity=plan.complexity.story_points,
            recommended_entity=plan.recommended_execution.entity_name,
            iteration=request.iteration,
        )

        # Yield complete event with the full plan
        yield format_sse_message("complete", {
            "plan": plan.model_dump(),
            "progress": 100,
            "message": "✅ Plan generated successfully!"
        })

    except Exception as e:
        # Errors are reported as an SSE "error" event rather than re-raised,
        # so the client always gets a well-formed stream.
        from sqlalchemy.exc import OperationalError, DisconnectionError
        from control_plane_api.app.database import dispose_engine, IS_SERVERLESS

        error_type = type(e).__name__
        logger.error("task_planning_stream_error", error=str(e), error_type=error_type)

        # Specific handling for database connection errors
        if isinstance(e, (OperationalError, DisconnectionError)):
            error_msg = "Database connection lost. This may be due to serverless timeout or connection pool exhaustion. Please try again."
            logger.error("database_connection_error_in_planning", error=str(e))

            # Dispose engine in serverless to force fresh connections on next request
            if IS_SERVERLESS:
                dispose_engine()
        else:
            error_msg = f"Task planning failed: {str(e)}"

        yield format_sse_message("error", {
            "message": error_msg
        })
    finally:
        # Cleanup: Dispose engine in serverless environments after each invocation
        from control_plane_api.app.database import dispose_engine, IS_SERVERLESS
        if IS_SERVERLESS:
            logger.info("cleaning_up_serverless_database_connections")
            dispose_engine()
1220
+
1221
+
1222
@router.post("/tasks/plan/stream")
async def plan_task_stream(request: TaskPlanRequest):
    """
    Stream an AI-generated task plan to the client as Server-Sent Events.

    The underlying generator emits progress events while the plan is built:
    - initializing: Starting the planner
    - context: Gathering organizational context
    - analyzing_teams: Finding best teams
    - complexity: Analyzing task complexity
    - creating_agent: Creating AI agent
    - generating: Generating plan
    - calculating: Calculating savings
    - complete: Final plan ready
    - error: If something went wrong
    """
    # These headers keep proxies/intermediaries from buffering the stream,
    # so events reach the client as soon as they are produced.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",  # Disable nginx buffering
    }
    return StreamingResponse(
        generate_task_plan_stream(request),
        media_type="text/event-stream",
        headers=sse_headers,
    )
1247
+
1248
+
1249
@router.get("/tasks/plan/health")
async def planning_health():
    """Liveness probe for the task-planning service.

    Returns a static status payload identifying the service and the
    AI provider it is configured to use.
    """
    return dict(
        status="healthy",
        service="task_planning",
        ai_provider="OpenAI GPT-4o",
    )