kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubiya-control-plane-api might be problematic. Click here for more details.

Files changed (185) hide show
  1. control_plane_api/README.md +266 -0
  2. control_plane_api/__init__.py +0 -0
  3. control_plane_api/__version__.py +1 -0
  4. control_plane_api/alembic/README +1 -0
  5. control_plane_api/alembic/env.py +98 -0
  6. control_plane_api/alembic/script.py.mako +28 -0
  7. control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
  8. control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
  9. control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
  10. control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
  11. control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
  12. control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
  13. control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
  14. control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
  15. control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
  16. control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
  17. control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
  18. control_plane_api/alembic.ini +148 -0
  19. control_plane_api/api/index.py +12 -0
  20. control_plane_api/app/__init__.py +11 -0
  21. control_plane_api/app/activities/__init__.py +20 -0
  22. control_plane_api/app/activities/agent_activities.py +379 -0
  23. control_plane_api/app/activities/team_activities.py +410 -0
  24. control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
  25. control_plane_api/app/config/__init__.py +35 -0
  26. control_plane_api/app/config/api_config.py +354 -0
  27. control_plane_api/app/config/model_pricing.py +318 -0
  28. control_plane_api/app/config.py +95 -0
  29. control_plane_api/app/database.py +135 -0
  30. control_plane_api/app/exceptions.py +408 -0
  31. control_plane_api/app/lib/__init__.py +11 -0
  32. control_plane_api/app/lib/job_executor.py +312 -0
  33. control_plane_api/app/lib/kubiya_client.py +235 -0
  34. control_plane_api/app/lib/litellm_pricing.py +166 -0
  35. control_plane_api/app/lib/planning_tools/__init__.py +22 -0
  36. control_plane_api/app/lib/planning_tools/agents.py +155 -0
  37. control_plane_api/app/lib/planning_tools/base.py +189 -0
  38. control_plane_api/app/lib/planning_tools/environments.py +214 -0
  39. control_plane_api/app/lib/planning_tools/resources.py +240 -0
  40. control_plane_api/app/lib/planning_tools/teams.py +198 -0
  41. control_plane_api/app/lib/policy_enforcer_client.py +939 -0
  42. control_plane_api/app/lib/redis_client.py +436 -0
  43. control_plane_api/app/lib/supabase.py +71 -0
  44. control_plane_api/app/lib/temporal_client.py +138 -0
  45. control_plane_api/app/lib/validation/__init__.py +20 -0
  46. control_plane_api/app/lib/validation/runtime_validation.py +287 -0
  47. control_plane_api/app/main.py +128 -0
  48. control_plane_api/app/middleware/__init__.py +8 -0
  49. control_plane_api/app/middleware/auth.py +513 -0
  50. control_plane_api/app/middleware/exception_handler.py +267 -0
  51. control_plane_api/app/middleware/rate_limiting.py +384 -0
  52. control_plane_api/app/middleware/request_id.py +202 -0
  53. control_plane_api/app/models/__init__.py +27 -0
  54. control_plane_api/app/models/agent.py +79 -0
  55. control_plane_api/app/models/analytics.py +206 -0
  56. control_plane_api/app/models/associations.py +81 -0
  57. control_plane_api/app/models/environment.py +63 -0
  58. control_plane_api/app/models/execution.py +93 -0
  59. control_plane_api/app/models/job.py +179 -0
  60. control_plane_api/app/models/llm_model.py +75 -0
  61. control_plane_api/app/models/presence.py +49 -0
  62. control_plane_api/app/models/project.py +47 -0
  63. control_plane_api/app/models/session.py +38 -0
  64. control_plane_api/app/models/team.py +66 -0
  65. control_plane_api/app/models/workflow.py +55 -0
  66. control_plane_api/app/policies/README.md +121 -0
  67. control_plane_api/app/policies/approved_users.rego +62 -0
  68. control_plane_api/app/policies/business_hours.rego +51 -0
  69. control_plane_api/app/policies/rate_limiting.rego +100 -0
  70. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  71. control_plane_api/app/routers/__init__.py +4 -0
  72. control_plane_api/app/routers/agents.py +364 -0
  73. control_plane_api/app/routers/agents_v2.py +1260 -0
  74. control_plane_api/app/routers/analytics.py +1014 -0
  75. control_plane_api/app/routers/context_manager.py +562 -0
  76. control_plane_api/app/routers/environment_context.py +270 -0
  77. control_plane_api/app/routers/environments.py +715 -0
  78. control_plane_api/app/routers/execution_environment.py +517 -0
  79. control_plane_api/app/routers/executions.py +1911 -0
  80. control_plane_api/app/routers/health.py +92 -0
  81. control_plane_api/app/routers/health_v2.py +326 -0
  82. control_plane_api/app/routers/integrations.py +274 -0
  83. control_plane_api/app/routers/jobs.py +1344 -0
  84. control_plane_api/app/routers/models.py +82 -0
  85. control_plane_api/app/routers/models_v2.py +361 -0
  86. control_plane_api/app/routers/policies.py +639 -0
  87. control_plane_api/app/routers/presence.py +234 -0
  88. control_plane_api/app/routers/projects.py +902 -0
  89. control_plane_api/app/routers/runners.py +379 -0
  90. control_plane_api/app/routers/runtimes.py +172 -0
  91. control_plane_api/app/routers/secrets.py +155 -0
  92. control_plane_api/app/routers/skills.py +1001 -0
  93. control_plane_api/app/routers/skills_definitions.py +140 -0
  94. control_plane_api/app/routers/task_planning.py +1256 -0
  95. control_plane_api/app/routers/task_queues.py +654 -0
  96. control_plane_api/app/routers/team_context.py +270 -0
  97. control_plane_api/app/routers/teams.py +1400 -0
  98. control_plane_api/app/routers/worker_queues.py +1545 -0
  99. control_plane_api/app/routers/workers.py +935 -0
  100. control_plane_api/app/routers/workflows.py +204 -0
  101. control_plane_api/app/runtimes/__init__.py +6 -0
  102. control_plane_api/app/runtimes/validation.py +344 -0
  103. control_plane_api/app/schemas/job_schemas.py +295 -0
  104. control_plane_api/app/services/__init__.py +1 -0
  105. control_plane_api/app/services/agno_service.py +619 -0
  106. control_plane_api/app/services/litellm_service.py +190 -0
  107. control_plane_api/app/services/policy_service.py +525 -0
  108. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  109. control_plane_api/app/skills/__init__.py +44 -0
  110. control_plane_api/app/skills/base.py +229 -0
  111. control_plane_api/app/skills/business_intelligence.py +189 -0
  112. control_plane_api/app/skills/data_visualization.py +154 -0
  113. control_plane_api/app/skills/docker.py +104 -0
  114. control_plane_api/app/skills/file_generation.py +94 -0
  115. control_plane_api/app/skills/file_system.py +110 -0
  116. control_plane_api/app/skills/python.py +92 -0
  117. control_plane_api/app/skills/registry.py +65 -0
  118. control_plane_api/app/skills/shell.py +102 -0
  119. control_plane_api/app/skills/workflow_executor.py +469 -0
  120. control_plane_api/app/utils/workflow_executor.py +354 -0
  121. control_plane_api/app/workflows/__init__.py +11 -0
  122. control_plane_api/app/workflows/agent_execution.py +507 -0
  123. control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
  124. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  125. control_plane_api/app/workflows/team_execution.py +399 -0
  126. control_plane_api/scripts/seed_models.py +239 -0
  127. control_plane_api/worker/__init__.py +0 -0
  128. control_plane_api/worker/activities/__init__.py +0 -0
  129. control_plane_api/worker/activities/agent_activities.py +1241 -0
  130. control_plane_api/worker/activities/approval_activities.py +234 -0
  131. control_plane_api/worker/activities/runtime_activities.py +388 -0
  132. control_plane_api/worker/activities/skill_activities.py +267 -0
  133. control_plane_api/worker/activities/team_activities.py +1217 -0
  134. control_plane_api/worker/config/__init__.py +31 -0
  135. control_plane_api/worker/config/worker_config.py +275 -0
  136. control_plane_api/worker/control_plane_client.py +529 -0
  137. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  138. control_plane_api/worker/models/__init__.py +1 -0
  139. control_plane_api/worker/models/inputs.py +89 -0
  140. control_plane_api/worker/runtimes/__init__.py +31 -0
  141. control_plane_api/worker/runtimes/base.py +789 -0
  142. control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
  143. control_plane_api/worker/runtimes/default_runtime.py +617 -0
  144. control_plane_api/worker/runtimes/factory.py +173 -0
  145. control_plane_api/worker/runtimes/validation.py +93 -0
  146. control_plane_api/worker/services/__init__.py +1 -0
  147. control_plane_api/worker/services/agent_executor.py +422 -0
  148. control_plane_api/worker/services/agent_executor_v2.py +383 -0
  149. control_plane_api/worker/services/analytics_collector.py +457 -0
  150. control_plane_api/worker/services/analytics_service.py +464 -0
  151. control_plane_api/worker/services/approval_tools.py +310 -0
  152. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  153. control_plane_api/worker/services/cancellation_manager.py +177 -0
  154. control_plane_api/worker/services/data_visualization.py +827 -0
  155. control_plane_api/worker/services/jira_tools.py +257 -0
  156. control_plane_api/worker/services/runtime_analytics.py +328 -0
  157. control_plane_api/worker/services/session_service.py +194 -0
  158. control_plane_api/worker/services/skill_factory.py +175 -0
  159. control_plane_api/worker/services/team_executor.py +574 -0
  160. control_plane_api/worker/services/team_executor_v2.py +465 -0
  161. control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
  162. control_plane_api/worker/tests/__init__.py +1 -0
  163. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  164. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  165. control_plane_api/worker/tests/integration/__init__.py +0 -0
  166. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  167. control_plane_api/worker/tests/unit/__init__.py +0 -0
  168. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  169. control_plane_api/worker/utils/__init__.py +1 -0
  170. control_plane_api/worker/utils/chunk_batcher.py +305 -0
  171. control_plane_api/worker/utils/retry_utils.py +60 -0
  172. control_plane_api/worker/utils/streaming_utils.py +373 -0
  173. control_plane_api/worker/worker.py +753 -0
  174. control_plane_api/worker/workflows/__init__.py +0 -0
  175. control_plane_api/worker/workflows/agent_execution.py +589 -0
  176. control_plane_api/worker/workflows/team_execution.py +429 -0
  177. kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
  178. kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
  179. kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
  180. kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
  181. kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
  182. kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
  183. kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
  184. {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
  185. {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1443 @@
1
+ """
2
+ Claude Code runtime implementation using Claude Code SDK.
3
+
4
+ This runtime adapter integrates the Claude Code SDK to power agents with
5
+ advanced coding capabilities, file operations, and specialized tools.
6
+ """
7
+
8
+ from typing import Dict, Any, Optional, AsyncIterator, Callable, TYPE_CHECKING, List
9
+ import structlog
10
+ import os
11
+ import asyncio
12
+ import time
13
+
14
+ from .base import (
15
+ RuntimeType,
16
+ RuntimeExecutionResult,
17
+ RuntimeExecutionContext,
18
+ RuntimeCapabilities,
19
+ BaseRuntime,
20
+ RuntimeRegistry,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ from control_plane_client import ControlPlaneClient
25
+ from services.cancellation_manager import CancellationManager
26
+
27
+ logger = structlog.get_logger(__name__)
28
+
29
+
30
+ @RuntimeRegistry.register(RuntimeType.CLAUDE_CODE)
31
+ class ClaudeCodeRuntime(BaseRuntime):
32
+ """
33
+ Runtime implementation using Claude Code SDK.
34
+
35
+ This runtime leverages Claude Code's specialized capabilities for
36
+ software engineering tasks, file operations, and developer workflows.
37
+
38
+ Features:
39
+ - Streaming execution with real-time updates
40
+ - Conversation history support via ClaudeSDKClient
41
+ - Custom tool integration via MCP
42
+ - Hooks for tool execution monitoring
43
+ - Cancellation support via interrupt()
44
+ """
45
+
46
def __init__(
    self,
    control_plane_client: "ControlPlaneClient",
    cancellation_manager: "CancellationManager",
    **kwargs,
):
    """
    Set up a Claude Code runtime instance.

    All arguments are forwarded unchanged to the base runtime initializer;
    this subclass only adds an empty registry of in-flight SDK clients.

    Args:
        control_plane_client: Client for Control Plane API
        cancellation_manager: Manager for execution cancellation
        **kwargs: Additional configuration options passed to the base class
    """
    super().__init__(control_plane_client, cancellation_manager, **kwargs)

    # execution_id -> SDK client, filled in while an execution is running
    # so the client can be looked up by execution id.
    self._active_clients: Dict[str, Any] = dict()
64
+
65
def get_runtime_type(self) -> RuntimeType:
    """Identify this runtime: always ``RuntimeType.CLAUDE_CODE``."""
    runtime_type = RuntimeType.CLAUDE_CODE
    return runtime_type
68
+
69
def get_capabilities(self) -> RuntimeCapabilities:
    """
    Describe what the Claude Code runtime supports.

    Returns:
        RuntimeCapabilities constructed with every flag listed below set
        to True.
    """
    enabled_flags = (
        "streaming",
        "tools",
        "mcp",
        "hooks",
        "cancellation",
        "conversation_history",
        "custom_tools",
    )
    # Each name becomes a keyword argument with the value True.
    return RuntimeCapabilities(**dict.fromkeys(enabled_flags, True))
80
+
81
async def _execute_impl(
    self, context: RuntimeExecutionContext
) -> RuntimeExecutionResult:
    """
    Execute agent using Claude Code SDK (non-streaming).

    Connects a ``ClaudeSDKClient``, sends ``context.prompt``, and drains
    ``receive_response()`` until the first ``ResultMessage``. Text blocks
    are concatenated into the response, tool-use blocks are recorded, and
    token usage plus the SDK ``session_id`` are taken from the
    ``ResultMessage``.

    Args:
        context: Execution context with prompt, history, config

    Returns:
        RuntimeExecutionResult with response and metadata. On
        ``ImportError``, ``asyncio.TimeoutError`` or any other
        ``Exception`` a result with ``success=False`` and an ``error``
        message is returned instead of raising.

    Raises:
        asyncio.CancelledError: Re-raised after logging so that
            cancellation propagates to the caller.
    """

    def _usage_field(usage_obj: Any, field: str) -> int:
        """Read a token counter from a dict- or attribute-style usage object."""
        if isinstance(usage_obj, dict):
            return usage_obj.get(field, 0) or 0
        return getattr(usage_obj, field, 0) or 0

    client = None
    loop = asyncio.get_running_loop()
    start_time = loop.time()

    # Accumulates values (e.g. the SDK session id) that are merged into the
    # result metadata. Must exist before the receive loop writes to it.
    metadata: Dict[str, Any] = {}

    try:
        from claude_agent_sdk import ClaudeSDKClient, ResultMessage

        self.logger.info(
            "Starting Claude Code non-streaming execution",
            execution_id=context.execution_id,
            model=context.model_id,
            has_history=bool(context.conversation_history),
        )

        # Build Claude Code options; the second return value (active_tools)
        # is not needed for non-streaming execution.
        options, _ = self._build_claude_options(context)

        # Create client and manually manage lifecycle to avoid asyncio cancel scope issues
        client = ClaudeSDKClient(options=options)
        await client.connect()
        self._active_clients[context.execution_id] = client

        # Send prompt (SDK handles conversation history via session resume)
        # No need to manually inject history - the SDK maintains it via session_id
        prompt = context.prompt

        self.logger.debug(
            "Sending query to Claude Code SDK",
            execution_id=context.execution_id,
            prompt_length=len(prompt),
            using_session_resume=bool(options.resume),
        )

        await client.query(prompt)

        # Collect complete response
        response_text = ""
        usage = {}
        tool_messages = []
        finish_reason = None
        message_count = 0

        # Use receive_response() to get messages until ResultMessage
        async for message in client.receive_response():
            message_count += 1

            # Any message exposing ``content`` is scanned block by block:
            # blocks with ``text`` extend the response, blocks with ``name``
            # are treated as tool-use blocks.
            if hasattr(message, "content"):
                for block in message.content:
                    if hasattr(block, "text"):
                        response_text += block.text
                    elif hasattr(block, "name"):  # ToolUseBlock
                        tool_messages.append(
                            {
                                "tool": block.name,
                                "input": getattr(block, "input", {}),
                                "tool_use_id": getattr(block, "id", None),
                            }
                        )

            # Extract usage, finish reason, and session_id from ResultMessage
            if isinstance(message, ResultMessage):
                if message.usage:
                    # Use Anthropic field names for consistency with analytics
                    input_tokens = _usage_field(message.usage, "input_tokens")
                    output_tokens = _usage_field(message.usage, "output_tokens")
                    usage = {
                        "input_tokens": input_tokens,
                        "output_tokens": output_tokens,
                        "total_tokens": input_tokens + output_tokens,
                        "cache_read_tokens": _usage_field(
                            message.usage, "cache_read_input_tokens"
                        ),
                        "cache_creation_tokens": _usage_field(
                            message.usage, "cache_creation_input_tokens"
                        ),
                    }
                    self.logger.info(
                        "Claude Code usage extracted",
                        execution_id=context.execution_id[:8],
                        input_tokens=usage["input_tokens"],
                        output_tokens=usage["output_tokens"],
                        cache_read=usage["cache_read_tokens"],
                    )
                else:
                    self.logger.warning(
                        "Claude Code ResultMessage has no usage",
                        execution_id=context.execution_id[:8],
                    )
                finish_reason = message.subtype  # "success" or "error"

                # Extract session_id for conversation continuity
                # This will be passed back to enable multi-turn conversations
                session_id = getattr(message, "session_id", None)
                if session_id:
                    # Store in metadata to use on next turn
                    metadata["claude_code_session_id"] = session_id
                    self.logger.info(
                        "✅ Claude Code session captured for conversation continuity",
                        execution_id=context.execution_id[:8],
                        session_id=session_id[:16],
                        message="This session_id will enable multi-turn conversations",
                    )
                else:
                    self.logger.warning(
                        "⚠️ No session_id in ResultMessage - multi-turn may not work",
                        execution_id=context.execution_id[:8],
                    )

                self.logger.info(
                    "Claude Code execution completed",
                    execution_id=context.execution_id[:8],
                    finish_reason=finish_reason,
                    message_count=message_count,
                    response_length=len(response_text),
                    tool_count=len(tool_messages),
                    tokens=usage.get("total_tokens", 0),
                    has_session_id=bool(session_id),
                )
                break

        elapsed_time = loop.time() - start_time

        # Merge accumulated metadata (including session_id) with execution stats
        final_metadata = {
            **metadata,  # Includes claude_code_session_id if present
            "elapsed_time": elapsed_time,
            "message_count": message_count,
        }

        return RuntimeExecutionResult(
            response=response_text,
            usage=usage,
            success=finish_reason == "success",
            finish_reason=finish_reason or "stop",
            tool_messages=tool_messages,
            model=context.model_id,
            metadata=final_metadata,
        )

    except ImportError as e:
        self.logger.error(
            "Claude Code SDK not installed",
            execution_id=context.execution_id,
            error=str(e),
        )
        return RuntimeExecutionResult(
            response="",
            usage={},
            success=False,
            error=f"Claude Code SDK not available: {str(e)}",
        )

    except asyncio.TimeoutError:
        self.logger.error(
            "Claude Code execution timeout",
            execution_id=context.execution_id,
            elapsed_time=loop.time() - start_time,
        )
        return RuntimeExecutionResult(
            response="",
            usage={},
            success=False,
            error="Execution timeout exceeded",
        )

    except asyncio.CancelledError:
        self.logger.warning(
            "Claude Code execution cancelled",
            execution_id=context.execution_id,
        )
        raise  # Re-raise to propagate cancellation

    except Exception as e:
        self.logger.error(
            "Claude Code execution failed",
            execution_id=context.execution_id,
            error=str(e),
            error_type=type(e).__name__,
            exc_info=True,
        )

        return RuntimeExecutionResult(
            response="",
            usage={},
            success=False,
            error=f"{type(e).__name__}: {str(e)}",
        )

    finally:
        # Only the tracking reference is dropped here. client.disconnect()
        # is deliberately not called because of the asyncio cancel scope
        # issues noted where the client is created.
        if self._active_clients.pop(context.execution_id, None) is not None:
            self.logger.debug(
                "Cleaned up Claude SDK client reference",
                execution_id=context.execution_id,
            )
300
+
301
+ async def _stream_execute_impl(
302
+ self,
303
+ context: RuntimeExecutionContext,
304
+ event_callback: Optional[Callable[[Dict], None]] = None,
305
+ ) -> AsyncIterator[RuntimeExecutionResult]:
306
+ """
307
+ Production-grade streaming execution with Claude Code SDK.
308
+
309
+ This implementation provides:
310
+ - Comprehensive error handling with specific exception types
311
+ - Detailed structured logging at each stage
312
+ - Proper resource cleanup with finally blocks
313
+ - Real-time event callbacks for tool execution
314
+ - Accumulated metrics and metadata tracking
315
+
316
+ Args:
317
+ context: Execution context with prompt, history, config
318
+ event_callback: Optional callback for real-time events
319
+
320
+ Yields:
321
+ RuntimeExecutionResult chunks as they arrive, ending with final metadata
322
+ """
323
+ client = None
324
+ start_time = asyncio.get_event_loop().time()
325
+ chunk_count = 0
326
+
327
+ try:
328
+ from claude_agent_sdk import (
329
+ ClaudeSDKClient,
330
+ AssistantMessage,
331
+ ResultMessage,
332
+ TextBlock,
333
+ ToolUseBlock,
334
+ ToolResultBlock,
335
+ )
336
+
337
+ self.logger.info(
338
+ "Starting Claude Code streaming execution",
339
+ execution_id=context.execution_id,
340
+ model=context.model_id,
341
+ has_history=bool(context.conversation_history),
342
+ has_callback=event_callback is not None,
343
+ )
344
+
345
+ # Build Claude Code options with hooks
346
+ options, active_tools = self._build_claude_options(context, event_callback)
347
+
348
+ self.logger.info(
349
+ "Created Claude Code SDK options",
350
+ execution_id=context.execution_id,
351
+ has_tools=bool(context.skills),
352
+ has_mcp=len(options.mcp_servers) > 0 if hasattr(options, 'mcp_servers') else False,
353
+ has_hooks=bool(options.hooks) if hasattr(options, 'hooks') else False,
354
+ has_event_callback=event_callback is not None,
355
+ )
356
+
357
+ # Create client and manually manage lifecycle to avoid asyncio cancel scope issues
358
+ client = ClaudeSDKClient(options=options)
359
+ await client.connect()
360
+ self._active_clients[context.execution_id] = client
361
+
362
+ # Cache execution metadata
363
+ try:
364
+ self.control_plane.cache_metadata(context.execution_id, "AGENT")
365
+ except Exception as cache_error:
366
+ self.logger.warning(
367
+ "Failed to cache metadata (non-fatal)",
368
+ execution_id=context.execution_id,
369
+ error=str(cache_error),
370
+ )
371
+
372
+ # Send prompt (SDK handles conversation history via session resume)
373
+ # No need to manually inject history - the SDK maintains it via session_id
374
+ prompt = context.prompt
375
+
376
+ self.logger.debug(
377
+ "Sending streaming query to Claude Code SDK",
378
+ execution_id=context.execution_id,
379
+ prompt_length=len(prompt),
380
+ using_session_resume=bool(options.resume),
381
+ )
382
+
383
+ await client.query(prompt)
384
+
385
+ # Stream messages
386
+ accumulated_response = ""
387
+ accumulated_usage = {}
388
+ tool_messages = []
389
+ # active_tools dict is shared with hooks (from _build_claude_options)
390
+ message_count = 0
391
+ received_stream_events = False # Track if we got streaming events
392
+
393
+ # Generate unique message_id for this turn (execution_id + timestamp)
394
+ message_id = f"{context.execution_id}_{int(time.time() * 1000000)}"
395
+
396
+ # Use receive_response() to get messages for this specific query
397
+ # receive_response() yields messages until ResultMessage (completion)
398
+ # This is better than receive_messages() which expects multiple queries
399
+ # Check if verbose debug logging is enabled
400
+ debug_mode = os.getenv("CLAUDE_CODE_DEBUG", "false").lower() == "true"
401
+
402
+ async for message in client.receive_response():
403
+ message_count += 1
404
+ message_type_name = type(message).__name__
405
+
406
+ # Handle StreamEvent messages (these contain partial chunks!)
407
+ if message_type_name == "StreamEvent":
408
+ # StreamEvent has 'event' attribute (not 'data'!) with the partial content
409
+ if hasattr(message, 'event') and message.event:
410
+ event_data = message.event
411
+
412
+ # Extract text from event data
413
+ # The structure is: {'type': 'content_block_delta', 'delta': {'type': 'text_delta', 'text': 'content'}}
414
+ content = None
415
+ if isinstance(event_data, dict):
416
+ event_type = event_data.get('type')
417
+
418
+ # Handle content_block_delta events (these have the actual text!)
419
+ if event_type == 'content_block_delta':
420
+ delta = event_data.get('delta', {})
421
+ if isinstance(delta, dict):
422
+ content = delta.get('text')
423
+ elif isinstance(delta, str):
424
+ content = delta
425
+
426
+ # Fallback: try direct text extraction
427
+ if not content:
428
+ content = event_data.get('text') or event_data.get('content')
429
+
430
+ elif isinstance(event_data, str):
431
+ content = event_data
432
+ elif hasattr(event_data, 'content'):
433
+ content = event_data.content
434
+ elif hasattr(event_data, 'text'):
435
+ content = event_data.text
436
+
437
+ if content:
438
+ received_stream_events = True # Mark that we got streaming chunks
439
+ chunk_count += 1
440
+ accumulated_response += content
441
+
442
+ # Publish event
443
+ if event_callback:
444
+ try:
445
+ event_callback({
446
+ "type": "content_chunk",
447
+ "content": content,
448
+ "message_id": message_id,
449
+ "execution_id": context.execution_id,
450
+ })
451
+ except Exception as callback_error:
452
+ self.logger.warning(
453
+ "StreamEvent callback failed",
454
+ execution_id=context.execution_id,
455
+ error=str(callback_error),
456
+ )
457
+
458
+ # Yield chunk
459
+ yield RuntimeExecutionResult(
460
+ response=content,
461
+ usage={},
462
+ success=True,
463
+ )
464
+ continue # Skip to next message
465
+
466
+ # Handle assistant messages (final complete message)
467
+ if isinstance(message, AssistantMessage):
468
+ for block_idx, block in enumerate(message.content):
469
+ if isinstance(block, TextBlock):
470
+ # Skip sending TextBlock content if we already streamed it via StreamEvents
471
+ if received_stream_events:
472
+ # Still accumulate for final result (in case it wasn't fully streamed)
473
+ # But don't send to callback or yield (would be duplicate)
474
+ continue
475
+
476
+ # Only send if we didn't receive StreamEvents
477
+ chunk_count += 1
478
+ accumulated_response += block.text
479
+
480
+ if event_callback:
481
+ try:
482
+ event_callback(
483
+ {
484
+ "type": "content_chunk",
485
+ "content": block.text,
486
+ "message_id": message_id,
487
+ "execution_id": context.execution_id,
488
+ }
489
+ )
490
+ except Exception as callback_error:
491
+ self.logger.warning(
492
+ "Event callback failed (non-fatal)",
493
+ execution_id=context.execution_id,
494
+ error=str(callback_error),
495
+ )
496
+
497
+ yield RuntimeExecutionResult(
498
+ response=block.text,
499
+ usage={},
500
+ success=True,
501
+ )
502
+
503
+ elif isinstance(block, ToolUseBlock):
504
+ # Tool use event - Store for later lookup
505
+ tool_info = {
506
+ "tool": block.name,
507
+ "input": block.input,
508
+ "tool_use_id": block.id,
509
+ }
510
+ tool_messages.append(tool_info)
511
+ active_tools[block.id] = block.name
512
+
513
+ # NOTE: Don't publish tool_start here - pre-tool hook will publish it
514
+ # Hooks fire before stream processing, so publishing here causes events in wrong order
515
+
516
+ elif isinstance(block, ToolResultBlock):
517
+ # Tool result - Look up tool name from active_tools
518
+ tool_name = active_tools.get(block.tool_use_id, "unknown")
519
+ if tool_name == "unknown":
520
+ self.logger.warning(
521
+ "Could not find tool name for tool_use_id",
522
+ execution_id=context.execution_id,
523
+ tool_use_id=block.tool_use_id,
524
+ active_tools_keys=list(active_tools.keys()),
525
+ )
526
+
527
+ status = "success" if not block.is_error else "failed"
528
+
529
+ # Publish via callback (non-blocking)
530
+ if event_callback:
531
+ try:
532
+ event_callback(
533
+ {
534
+ "type": "tool_complete",
535
+ "tool_name": tool_name,
536
+ "tool_execution_id": block.tool_use_id,
537
+ "status": status,
538
+ "output": str(block.content)[:1000] if block.content else None,
539
+ "error": str(block.content) if block.is_error else None,
540
+ "execution_id": context.execution_id,
541
+ }
542
+ )
543
+ except Exception as callback_error:
544
+ self.logger.error(
545
+ "Tool complete callback failed",
546
+ execution_id=context.execution_id,
547
+ tool_name=tool_name,
548
+ error=str(callback_error),
549
+ exc_info=True,
550
+ )
551
+
552
+ # Handle result message (final)
553
+ elif isinstance(message, ResultMessage):
554
+ if message.usage:
555
+ accumulated_usage = {
556
+ "prompt_tokens": getattr(message.usage, "input_tokens", 0),
557
+ "completion_tokens": getattr(message.usage, "output_tokens", 0),
558
+ "total_tokens": getattr(message.usage, "total_tokens", 0),
559
+ }
560
+
561
+ # Extract session_id for conversation continuity
562
+ session_id = getattr(message, "session_id", None)
563
+ if session_id:
564
+ self.logger.info(
565
+ "āœ… Claude Code session captured for conversation continuity (streaming)",
566
+ execution_id=context.execution_id[:8],
567
+ session_id=session_id[:16],
568
+ )
569
+ else:
570
+ self.logger.warning(
571
+ "āš ļø No session_id in ResultMessage (streaming) - multi-turn may not work",
572
+ execution_id=context.execution_id[:8],
573
+ )
574
+
575
+ elapsed_time = asyncio.get_event_loop().time() - start_time
576
+
577
+ self.logger.info(
578
+ "Claude Code streaming completed",
579
+ execution_id=context.execution_id,
580
+ finish_reason=message.subtype,
581
+ chunk_count=chunk_count,
582
+ message_count=message_count,
583
+ response_length=len(accumulated_response),
584
+ tool_count=len(tool_messages),
585
+ usage=accumulated_usage,
586
+ elapsed_time=f"{elapsed_time:.2f}s",
587
+ has_session_id=bool(session_id),
588
+ )
589
+
590
+ # Final result message
591
+ yield RuntimeExecutionResult(
592
+ response="", # Already streamed
593
+ usage=accumulated_usage,
594
+ success=message.subtype == "success",
595
+ finish_reason=message.subtype,
596
+ tool_messages=tool_messages,
597
+ model=context.model_id,
598
+ metadata={
599
+ "accumulated_response": accumulated_response,
600
+ "elapsed_time": elapsed_time,
601
+ "chunk_count": chunk_count,
602
+ "message_count": message_count,
603
+ "claude_code_session_id": session_id, # Store for next turn
604
+ },
605
+ )
606
+ break
607
+
608
+ except ImportError as e:
609
+ elapsed_time = asyncio.get_event_loop().time() - start_time
610
+ self.logger.error(
611
+ "Claude Code SDK not installed",
612
+ execution_id=context.execution_id,
613
+ error=str(e),
614
+ elapsed_time=f"{elapsed_time:.2f}s",
615
+ )
616
+ yield RuntimeExecutionResult(
617
+ response="",
618
+ usage={},
619
+ success=False,
620
+ error=f"Claude Code SDK not available: {str(e)}",
621
+ )
622
+
623
+ except asyncio.TimeoutError:
624
+ elapsed_time = asyncio.get_event_loop().time() - start_time
625
+ self.logger.error(
626
+ "Claude Code streaming timeout",
627
+ execution_id=context.execution_id,
628
+ elapsed_time=f"{elapsed_time:.2f}s",
629
+ chunks_before_timeout=chunk_count,
630
+ )
631
+ yield RuntimeExecutionResult(
632
+ response="",
633
+ usage={},
634
+ success=False,
635
+ error="Streaming execution timeout exceeded",
636
+ )
637
+
638
+ except asyncio.CancelledError:
639
+ elapsed_time = asyncio.get_event_loop().time() - start_time
640
+ self.logger.warning(
641
+ "Claude Code streaming cancelled",
642
+ execution_id=context.execution_id,
643
+ elapsed_time=f"{elapsed_time:.2f}s",
644
+ chunks_before_cancellation=chunk_count,
645
+ )
646
+ # Yield error result before re-raising
647
+ yield RuntimeExecutionResult(
648
+ response="",
649
+ usage={},
650
+ success=False,
651
+ error="Execution was cancelled",
652
+ )
653
+ raise # Re-raise to propagate cancellation
654
+
655
+ except Exception as e:
656
+ elapsed_time = asyncio.get_event_loop().time() - start_time
657
+ self.logger.error(
658
+ "Claude Code streaming failed",
659
+ execution_id=context.execution_id,
660
+ error=str(e),
661
+ error_type=type(e).__name__,
662
+ elapsed_time=f"{elapsed_time:.2f}s",
663
+ chunks_before_error=chunk_count,
664
+ exc_info=True,
665
+ )
666
+ yield RuntimeExecutionResult(
667
+ response="",
668
+ usage={},
669
+ success=False,
670
+ error=f"{type(e).__name__}: {str(e)}",
671
+ )
672
+
673
+ finally:
674
+ # Ensure cleanup happens regardless of how we exit
675
+ # Note: We don't call client.disconnect() here because it has
676
+ # asyncio cancel scope issues. The subprocess cleanup happens
677
+ # automatically when the client object is garbage collected.
678
+ if context.execution_id in self._active_clients:
679
+ try:
680
+ del self._active_clients[context.execution_id]
681
+ self.logger.debug(
682
+ "Cleaned up Claude SDK client reference",
683
+ execution_id=context.execution_id,
684
+ )
685
+ except Exception as cleanup_error:
686
+ self.logger.warning(
687
+ "Error during streaming client cleanup",
688
+ execution_id=context.execution_id,
689
+ error=str(cleanup_error),
690
+ )
691
+
692
async def cancel(self, execution_id: str) -> bool:
    """
    Interrupt a running execution through its registered Claude SDK client.

    Args:
        execution_id: ID of the execution to cancel

    Returns:
        True when the client's interrupt() call (and the follow-up log)
        completed; False when no client is registered under the ID or the
        interrupt attempt raised.
    """
    # Guard clause: nothing is tracked under this ID, so nothing to interrupt.
    if execution_id not in self._active_clients:
        return False

    try:
        active_client = self._active_clients[execution_id]
        await active_client.interrupt()
        self.logger.info("Claude Code execution interrupted", execution_id=execution_id)
    except Exception as interrupt_error:
        # Best-effort: report the failure to the caller instead of raising.
        self.logger.error(
            "Failed to interrupt Claude Code execution",
            execution_id=execution_id,
            error=str(interrupt_error),
        )
        interrupted = False
    else:
        interrupted = True

    return interrupted
716
+
717
+
718
+ # Private helper methods
719
+
720
def _build_claude_options(
    self, context: RuntimeExecutionContext, event_callback: Optional[Callable] = None
) -> tuple[Any, Dict[str, str]]:
    """
    Build ClaudeAgentOptions from execution context.

    Args:
        context: Execution context
        event_callback: Optional event callback for hooks; hooks are only
            built when a callback is supplied

    Returns:
        Tuple of (ClaudeAgentOptions instance, active_tools dict). The dict
        maps tool_use_id -> tool_name and is shared with the hooks.

    Raises:
        ValueError: If the LITELLM_API_KEY environment variable is not set
    """
    from claude_agent_sdk import ClaudeAgentOptions

    # Extract configuration
    agent_config = context.agent_config or {}
    runtime_config = context.runtime_config or {}

    # Get LiteLLM configuration (same as DefaultRuntime/Agno)
    litellm_api_base = os.environ.get("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
    litellm_api_key = os.environ.get("LITELLM_API_KEY")
    if not litellm_api_key:
        raise ValueError("LITELLM_API_KEY environment variable not set")

    # Determine model (use LiteLLM format)
    model = context.model_id or os.environ.get(
        "LITELLM_DEFAULT_MODEL", "kubiya/claude-sonnet-4"
    )

    # Map skills to Claude Code tool names
    allowed_tools = self._map_skills_to_tools(context.skills)
    builtin_tools_count = len(allowed_tools)

    # Build MCP servers (both from context and custom skills)
    mcp_servers, mcp_tool_names = self._build_mcp_servers(context)

    # Add MCP tool names to allowed_tools so they have permission to execute
    # This grants permission to all MCP server tools (e.g., workflow executor tools)
    allowed_tools.extend(mcp_tool_names)

    # Allow explicit MCP tool names from runtime_config (workaround for external MCP servers)
    # Usage: runtime_config = {"explicit_mcp_tools": ["mcp__check_prod_health", "other_tool"]}
    # `or []` tolerates an explicit None; a bare string is wrapped so that
    # extend() does not add it one character at a time.
    explicit_mcp_tools = runtime_config.get("explicit_mcp_tools") or []
    if isinstance(explicit_mcp_tools, str):
        explicit_mcp_tools = [explicit_mcp_tools]
    if explicit_mcp_tools:
        allowed_tools.extend(explicit_mcp_tools)
        self.logger.info(
            "Added explicit MCP tools from runtime_config",
            explicit_tools_count=len(explicit_mcp_tools),
            tools=explicit_mcp_tools,
        )

    self.logger.info(
        "✅ Final allowed_tools list configured",
        total_count=len(allowed_tools),
        builtin_tools_count=builtin_tools_count,
        mcp_tools_count=len(mcp_tool_names),
        explicit_mcp_tools_count=len(explicit_mcp_tools),
        all_tools=allowed_tools[:50],  # Limit to 50 for readability
        truncated=len(allowed_tools) > 50,
    )

    # If there are MCP servers and we have low confidence in tool extraction
    if context.mcp_servers and len(mcp_tool_names) < len(context.mcp_servers) * 2:
        self.logger.warning(
            "⚠️ LOW MCP TOOL CONFIDENCE - If you get permission errors, add to runtime_config:",
            mcp_servers_count=len(context.mcp_servers),
            mcp_tools_added=len(mcp_tool_names),
            example_config={
                "explicit_mcp_tools": [
                    "mcp__your_server_name__your_tool_name",
                    "# Example: mcp__check_prod_health__status"
                ]
            },
            message="See Claude's error message for the exact tool name it's trying to use"
        )

    # Create shared active_tools dict for tool name tracking
    # This is populated in the stream when ToolUseBlock is received,
    # and used in hooks to look up tool names
    active_tools: Dict[str, str] = {}

    # Build hooks for tool execution monitoring
    hooks = self._build_hooks(context, event_callback, active_tools) if event_callback else {}

    # Build environment with LiteLLM configuration.
    # dict(... or {}) copies the caller's mapping and tolerates "env": None.
    env = dict(runtime_config.get("env") or {})

    # Configure Claude Code SDK to use LiteLLM proxy
    # The SDK respects ANTHROPIC_BASE_URL and ANTHROPIC_API_KEY
    env["ANTHROPIC_BASE_URL"] = litellm_api_base
    env["ANTHROPIC_API_KEY"] = litellm_api_key

    # Pass Kubiya API credentials for workflow execution
    # Workflow executor tools need these to execute workflows remotely
    kubiya_api_key = os.environ.get("KUBIYA_API_KEY")
    if kubiya_api_key:
        env["KUBIYA_API_KEY"] = kubiya_api_key
        self.logger.debug("Added KUBIYA_API_KEY to Claude Code subprocess environment")

    kubiya_api_base = os.environ.get("KUBIYA_API_BASE")
    if kubiya_api_base:
        env["KUBIYA_API_BASE"] = kubiya_api_base
        self.logger.debug(f"Added KUBIYA_API_BASE to Claude Code subprocess environment: {kubiya_api_base}")

    # Get session_id from previous turn for conversation continuity
    # Session IDs are stored in user_metadata from previous executions
    # NOTE: conversation_history is often empty because Agno/Claude Code manages history via session_id
    # So we check user_metadata directly instead of requiring conversation_history to be populated
    previous_session_id = None
    if context.user_metadata:
        previous_session_id = context.user_metadata.get("claude_code_session_id")

    self.logger.info(
        "Building Claude Code options",
        has_user_metadata=bool(context.user_metadata),
        has_session_id_in_metadata=bool(previous_session_id),
        previous_session_id=previous_session_id[:16] if previous_session_id else None,
        will_resume=bool(previous_session_id),
    )

    # Build options
    options = ClaudeAgentOptions(
        system_prompt=context.system_prompt,
        allowed_tools=allowed_tools,
        mcp_servers=mcp_servers,
        permission_mode=runtime_config.get("permission_mode", "acceptEdits"),
        cwd=agent_config.get("cwd") or runtime_config.get("cwd"),
        model=model,
        env=env,
        max_turns=runtime_config.get("max_turns"),
        hooks=hooks,
        setting_sources=[],  # Explicit: don't load filesystem settings
        include_partial_messages=True,  # Enable character-by-character streaming
        resume=previous_session_id,  # Resume previous conversation if available
    )

    # DEBUG: Verify the option is set
    self.logger.info(
        "🔧 Claude Code options configured",
        include_partial_messages=getattr(options, 'include_partial_messages', 'NOT SET'),
        permission_mode=options.permission_mode,
        model=options.model,
    )

    # Return both options and the shared active_tools dict for tool name tracking
    return options, active_tools
867
+
868
+ def _build_prompt_with_history(self, context: RuntimeExecutionContext) -> str:
869
+ """
870
+ Build prompt with conversation history.
871
+
872
+ Since ClaudeSDKClient maintains session continuity, we include
873
+ the conversation history as context in the prompt.
874
+
875
+ Args:
876
+ context: Execution context
877
+
878
+ Returns:
879
+ Prompt string with history context
880
+ """
881
+ if not context.conversation_history:
882
+ return context.prompt
883
+
884
+ # Build context from history
885
+ history_context = "Previous conversation:\n"
886
+ for msg in context.conversation_history[-10:]: # Last 10 messages
887
+ role = msg.get("role", "unknown")
888
+ content = msg.get("content", "")
889
+ history_context += f"{role.capitalize()}: {content[:200]}...\n" if len(content) > 200 else f"{role.capitalize()}: {content}\n"
890
+
891
+ return f"{history_context}\n\nCurrent request:\n{context.prompt}"
892
+
893
+ def _map_skills_to_tools(self, skills: list) -> list:
894
+ """
895
+ Map skills to Claude Code tool names.
896
+
897
+ This function translates our generic skill types to the specific
898
+ tool names that Claude Code understands.
899
+
900
+ Args:
901
+ skills: List of skill objects
902
+
903
+ Returns:
904
+ List of Claude Code tool names
905
+ """
906
+ # Skill type to Claude Code tool mapping
907
+ tool_mapping = {
908
+ "shell": ["Bash", "BashOutput", "KillShell"],
909
+ "file_system": ["Read", "Write", "Edit", "Glob", "Grep"],
910
+ "web": ["WebFetch", "WebSearch"],
911
+ "docker": ["Bash"], # Docker commands via Bash
912
+ "kubernetes": ["Bash"], # kubectl via Bash
913
+ "git": ["Bash"], # git commands via Bash
914
+ "task": ["Task"], # Subagent tasks
915
+ "notebook": ["NotebookEdit"],
916
+ "planning": ["TodoWrite", "ExitPlanMode"],
917
+ }
918
+
919
+ tools = []
920
+ for skill in skills:
921
+ # Get skill type
922
+ skill_type = None
923
+ if hasattr(skill, "type"):
924
+ skill_type = skill.type
925
+ elif isinstance(skill, dict):
926
+ skill_type = skill.get("type")
927
+
928
+ # Map to Claude Code tools
929
+ if skill_type and skill_type in tool_mapping:
930
+ tools.extend(tool_mapping[skill_type])
931
+
932
+ # Deduplicate and add default tools if none specified
933
+ unique_tools = list(set(tools)) if tools else ["Read", "Write", "Bash"]
934
+
935
+ self.logger.info(
936
+ "Mapped skills to Claude Code tools",
937
+ skill_count=len(skills),
938
+ tool_count=len(unique_tools),
939
+ tools=unique_tools,
940
+ )
941
+
942
+ return unique_tools
943
+
944
+ def _extract_mcp_tool_names(self, server_name: str, server_obj: Any) -> list[str]:
945
+ """
946
+ Extract tool names from an MCP server object.
947
+
948
+ MCP servers can have different structures, so we try multiple approaches:
949
+ 1. Check for 'tools' attribute (list of tool objects/dicts)
950
+ 2. Check for 'list_tools()' method
951
+ 3. Check if it's a dict with 'tools' key
952
+ 4. Use naming convention: mcp__<server_name> for the server itself
953
+
954
+ Args:
955
+ server_name: Name of the MCP server
956
+ server_obj: MCP server object (could be various types)
957
+
958
+ Returns:
959
+ List of tool names that should be added to allowed_tools
960
+ """
961
+ tool_names = []
962
+
963
+ try:
964
+ # Approach 1: Check if server has a 'tools' attribute (list)
965
+ if hasattr(server_obj, 'tools'):
966
+ tools_attr = getattr(server_obj, 'tools')
967
+ if isinstance(tools_attr, list):
968
+ for tool in tools_attr:
969
+ # Tool might be an object with 'name' attribute
970
+ if hasattr(tool, 'name'):
971
+ tool_names.append(tool.name)
972
+ # Or a dict with 'name' key
973
+ elif isinstance(tool, dict) and 'name' in tool:
974
+ tool_names.append(tool['name'])
975
+ # Or a callable with __name__
976
+ elif callable(tool) and hasattr(tool, '__name__'):
977
+ tool_names.append(tool.__name__)
978
+
979
+ # Approach 2: Check if server has a list_tools() method
980
+ elif hasattr(server_obj, 'list_tools') and callable(getattr(server_obj, 'list_tools')):
981
+ try:
982
+ tools_list = server_obj.list_tools()
983
+ if isinstance(tools_list, list):
984
+ for tool in tools_list:
985
+ if isinstance(tool, str):
986
+ tool_names.append(tool)
987
+ elif isinstance(tool, dict) and 'name' in tool:
988
+ tool_names.append(tool['name'])
989
+ elif hasattr(tool, 'name'):
990
+ tool_names.append(tool.name)
991
+ except Exception as e:
992
+ self.logger.debug(
993
+ f"Failed to call list_tools() on MCP server {server_name}: {e}"
994
+ )
995
+
996
+ # Approach 3: Check if it's a dict with 'tools' key
997
+ elif isinstance(server_obj, dict) and 'tools' in server_obj:
998
+ tools_list = server_obj['tools']
999
+ if isinstance(tools_list, list):
1000
+ for tool in tools_list:
1001
+ if isinstance(tool, str):
1002
+ tool_names.append(tool)
1003
+ elif isinstance(tool, dict) and 'name' in tool:
1004
+ tool_names.append(tool['name'])
1005
+
1006
+ # Approach 4: For external MCP servers, tools often follow pattern mcp__<server_name>__<tool_name>
1007
+ # But without knowing the tool names, we can't construct them
1008
+ # So we'll just log that we couldn't extract tools
1009
+
1010
+ if tool_names:
1011
+ self.logger.debug(
1012
+ f"Extracted {len(tool_names)} tools from MCP server '{server_name}'",
1013
+ tools=tool_names
1014
+ )
1015
+ else:
1016
+ self.logger.debug(
1017
+ f"Could not extract tool names from MCP server '{server_name}' using standard approaches"
1018
+ )
1019
+
1020
+ except Exception as e:
1021
+ self.logger.error(
1022
+ f"Error extracting tools from MCP server '{server_name}': {e}",
1023
+ exc_info=True
1024
+ )
1025
+
1026
+ return tool_names
1027
+
1028
def _build_mcp_servers(self, context: RuntimeExecutionContext) -> tuple[Dict[str, Any], list[str]]:
    """
    Build MCP server configurations from context and custom skills.

    This converts our skills into Claude Code MCP servers for custom tools.
    Handles both legacy get_tools() and Toolkit.functions patterns.

    Servers supplied in context.mcp_servers are passed through unchanged;
    for each of them the tool names to allow are either extracted from the
    server object or, when that fails, guessed from fallback patterns.

    Args:
        context: Execution context

    Returns:
        Tuple of (MCP server configurations dict, list of all MCP tool names)
    """
    from claude_agent_sdk import create_sdk_mcp_server, tool as mcp_tool
    import asyncio

    # Helper tools of workflow_executor skills that are deliberately not exposed.
    hidden_helper_tools = ("list_all_workflows", "get_workflow_info")

    def skill_tool_name(owner_name, raw_tool_name):
        """Build the allowed_tools entry for a tool served by a skill's MCP server."""
        # IMPORTANT: Replace spaces with underscores to match Claude Code SDK sanitization
        owner = owner_name.replace(" ", "_")
        tool = raw_tool_name.replace(" ", "_")
        # If tool name matches skill name, don't duplicate
        # (e.g., mcp__run_ado_test not mcp__run_ado_test__run_ado_test)
        return f"mcp__{owner}" if tool == owner else f"mcp__{owner}__{tool}"

    async def invoke(target, args):
        """Call a sync or async tool callable with the MCP argument dict."""
        if asyncio.iscoroutinefunction(target):
            return await target(**args) if args else await target()
        # Run synchronous tools in thread pool to avoid blocking event loop
        # This is critical for tools that do blocking I/O (like streaming HTTP requests)
        return await asyncio.to_thread(
            lambda: target(**args) if args else target()
        )

    def make_function_tool(entrypoint, name, description, parameters):
        """Wrap a Toolkit function entrypoint; failures become isError results."""
        done_message = (
            "Async workflow tool completed successfully"
            if asyncio.iscoroutinefunction(entrypoint)
            else "Workflow tool completed successfully"
        )

        @mcp_tool(name, description, parameters)
        async def wrapped_tool(args: dict) -> dict:
            try:
                self.logger.debug(
                    "Executing workflow tool",
                    tool_name=name,
                    args=args,
                )
                result = await invoke(entrypoint, args)
                text = str(result)
                self.logger.info(
                    done_message,
                    tool_name=name,
                    result_length=len(text),
                    result_preview=text[:500] if result else "(empty)"
                )
                return {
                    "content": [{
                        "type": "text",
                        "text": text
                    }]
                }
            except Exception as e:
                self.logger.error(
                    "Workflow tool execution failed",
                    tool_name=name,
                    error=str(e),
                    exc_info=True,
                )
                return {
                    "content": [{
                        "type": "text",
                        "text": f"Error executing {name}: {str(e)}"
                    }],
                    "isError": True
                }
        return wrapped_tool

    def make_legacy_tool(tool_func, name, description):
        """Wrap a legacy get_tools() callable; exceptions propagate to the SDK.

        Built through a factory so each wrapper keeps its own tool_func.
        Defining the wrapper directly inside the loop would make every
        wrapper call the last function of the loop (late-binding closure).
        """
        @mcp_tool(name, description, {})
        async def wrapped_tool(args: dict) -> dict:
            result = await invoke(tool_func, args)
            return {
                "content": [{
                    "type": "text",
                    "text": str(result)
                }]
            }
        return wrapped_tool

    mcp_servers = {}
    all_mcp_tool_names = []  # Track all tool names across all MCP servers

    # Include MCP servers from context (if any)
    if context.mcp_servers:
        self.logger.info(
            "Processing MCP servers from context",
            server_count=len(context.mcp_servers),
            server_names=list(context.mcp_servers.keys()),
        )

        for server_name, server_obj in context.mcp_servers.items():
            mcp_servers[server_name] = server_obj
            # IMPORTANT: Sanitize names by replacing spaces with underscores
            sanitized_server = server_name.replace(" ", "_")

            # Try to extract tool names from the MCP server object
            # MCP servers may have different structures, so we try multiple approaches
            extracted_tools = self._extract_mcp_tool_names(server_name, server_obj)
            if extracted_tools:
                # Construct full MCP tool names: mcp__<server_name>__<tool_name>
                # This is the format Claude Code expects in allowed_tools.
                # Names that already carry the mcp__ prefix are used as-is.
                full_tool_names = [
                    tool_name
                    if tool_name.startswith("mcp__")
                    else f"mcp__{sanitized_server}__{tool_name.replace(' ', '_')}"
                    for tool_name in extracted_tools
                ]

                all_mcp_tool_names.extend(full_tool_names)
                self.logger.info(
                    "Extracted and constructed MCP tool names",
                    server_name=server_name,
                    raw_tool_count=len(extracted_tools),
                    raw_tools=extracted_tools,
                    full_tool_names=full_tool_names,
                )
            else:
                # If we can't extract tools, try comprehensive fallback patterns
                # MCP servers can expose tools in various ways - try all common patterns
                fallback_tools = [
                    f"mcp__{sanitized_server}",  # Pattern: mcp__<server_name>
                    sanitized_server,  # Pattern: <server_name> (raw)
                    server_name,  # Pattern: <server_name> (unsanitized)
                ]

                # If server name already has mcp__ prefix, also try without prefix
                if server_name.startswith("mcp__"):
                    clean_name = server_name[5:]  # Remove "mcp__" prefix
                    fallback_tools.append(clean_name)
                    fallback_tools.append(f"mcp__{clean_name}")

                # Try common tool name patterns for this server (sanitized)
                common_tool_names = ["check", "status", "health", "run", "execute"]
                fallback_tools.extend(
                    f"mcp__{sanitized_server}__{tool_name}" for tool_name in common_tool_names
                )

                # Deduplicate, keeping a stable first-seen order
                fallback_tools = list(dict.fromkeys(fallback_tools))
                all_mcp_tool_names.extend(fallback_tools)

                # Log warning with comprehensive debug info
                self.logger.warning(
                    "⚠️ Could not extract tool names from MCP server - using COMPREHENSIVE fallback patterns",
                    server_name=server_name,
                    fallback_tools_count=len(fallback_tools),
                    fallback_tools=fallback_tools,
                    server_type=type(server_obj).__name__,
                    has_tools_attr=hasattr(server_obj, 'tools'),
                    has_list_tools=hasattr(server_obj, 'list_tools'),
                    is_dict=isinstance(server_obj, dict),
                    dict_keys=list(server_obj.keys()) if isinstance(server_obj, dict) else None,
                    object_attrs=dir(server_obj)[:20] if hasattr(server_obj, '__dict__') else None,
                    message="⚠️ IMPORTANT: Check logs after execution - if still getting permission errors, add exact tool names to runtime_config.explicit_mcp_tools"
                )

    # Convert custom skills to MCP servers
    for skill in context.skills or []:
        tools_list = []
        registered_tool_names = []  # Track tool names for logging
        skill_name = getattr(skill, "name", "custom_skill")

        # Check for Toolkit pattern (has .functions attribute)
        if hasattr(skill, "functions") and hasattr(skill.functions, 'items'):
            self.logger.info(
                "Found skill with registered functions",
                skill_name=skill_name,
                function_count=len(skill.functions),
                function_names=list(skill.functions.keys()),
            )

            # Extract tools from functions registry
            for func_name, func_obj in skill.functions.items():
                # Skip helper tools for workflow_executor skills to avoid confusion
                # Only expose the main workflow tool(s), not list_all_workflows or get_workflow_info
                if func_name in hidden_helper_tools:
                    self.logger.debug(
                        "Skipping helper tool for workflow_executor skill",
                        skill_name=skill_name,
                        tool_name=func_name,
                    )
                    continue

                # Get entrypoint (the actual callable)
                entrypoint = getattr(func_obj, 'entrypoint', None)
                if not entrypoint:
                    self.logger.warning(
                        "Function missing entrypoint",
                        skill_name=skill_name,
                        function_name=func_name,
                    )
                    continue

                # Get function metadata - use function name as-is
                tool_name = func_name
                tool_description = (
                    getattr(func_obj, 'description', None)
                    or entrypoint.__doc__
                    or f"{tool_name} tool"
                )
                tool_parameters = getattr(func_obj, 'parameters', {})

                tools_list.append(
                    make_function_tool(entrypoint, tool_name, tool_description, tool_parameters)
                )
                registered_tool_names.append(tool_name)

                # Construct full MCP tool name for allowed_tools: mcp__<server>__<tool>
                full_mcp_tool_name = skill_tool_name(skill_name, tool_name)
                all_mcp_tool_names.append(full_mcp_tool_name)  # Track for allowed_tools

                self.logger.info(
                    "Registered MCP tool from skill function",
                    skill_name=skill_name,
                    tool_name=tool_name,
                    full_mcp_tool_name=full_mcp_tool_name,
                )

        # Legacy: Check if skill has get_tools() method
        elif hasattr(skill, "get_tools"):
            for tool_func in skill.get_tools():
                tool_name = getattr(tool_func, "__name__", "custom_tool")
                # __doc__ exists but is None on undocumented functions, so a
                # getattr() default alone would never apply.
                tool_description = getattr(tool_func, "__doc__", None) or f"{tool_name} tool"

                tools_list.append(make_legacy_tool(tool_func, tool_name, tool_description))
                registered_tool_names.append(tool_name)

                # Construct full MCP tool name for allowed_tools: mcp__<server>__<tool>
                all_mcp_tool_names.append(skill_tool_name(skill_name, tool_name))  # Track for allowed_tools

        # Create MCP server for this skill if it has tools
        if tools_list:
            # Use clean server name
            server_name = skill_name

            mcp_servers[server_name] = create_sdk_mcp_server(
                name=server_name,
                version="1.0.0",
                tools=tools_list
            )

            self.logger.info(
                "Created MCP server for skill",
                skill_name=skill_name,
                server_name=server_name,
                tool_count=len(tools_list),
                tool_names=registered_tool_names,
            )

    self.logger.info(
        "Built MCP servers",
        server_count=len(mcp_servers),
        servers=list(mcp_servers.keys()),
        mcp_tool_count=len(all_mcp_tool_names),
        mcp_tools=all_mcp_tool_names[:10],  # Limit to 10 for readability
    )

    return mcp_servers, all_mcp_tool_names
1320
+
1321
def _build_hooks(
    self, context: RuntimeExecutionContext, event_callback: Callable, active_tools: Dict[str, str]
) -> Dict[str, Any]:
    """
    Build hooks for tool execution monitoring.

    Hooks intercept events like PreToolUse and PostToolUse to provide
    real-time feedback and monitoring. Since Claude Code SDK doesn't send
    ToolResultBlock in the stream, hooks are the only place to publish tool_completed events.

    Diagnostic output from both hooks is printed to stdout only when the
    CLAUDE_CODE_DEBUG environment variable is "true" (case-insensitive).

    Args:
        context: Execution context
        event_callback: Callback for publishing events
        active_tools: Shared dict mapping tool_use_id -> tool_name (populated from ToolUseBlock);
            used by the post-tool hook when the hook payload carries no tool name

    Returns:
        Dict of hook configurations
    """
    from claude_agent_sdk import HookMatcher

    execution_id = context.execution_id

    def debug_enabled() -> bool:
        # Read on every call so the flag can be toggled while running.
        return os.getenv("CLAUDE_CODE_DEBUG", "false").lower() == "true"

    def read_field(container, key, default):
        """Read `key` from a dict-like or attribute-style object."""
        if isinstance(container, dict):
            return container.get(key, default)
        return getattr(container, key, default)

    async def pre_tool_hook(input_data, tool_use_id, tool_context):
        """Hook called before tool execution"""
        debug_mode = debug_enabled()

        # Tool inputs may hold sensitive values, so dump them only on request.
        if debug_mode:
            print("\n🪝 Pre-Tool Hook DEBUG:")
            print(f" Tool Use ID: {tool_use_id}")
            print(f" input_data type: {type(input_data)}")
            print(f" input_data keys: {list(input_data.keys()) if isinstance(input_data, dict) else 'not a dict'}")
            print(f" input_data: {str(input_data)[:500]}")
            print(f" tool_context: {tool_context}")

        # Try to extract tool name from input_data (similar to post-tool hook)
        tool_name = "unknown"
        tool_args = {}

        if isinstance(input_data, dict):
            # Check if input_data has tool_name like output_data does
            tool_name = input_data.get("tool_name", "unknown")
            tool_args = input_data.get("tool_input", {})

        if debug_mode:
            if tool_name == "unknown":
                print(" ❌ No tool_name in input_data")
            else:
                print(f" ✅ Found tool_name: {tool_name}")

        # Publish tool_start event
        if event_callback and tool_name != "unknown":
            try:
                event_callback({
                    "type": "tool_start",
                    "tool_name": tool_name,
                    "tool_args": tool_args,
                    "tool_execution_id": tool_use_id,
                    "execution_id": execution_id,
                })
                if debug_mode:
                    print(" ✅ Published tool_start event")
            except Exception as e:
                self.logger.error(
                    "Failed to publish tool_start",
                    tool_name=tool_name,
                    error=str(e),
                    exc_info=True
                )
                if debug_mode:
                    print(f" ❌ Failed to publish: {e}")

        return {}

    async def post_tool_hook(output_data, tool_use_id, tool_context):
        """Hook called after tool execution"""
        # Extract tool name from output_data (provided by Claude Code SDK)
        tool_name = "unknown"
        if isinstance(output_data, dict):
            # Claude SDK provides tool_name directly in output_data
            tool_name = output_data.get("tool_name", "unknown")

        # Fall back to the name recorded for this tool_use_id in the stream.
        if tool_name == "unknown" and active_tools and tool_use_id in active_tools:
            tool_name = active_tools[tool_use_id]

        # tool_context is not guaranteed to be a dict; tolerate an
        # attribute-style object or None instead of raising.
        is_error = read_field(tool_context, "is_error", False)

        # Debug mode logging
        if debug_enabled():
            print("\n🪝 Post-Tool Hook:")
            print(f" Tool: {tool_name}")
            print(f" Status: {'failed' if is_error else 'success'}")

        # Publish tool_complete event (hooks are the ONLY place for Claude Code)
        # ToolResultBlock doesn't appear in Claude Code streams
        if event_callback:
            try:
                event_callback({
                    "type": "tool_complete",
                    "tool_name": tool_name,
                    "tool_execution_id": tool_use_id,
                    "status": "failed" if is_error else "success",
                    "output": str(output_data)[:1000] if output_data else None,
                    "error": str(output_data) if is_error else None,
                    "execution_id": execution_id,
                })
            except Exception as e:
                self.logger.error(
                    "Failed to publish tool_complete",
                    tool_name=tool_name,
                    error=str(e),
                    exc_info=True
                )

        return {}

    # Build hook configuration
    hooks = {
        "PreToolUse": [HookMatcher(hooks=[pre_tool_hook])],
        "PostToolUse": [HookMatcher(hooks=[post_tool_hook])],
    }

    return hooks
1436
+
1437
+ def _get_sdk_version(self) -> str:
1438
+ """Get Claude Code SDK version."""
1439
+ try:
1440
+ import claude_agent_sdk
1441
+ return getattr(claude_agent_sdk, "__version__", "unknown")
1442
+ except:
1443
+ return "unknown"