massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (268) hide show
  1. massgen/__init__.py +142 -8
  2. massgen/adapters/__init__.py +29 -0
  3. massgen/adapters/ag2_adapter.py +483 -0
  4. massgen/adapters/base.py +183 -0
  5. massgen/adapters/tests/__init__.py +0 -0
  6. massgen/adapters/tests/test_ag2_adapter.py +439 -0
  7. massgen/adapters/tests/test_agent_adapter.py +128 -0
  8. massgen/adapters/utils/__init__.py +2 -0
  9. massgen/adapters/utils/ag2_utils.py +236 -0
  10. massgen/adapters/utils/tests/__init__.py +0 -0
  11. massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
  12. massgen/agent_config.py +329 -55
  13. massgen/api_params_handler/__init__.py +10 -0
  14. massgen/api_params_handler/_api_params_handler_base.py +99 -0
  15. massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
  16. massgen/api_params_handler/_claude_api_params_handler.py +113 -0
  17. massgen/api_params_handler/_response_api_params_handler.py +130 -0
  18. massgen/backend/__init__.py +39 -4
  19. massgen/backend/azure_openai.py +385 -0
  20. massgen/backend/base.py +341 -69
  21. massgen/backend/base_with_mcp.py +1102 -0
  22. massgen/backend/capabilities.py +386 -0
  23. massgen/backend/chat_completions.py +577 -130
  24. massgen/backend/claude.py +1033 -537
  25. massgen/backend/claude_code.py +1203 -0
  26. massgen/backend/cli_base.py +209 -0
  27. massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
  28. massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
  29. massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
  30. massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
  31. massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
  32. massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
  33. massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
  34. massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
  35. massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
  36. massgen/backend/docs/inference_backend.md +257 -0
  37. massgen/backend/docs/permissions_and_context_files.md +1085 -0
  38. massgen/backend/external.py +126 -0
  39. massgen/backend/gemini.py +1850 -241
  40. massgen/backend/grok.py +40 -156
  41. massgen/backend/inference.py +156 -0
  42. massgen/backend/lmstudio.py +171 -0
  43. massgen/backend/response.py +1095 -322
  44. massgen/chat_agent.py +131 -113
  45. massgen/cli.py +1560 -275
  46. massgen/config_builder.py +2396 -0
  47. massgen/configs/BACKEND_CONFIGURATION.md +458 -0
  48. massgen/configs/README.md +559 -216
  49. massgen/configs/ag2/ag2_case_study.yaml +27 -0
  50. massgen/configs/ag2/ag2_coder.yaml +34 -0
  51. massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
  52. massgen/configs/ag2/ag2_gemini.yaml +27 -0
  53. massgen/configs/ag2/ag2_groupchat.yaml +108 -0
  54. massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
  55. massgen/configs/ag2/ag2_single_agent.yaml +21 -0
  56. massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
  57. massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
  58. massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
  59. massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
  60. massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
  61. massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
  62. massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
  63. massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
  64. massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
  65. massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
  66. massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
  67. massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
  68. massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
  69. massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
  70. massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
  71. massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
  72. massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
  73. massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
  74. massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
  75. massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
  76. massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
  77. massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
  78. massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
  79. massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
  80. massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
  81. massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
  82. massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
  83. massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
  84. massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
  85. massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
  86. massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
  87. massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
  88. massgen/configs/debug/skip_coordination_test.yaml +27 -0
  89. massgen/configs/debug/test_sdk_migration.yaml +17 -0
  90. massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
  91. massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
  92. massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
  93. massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
  94. massgen/configs/providers/claude/claude.yaml +14 -0
  95. massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
  96. massgen/configs/providers/local/lmstudio.yaml +11 -0
  97. massgen/configs/providers/openai/gpt5.yaml +46 -0
  98. massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
  99. massgen/configs/providers/others/grok_single_agent.yaml +19 -0
  100. massgen/configs/providers/others/zai_coding_team.yaml +108 -0
  101. massgen/configs/providers/others/zai_glm45.yaml +12 -0
  102. massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
  103. massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
  104. massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
  105. massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
  106. massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
  107. massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
  108. massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
  109. massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
  110. massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
  111. massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
  112. massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
  113. massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
  114. massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
  115. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
  116. massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
  117. massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
  118. massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
  119. massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
  120. massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
  121. massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
  122. massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
  123. massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
  124. massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
  125. massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
  126. massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
  127. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
  128. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
  129. massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
  130. massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
  131. massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
  132. massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
  133. massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
  134. massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
  135. massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
  136. massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
  137. massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
  138. massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
  139. massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
  140. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
  141. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
  142. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
  143. massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
  144. massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
  145. massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
  146. massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
  147. massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
  148. massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
  149. massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
  150. massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
  151. massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
  152. massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
  153. massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
  154. massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
  155. massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
  156. massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
  157. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
  158. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
  159. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
  160. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
  161. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
  162. massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
  163. massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
  164. massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
  165. massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
  166. massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
  167. massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
  168. massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
  169. massgen/coordination_tracker.py +708 -0
  170. massgen/docker/README.md +462 -0
  171. massgen/filesystem_manager/__init__.py +21 -0
  172. massgen/filesystem_manager/_base.py +9 -0
  173. massgen/filesystem_manager/_code_execution_server.py +545 -0
  174. massgen/filesystem_manager/_docker_manager.py +477 -0
  175. massgen/filesystem_manager/_file_operation_tracker.py +248 -0
  176. massgen/filesystem_manager/_filesystem_manager.py +813 -0
  177. massgen/filesystem_manager/_path_permission_manager.py +1261 -0
  178. massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
  179. massgen/formatter/__init__.py +10 -0
  180. massgen/formatter/_chat_completions_formatter.py +284 -0
  181. massgen/formatter/_claude_formatter.py +235 -0
  182. massgen/formatter/_formatter_base.py +156 -0
  183. massgen/formatter/_response_formatter.py +263 -0
  184. massgen/frontend/__init__.py +1 -2
  185. massgen/frontend/coordination_ui.py +471 -286
  186. massgen/frontend/displays/base_display.py +56 -11
  187. massgen/frontend/displays/create_coordination_table.py +1956 -0
  188. massgen/frontend/displays/rich_terminal_display.py +1259 -619
  189. massgen/frontend/displays/simple_display.py +9 -4
  190. massgen/frontend/displays/terminal_display.py +27 -68
  191. massgen/logger_config.py +681 -0
  192. massgen/mcp_tools/README.md +232 -0
  193. massgen/mcp_tools/__init__.py +105 -0
  194. massgen/mcp_tools/backend_utils.py +1035 -0
  195. massgen/mcp_tools/circuit_breaker.py +195 -0
  196. massgen/mcp_tools/client.py +894 -0
  197. massgen/mcp_tools/config_validator.py +138 -0
  198. massgen/mcp_tools/docs/circuit_breaker.md +646 -0
  199. massgen/mcp_tools/docs/client.md +950 -0
  200. massgen/mcp_tools/docs/config_validator.md +478 -0
  201. massgen/mcp_tools/docs/exceptions.md +1165 -0
  202. massgen/mcp_tools/docs/security.md +854 -0
  203. massgen/mcp_tools/exceptions.py +338 -0
  204. massgen/mcp_tools/hooks.py +212 -0
  205. massgen/mcp_tools/security.py +780 -0
  206. massgen/message_templates.py +342 -64
  207. massgen/orchestrator.py +1515 -241
  208. massgen/stream_chunk/__init__.py +35 -0
  209. massgen/stream_chunk/base.py +92 -0
  210. massgen/stream_chunk/multimodal.py +237 -0
  211. massgen/stream_chunk/text.py +162 -0
  212. massgen/tests/mcp_test_server.py +150 -0
  213. massgen/tests/multi_turn_conversation_design.md +0 -8
  214. massgen/tests/test_azure_openai_backend.py +156 -0
  215. massgen/tests/test_backend_capabilities.py +262 -0
  216. massgen/tests/test_backend_event_loop_all.py +179 -0
  217. massgen/tests/test_chat_completions_refactor.py +142 -0
  218. massgen/tests/test_claude_backend.py +15 -28
  219. massgen/tests/test_claude_code.py +268 -0
  220. massgen/tests/test_claude_code_context_sharing.py +233 -0
  221. massgen/tests/test_claude_code_orchestrator.py +175 -0
  222. massgen/tests/test_cli_backends.py +180 -0
  223. massgen/tests/test_code_execution.py +679 -0
  224. massgen/tests/test_external_agent_backend.py +134 -0
  225. massgen/tests/test_final_presentation_fallback.py +237 -0
  226. massgen/tests/test_gemini_planning_mode.py +351 -0
  227. massgen/tests/test_grok_backend.py +7 -10
  228. massgen/tests/test_http_mcp_server.py +42 -0
  229. massgen/tests/test_integration_simple.py +198 -0
  230. massgen/tests/test_mcp_blocking.py +125 -0
  231. massgen/tests/test_message_context_building.py +29 -47
  232. massgen/tests/test_orchestrator_final_presentation.py +48 -0
  233. massgen/tests/test_path_permission_manager.py +2087 -0
  234. massgen/tests/test_rich_terminal_display.py +14 -13
  235. massgen/tests/test_timeout.py +133 -0
  236. massgen/tests/test_v3_3agents.py +11 -12
  237. massgen/tests/test_v3_simple.py +8 -13
  238. massgen/tests/test_v3_three_agents.py +11 -18
  239. massgen/tests/test_v3_two_agents.py +8 -13
  240. massgen/token_manager/__init__.py +7 -0
  241. massgen/token_manager/token_manager.py +400 -0
  242. massgen/utils.py +52 -16
  243. massgen/v1/agent.py +45 -91
  244. massgen/v1/agents.py +18 -53
  245. massgen/v1/backends/gemini.py +50 -153
  246. massgen/v1/backends/grok.py +21 -54
  247. massgen/v1/backends/oai.py +39 -111
  248. massgen/v1/cli.py +36 -93
  249. massgen/v1/config.py +8 -12
  250. massgen/v1/logging.py +43 -127
  251. massgen/v1/main.py +18 -32
  252. massgen/v1/orchestrator.py +68 -209
  253. massgen/v1/streaming_display.py +62 -163
  254. massgen/v1/tools.py +8 -12
  255. massgen/v1/types.py +9 -23
  256. massgen/v1/utils.py +5 -23
  257. massgen-0.1.0.dist-info/METADATA +1245 -0
  258. massgen-0.1.0.dist-info/RECORD +273 -0
  259. massgen-0.1.0.dist-info/entry_points.txt +2 -0
  260. massgen/frontend/logging/__init__.py +0 -9
  261. massgen/frontend/logging/realtime_logger.py +0 -197
  262. massgen-0.0.3.dist-info/METADATA +0 -568
  263. massgen-0.0.3.dist-info/RECORD +0 -76
  264. massgen-0.0.3.dist-info/entry_points.txt +0 -2
  265. /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
  266. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
  267. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
  268. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1203 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Claude Code Stream Backend - Streaming interface using claude-code-sdk-python.
4
+
5
+ This backend provides integration with Claude Code through the
6
+ claude-code-sdk-python, leveraging Claude Code's server-side session
7
+ persistence and tool execution capabilities.
8
+
9
+ Key Features:
10
+ - ✅ Native Claude Code streaming integration
11
+ - ✅ Server-side session persistence (no client-side session
12
+ management needed)
13
+ - ✅ Built-in tool execution (Read, Write, Bash, WebSearch, etc.)
14
+ - ✅ MassGen workflow tool integration (new_answer, vote via system prompts)
15
+ - ✅ Single persistent client with automatic session ID tracking
16
+ - ✅ Cost tracking from server-side usage data
17
+ - ✅ Docker execution mode: Bash tool disabled, execute_command MCP used instead
18
+
19
+ Architecture:
20
+ - Uses ClaudeSDKClient with minimal functionality overlay
21
+ - Claude Code server maintains conversation history
22
+ - Extracts session IDs from ResultMessage responses
23
+ - Injects MassGen workflow tools via system prompts
24
+ - Converts claude-code-sdk Messages to MassGen StreamChunks
25
+
26
+ Requirements:
27
+ - claude-agent-sdk installed: uv add claude-agent-sdk (imported below as claude_agent_sdk)
28
+ - Claude Code CLI available in PATH
29
+ - ANTHROPIC_API_KEY configured OR Claude subscription authentication
30
+
31
+ Test Results:
32
+ ✅ TESTED 2025-08-10: Single agent coordination working correctly
33
+ - Command: uv run python -m massgen.cli --config claude_code_single.yaml "2+2=?"
34
+ - Auto-created working directory: claude_code_workspace/
35
+ - Session: 42593707-bca6-40ad-b154-7dc1c222d319
36
+ - Model: claude-sonnet-4-20250514 (Claude Code default)
37
+ - Tools available: Task, Bash, Glob, Grep, LS, Read, Write, WebSearch, etc.
38
+ - Answer provided: "2 + 2 = 4"
39
+ - Coordination: Agent voted for itself, selected as final answer
40
+ - Performance: 70 seconds total (includes coordination overhead)
41
+
42
+ TODO:
43
+ - Consider including cwd/session_id in new_answer results for context preservation
44
+ - Investigate whether next iterations need working directory context
45
+ """
46
+
47
+ from __future__ import annotations
48
+
49
+ import atexit
50
+ import json
51
+ import os
52
+ import re
53
+ import sys
54
+ import uuid
55
+ import warnings
56
+ from pathlib import Path
57
+ from typing import Any, AsyncGenerator, Dict, List, Optional
58
+
59
+ from claude_agent_sdk import ( # type: ignore
60
+ AssistantMessage,
61
+ ClaudeAgentOptions,
62
+ ClaudeSDKClient,
63
+ PermissionResultAllow,
64
+ ResultMessage,
65
+ SystemMessage,
66
+ TextBlock,
67
+ ToolResultBlock,
68
+ ToolUseBlock,
69
+ UserMessage,
70
+ )
71
+
72
+ from ..logger_config import (
73
+ log_backend_activity,
74
+ log_backend_agent_message,
75
+ log_stream_chunk,
76
+ )
77
+ from .base import FilesystemSupport, LLMBackend, StreamChunk
78
+
79
+
80
+ class ClaudeCodeBackend(LLMBackend):
81
+ """Claude Code backend using claude-code-sdk-python.
82
+
83
+ Provides streaming interface to Claude Code with built-in tool execution
84
+ capabilities and MassGen workflow tool integration. Uses ClaudeSDKClient
85
+ for direct communication with Claude Code server.
86
+
87
+ TODO (v0.0.14 Context Sharing Enhancement - See docs/dev_notes/v0.0.14-context.md):
88
+ - Implement permission enforcement during file/workspace operations
89
+ - Add execute_with_permissions() method to check permissions before operations
90
+ - Integrate with PermissionManager for access control validation
91
+ - Add audit logging for all file system access attempts
92
+ - Enforce workspace boundaries based on agent permissions
93
+ - Prevent unauthorized access to other agents' workspaces
94
+ - Support permission-aware tool execution (Read, Write, Bash, etc.)
95
+ """
96
+
97
def __init__(self, api_key: Optional[str] = None, **kwargs):
    """Set up authentication, platform workarounds, and workspace state.

    Args:
        api_key: Anthropic API key. When omitted, the ANTHROPIC_API_KEY
            environment variable is consulted; if that is unset as well,
            ``use_subscription_auth`` is set to True.
        **kwargs: Extra configuration forwarded unchanged to the base-class
            initializer (e.g. model, system_prompt, allowed_tools,
            max_thinking_tokens, cwd).

    Raises:
        ValueError: If ``self.filesystem_manager`` is falsy after the
            base-class initializer has run.

    Note:
        Nothing here validates the credentials; this method only records
        which authentication mode will be attempted.
    """
    super().__init__(api_key, **kwargs)

    # Resolve credentials: explicit argument first, environment second.
    self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    self.use_subscription_auth = not bool(self.api_key)
    if self.api_key:
        # Export the key so the SDK can read it from the environment.
        os.environ["ANTHROPIC_API_KEY"] = self.api_key

    if sys.platform == "win32":
        # Point Claude Code at a bash executable unless the user already did.
        if not os.environ.get("CLAUDE_CODE_GIT_BASH_PATH"):
            import shutil

            bash_path = shutil.which("bash")
            if bash_path:
                os.environ["CLAUDE_CODE_GIT_BASH_PATH"] = bash_path
                print(f"[ClaudeCodeBackend] Set CLAUDE_CODE_GIT_BASH_PATH={bash_path}")

        # Quieten asyncio subprocess-cleanup noise on Windows.
        self._setup_windows_subprocess_cleanup_suppression()

    # One SDK client per backend instance; created elsewhere on demand.
    self._client: Optional[Any] = None  # ClaudeSDKClient
    self._current_session_id: Optional[str] = None

    # The filesystem manager owns workspace setup; without it there is no cwd.
    # NOTE(review): presumably populated by the base initializer from the
    # 'cwd' option - confirm against LLMBackend.
    if not self.filesystem_manager:
        raise ValueError("Claude Code backend requires 'cwd' configuration for workspace management")

    workspace = self.filesystem_manager.get_current_workspace()
    self._cwd: str = str(Path(str(workspace)).resolve())

    self._pending_system_prompt: Optional[str] = None  # Windows-only workaround
150
+
151
def _setup_windows_subprocess_cleanup_suppression(self):
    """Silence asyncio subprocess-cleanup noise (invoked on Windows only).

    Three process-wide measures are installed:

    1. "ignore" warning filters for known cleanup messages.
    2. Wrappers around the ``__del__`` of asyncio's subprocess and proactor
       pipe transports that swallow warnings and exceptions raised there.
    3. An ``atexit`` hook that points ``sys.stderr`` at ``os.devnull`` for
       about 0.3 seconds and then restores it.

    The method is idempotent: creating several backend instances neither
    stacks ``__del__`` wrappers on top of each other nor registers more than
    one exit hook per backend class (each hook would add its own 0.3 s delay
    at interpreter exit).
    """
    # Applied in this order; every entry becomes an "ignore" filter.
    ignored_warnings = (
        {"message": "unclosed transport"},
        {"message": "I/O operation on closed pipe"},
        {"category": ResourceWarning, "message": "unclosed transport"},
        {"category": ResourceWarning, "message": "unclosed event loop"},
        {"category": ResourceWarning, "message": "unclosed <socket.socket"},
        {"category": RuntimeWarning, "message": "coroutine"},
        {"message": "Exception ignored in"},
        {"message": "sys:1: ResourceWarning"},
        {"category": ResourceWarning, "message": "unclosed.*transport.*"},
        {"message": ".*BaseSubprocessTransport.*"},
        {"message": ".*_ProactorBasePipeTransport.*"},
        {"message": ".*Event loop is closed.*"},
    )
    for filter_spec in ignored_warnings:
        warnings.filterwarnings("ignore", **filter_spec)

    def install_silent_del(transport_cls):
        """Wrap transport_cls.__del__ so it never warns or raises."""
        original_del = getattr(transport_cls, "__del__", None)
        # Nothing to wrap, or already wrapped by an earlier backend instance.
        if original_del is None or getattr(original_del, "_massgen_silenced", False):
            return

        def silent_del(transport):
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    original_del(transport)
            except Exception:
                pass  # Destructor noise is exactly what we are hiding.

        silent_del._massgen_silenced = True
        transport_cls.__del__ = silent_del

    # Patch asyncio transport destructors to be silent.
    try:
        import asyncio.base_subprocess
        import asyncio.proactor_events

        # Resolve both classes before patching either one, so a lookup
        # failure leaves asyncio untouched.
        subprocess_transport = asyncio.base_subprocess.BaseSubprocessTransport
        pipe_transport = asyncio.proactor_events._ProactorBasePipeTransport
        install_silent_del(subprocess_transport)
        install_silent_del(pipe_transport)
    except Exception:
        pass  # If patching fails, fall back to warning filters only

    # Register the exit hook only once per backend class.
    owner = type(self)
    if getattr(owner, "_exit_warning_suppression_registered", False):
        return

    original_stderr = sys.stderr

    def suppress_exit_warnings():
        try:
            sys.stderr = open(os.devnull, "w")
            import time

            time.sleep(0.3)
        except Exception:
            pass
        finally:
            try:
                if sys.stderr != original_stderr:
                    sys.stderr.close()
                    sys.stderr = original_stderr
            except Exception:
                pass

    atexit.register(suppress_exit_warnings)
    try:
        owner._exit_warning_suppression_registered = True
    except (AttributeError, TypeError):
        pass  # Owner type does not accept new attributes; hook stays registered.
222
+
223
def get_provider_name(self) -> str:
    """Return the identifier string of this backend's provider."""
    provider_name = "claude_code"
    return provider_name
226
+
227
def get_filesystem_support(self) -> FilesystemSupport:
    """Report the filesystem support level of this backend.

    Returns:
        ``FilesystemSupport.NATIVE``, unconditionally.
    """
    support_level = FilesystemSupport.NATIVE
    return support_level
230
+
231
def is_stateful(self) -> bool:
    """Tell callers whether this backend keeps conversation context.

    Returns:
        Always True: this backend is declared stateful.
    """
    keeps_conversation_state = True
    return keeps_conversation_state
239
+
240
async def clear_history(self) -> None:
    """Ask the active client to clear its conversation history.

    Sends the ``/clear`` slash command through the current client. When no
    client exists, nothing happens. If sending the command raises, a warning
    is printed and ``reset_state()`` is awaited as a fallback.
    """
    client = self._client
    if client is None:
        # No active session, so there is no history to clear.
        return

    try:
        # Intended to drop only the conversation history; whether session id,
        # working directory, tools and permissions survive is up to the
        # Claude Code side and is not verified here.
        await client.query("/clear")
    except Exception as e:
        # Could not clear in place: fall back to tearing the session down.
        print(f"Warning: /clear command failed ({e}), falling back to full reset")
        await self.reset_state()
266
+
267
async def reset_state(self) -> None:
    """Drop the current client connection and forget the session id.

    If a client exists it is asked to disconnect; any exception raised while
    disconnecting is ignored. Afterwards both the client reference and the
    stored session id are set to None.
    """
    active_client = self._client
    if active_client is not None:
        try:
            await active_client.disconnect()
        except Exception:
            pass  # Best-effort cleanup: a failed disconnect must not block the reset.

    self._client = None
    self._current_session_id = None
280
+
281
def update_token_usage_from_result_message(self, result_message) -> None:
    """Fold the usage and cost carried by a ResultMessage into the totals.

    Args:
        result_message: Object expected to be a ``ResultMessage`` exposing a
            ``usage`` mapping and a ``total_cost_usd`` value.

    The call is a no-op when ``result_message`` is not a ``ResultMessage`` or
    lacks either attribute. Token totals are only touched when ``usage`` is
    truthy. The cost added is ``total_cost_usd`` when it is not None;
    otherwise it is computed with ``self.calculate_cost``.
    """
    # Reject anything that is not a ResultMessage.
    if ResultMessage is not None and not isinstance(result_message, ResultMessage):
        return
    # Attribute check kept for SDK compatibility.
    if not (hasattr(result_message, "usage") and hasattr(result_message, "total_cost_usd")):
        return

    usage = result_message.usage
    prompt_tokens = usage.get("input_tokens", 0) if usage else 0
    completion_tokens = usage.get("output_tokens", 0) if usage else 0

    if usage:
        # Add the token counts reported in the message to the running totals.
        self.token_usage.input_tokens += prompt_tokens
        self.token_usage.output_tokens += completion_tokens

    # Prefer the cost reported in the message; compute one only when absent.
    reported_cost = result_message.total_cost_usd
    if reported_cost is None:
        reported_cost = self.calculate_cost(prompt_tokens, completion_tokens, "", result_message)
    self.token_usage.estimated_cost += reported_cost
318
+
319
def update_token_usage(self, messages: List[Dict[str, Any]], response_content: str, model: str):
    """Update token usage tracking from estimates (fallback method).

    Intended for the case where no ResultMessage is available: token counts
    are estimated from text with ``self.estimate_tokens`` and the cost is
    computed with ``self.calculate_cost``.

    Args:
        messages: Conversation messages; each message's ``content`` value is
            used for the input estimate. Missing or None content counts as
            empty text, and non-string content (for example a list of
            content blocks) is converted with ``str()`` instead of raising.
        response_content: Generated response text used for the output
            estimate.
        model: Model name forwarded to ``calculate_cost``.
    """

    def _as_text(content: Any) -> str:
        """Coerce a message content value into text usable for estimation."""
        if isinstance(content, str):
            return content
        if content is None:
            return ""
        # Structured content: its repr is good enough for a rough estimate.
        return str(content)

    # Estimate input tokens from messages.
    input_text = "\n".join(_as_text(msg.get("content", "")) for msg in messages)
    input_tokens = self.estimate_tokens(input_text)

    # Estimate output tokens from response.
    output_tokens = self.estimate_tokens(response_content)

    # Update totals.
    self.token_usage.input_tokens += input_tokens
    self.token_usage.output_tokens += output_tokens

    # Calculate estimated cost (no ResultMessage available).
    cost = self.calculate_cost(input_tokens, output_tokens, model, result_message=None)
    self.token_usage.estimated_cost += cost
349
+
350
def get_supported_builtin_tools(self) -> List[str]:
    """List the built-in tool names this backend reports as supported.

    Returns:
        A new list of tool-name strings on every call, in a fixed order.
    """
    file_tools = ["Read", "Write", "Edit", "MultiEdit"]
    shell_and_search_tools = ["Bash", "Grep", "Glob", "LS"]
    web_tools = ["WebSearch", "WebFetch"]
    task_tools = ["Task", "TodoWrite"]
    notebook_tools = ["NotebookEdit", "NotebookRead"]
    ide_tools = ["mcp__ide__getDiagnostics", "mcp__ide__executeCode"]

    return [
        *file_tools,
        *shell_and_search_tools,
        *web_tools,
        *task_tools,
        *notebook_tools,
        *ide_tools,
        "ExitPlanMode",
    ]
379
+
380
def get_current_session_id(self) -> Optional[str]:
    """Return the session id currently stored on this backend.

    Returns:
        The value of ``self._current_session_id``: a string once a session
        id has been recorded, otherwise None.
    """
    session_id = self._current_session_id
    return session_id
387
+
388
+ # TODO (v0.0.14 Context Sharing Enhancement - See docs/dev_notes/v0.0.14-context.md):
389
+ # Add permission enforcement methods:
390
+ # def execute_with_permissions(self, operation, path):
391
+ # """Execute operation only if permissions allow.
392
+ #
393
+ # Args:
394
+ # operation: The operation to execute (e.g., tool call)
395
+ # path: The file/directory path being accessed
396
+ #
397
+ # Raises:
398
+ # PermissionError: If agent lacks required access
399
+ # """
400
+ # if not self.check_permission(path, operation.type):
401
+ # raise PermissionError(f"Agent {self.agent_id} lacks {operation.type} access to {path}")
402
+ #
403
+ # def check_permission(self, path: str, access_type: str) -> bool:
404
+ # """Check if current agent has permission for path access."""
405
+ # # Will integrate with PermissionManager
406
+ # pass
407
+
408
+ def _build_system_prompt_with_workflow_tools(self, tools: List[Dict[str, Any]], base_system: Optional[str] = None) -> str:
409
+ """Build system prompt that includes workflow tools information.
410
+
411
+ Creates comprehensive system prompt that instructs Claude on tool
412
+ usage, particularly for MassGen workflow coordination tools.
413
+
414
+ Args:
415
+ tools: List of available tools
416
+ base_system: Base system prompt to extend (optional)
417
+
418
+ Returns:
419
+ Complete system prompt with tool instructions
420
+ """
421
+ system_parts = []
422
+
423
+ # Start with base system prompt
424
+ if base_system:
425
+ system_parts.append(base_system)
426
+
427
+ # Add docker mode instruction if enabled
428
+ command_line_execution_mode = self.config.get("command_line_execution_mode", "local")
429
+ if command_line_execution_mode == "docker":
430
+ system_parts.append("\n--- Code Execution Environment ---")
431
+ system_parts.append("- Use the execute_command MCP tool for all command execution")
432
+ system_parts.append("- The Bash tool is disabled in this mode")
433
+ # Below is necessary bc Claude Code is automatically loaded with knowledge of the current git repo;
434
+ # this prompt is a temporary workaround before running fully within docker
435
+ system_parts.append(
436
+ "- Do NOT use any git repository information you may see as part of a broader directory. "
437
+ "All git information must come from the execute_command tool and be focused solely on the "
438
+ "directories you were told to work in, not any parent directories.",
439
+ )
440
+
441
+ # Add workflow tools information if present
442
+ if tools:
443
+ workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]]
444
+ if workflow_tools:
445
+ system_parts.append("\n--- Coordination Actions ---")
446
+ for tool in workflow_tools:
447
+ name = tool.get("function", {}).get("name", "unknown")
448
+ description = tool.get("function", {}).get("description", "No description")
449
+ system_parts.append(f"- {name}: {description}")
450
+
451
+ # Add usage examples for workflow tools
452
+ if name == "new_answer":
453
+ system_parts.append(
454
+ ' Usage: {"tool_name": "new_answer", ' '"arguments": {"content": "your improved answer. If any builtin tools were used, mention how they are used here."}}',
455
+ )
456
+ elif name == "vote":
457
+ # Extract valid agent IDs from enum if available
458
+ agent_id_enum = None
459
+ for t in tools:
460
+ if t.get("function", {}).get("name") == "vote":
461
+ agent_id_param = t.get("function", {}).get("parameters", {}).get("properties", {}).get("agent_id", {})
462
+ if "enum" in agent_id_param:
463
+ agent_id_enum = agent_id_param["enum"]
464
+ break
465
+
466
+ if agent_id_enum:
467
+ agent_list = ", ".join(agent_id_enum)
468
+ system_parts.append(f' Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
469
+ else:
470
+ system_parts.append(' Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
471
+
472
+ system_parts.append("\n--- MassGen Coordination Instructions ---")
473
+ system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")
474
+ # system_parts.append(
475
+ # "You must use the coordination tools (new_answer, vote) "
476
+ # "to participate in multi-agent workflows."
477
+ # )
478
+ # system_parts.append(
479
+ # "Make sure to include the JSON in the exact format shown in the usage examples above.")
480
+ system_parts.append("The JSON MUST be formatted as a strict JSON code block:")
481
+ system_parts.append("1. Start with ```json on one line")
482
+ system_parts.append("2. Include your JSON content (properly formatted)")
483
+ system_parts.append("3. End with ``` on one line")
484
+ system_parts.append('Example format:\n```json\n{"tool_name": "vote", "arguments": {"agent_id": "agent1", "reason": "explanation"}}\n```')
485
+ system_parts.append("The JSON block should be placed at the very end of your response, after your analysis.")
486
+
487
+ return "\n".join(system_parts)
488
+
489
async def _log_backend_input(self, messages, system_prompt, tools, kwargs):
    """Yield a single debug StreamChunk summarising the backend inputs.

    Logging is on unless the ``MASSGEN_LOG_BACKENDS`` environment variable is
    set to ``"0"``. The chunk reports whether the chat is being reset
    (``kwargs["reset_chat"]``), how many tools were supplied, the current
    session ID and, when present, the full system prompt. ``messages`` is
    accepted but not inspected.

    Any exception raised while building the summary is reported as a
    ``[BACKEND_LOG_ERROR]`` debug chunk instead of propagating.
    """
    logging_disabled = os.getenv("MASSGEN_LOG_BACKENDS", "1") == "0"
    if logging_disabled:
        return

    try:
        mode_label = "🔄 RESET" if kwargs.get("reset_chat") else "💬 CONTINUE"
        tools_label = f"🔧 {len(tools)} tools" if tools else "🚫 No tools"
        report_lines = [f"[BACKEND] {mode_label} | {tools_label} | Session: {self._current_session_id}"]

        if system_prompt and len(system_prompt) > 0:
            # The whole system prompt is included, not a preview.
            report_lines.append(f"[SYSTEM_FULL] {system_prompt}")

        # Emitted as a debug chunk so the logging system can capture it.
        yield StreamChunk(type="debug", content="\n".join(report_lines), source="claude_code_backend")

    except Exception as e:
        # A logging failure must not interrupt backend execution.
        yield StreamChunk(
            type="debug",
            content=f"[BACKEND_LOG_ERROR] {str(e)}",
            source="claude_code_backend",
        )
516
+
517
def extract_structured_response(self, response_text: str) -> Optional[Dict[str, Any]]:
    """Extract the structured JSON tool decision from a Claude Code response.

    Looks for a JSON object of the form::

        {"tool_name": "vote/new_answer", "arguments": {...}}

    Several strategies are tried in order, and the first hit wins:

    0. JSON inside markdown ``json`` code fences (last fence first).
    1. Regex for brace-delimited objects with at most one level of nesting
       (last match first).
    2. A scan that asks the JSON decoder to parse an object at every ``{``.
       This copes with arbitrary nesting, braces inside string values and
       stray unbalanced braces elsewhere in the text (first match wins).
    3. Line-by-line collection of lines that look like JSON (last first).

    Args:
        response_text: The full response text to search.

    Returns:
        The parsed dict containing a ``tool_name`` key, or ``None`` when no
        such object is found or the input cannot be processed.
    """
    try:
        import re

        def _as_tool_call(candidate: str) -> Optional[Dict[str, Any]]:
            """Parse candidate text; return it only if it is a tool-call dict."""
            try:
                parsed = json.loads(candidate.strip())
            except json.JSONDecodeError:
                return None
            if isinstance(parsed, dict) and "tool_name" in parsed:
                return parsed
            return None

        # Strategy 0: JSON inside markdown code blocks
        fenced_blocks = re.findall(r"```json\s*(\{.*?\})\s*```", response_text, re.DOTALL)
        for block in reversed(fenced_blocks):
            tool_call = _as_tool_call(block)
            if tool_call is not None:
                return tool_call

        # Strategy 1: brace-delimited blocks with up to one nesting level
        shallow_blocks = re.findall(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", response_text, re.DOTALL)
        for block in reversed(shallow_blocks):
            tool_call = _as_tool_call(block)
            if tool_call is not None:
                return tool_call

        # Strategy 2: let the JSON decoder find complete objects. Counting
        # braces by hand breaks on a stray "}" or on braces inside strings;
        # raw_decode understands JSON syntax, so neither can derail it.
        decoder = json.JSONDecoder()
        start = response_text.find("{")
        while start != -1:
            try:
                parsed, end = decoder.raw_decode(response_text, start)
            except json.JSONDecodeError:
                # Not a JSON object here; try the next opening brace.
                start = response_text.find("{", start + 1)
                continue
            if isinstance(parsed, dict) and "tool_name" in parsed:
                return parsed
            # Valid JSON but not a tool call: skip past the whole object.
            start = response_text.find("{", end)

        # Strategy 3: line-by-line approach (fallback)
        lines = response_text.strip().split("\n")
        json_candidates = []
        for index, line in enumerate(lines):
            stripped = line.strip()
            if not stripped.startswith("{"):
                continue
            if stripped.endswith("}"):
                json_candidates.append(stripped)
                continue
            # Multi-line JSON: collect until the first line ending in "}".
            collected = stripped
            for following in lines[index + 1 :]:
                following = following.strip()
                collected += "\n" + following
                if following.endswith("}"):
                    json_candidates.append(collected)
                    break

        for candidate in reversed(json_candidates):
            tool_call = _as_tool_call(candidate)
            if tool_call is not None:
                return tool_call

        return None

    except Exception:
        # Extraction is best-effort: bad input (e.g. a non-string) or a
        # decoder failure means "no structured response", not an error.
        return None
610
+
611
+ def _parse_workflow_tool_calls(self, text_content: str) -> List[Dict[str, Any]]:
612
+ """Parse workflow tool calls from text content.
613
+
614
+ Searches for JSON-formatted tool calls in the response text and
615
+ converts them to the standard tool call format used by MassGen.
616
+ Uses the extract_structured_response method for robust JSON extraction.
617
+
618
+ Args:
619
+ text_content: Response text to search for tool calls
620
+
621
+ Returns:
622
+ List of unique tool call dictionaries in standard format
623
+ """
624
+ tool_calls = []
625
+
626
+ # First try to extract structured JSON response
627
+ structured_response = self.extract_structured_response(text_content)
628
+
629
+ if structured_response and isinstance(structured_response, dict):
630
+ tool_name = structured_response.get("tool_name")
631
+ arguments = structured_response.get("arguments", {})
632
+
633
+ if tool_name and isinstance(arguments, dict):
634
+ tool_calls.append(
635
+ {
636
+ "id": f"call_{uuid.uuid4().hex[:8]}",
637
+ "type": "function",
638
+ "function": {"name": tool_name, "arguments": arguments},
639
+ },
640
+ )
641
+ return tool_calls
642
+
643
+ # Fallback: Look for multiple JSON tool calls using regex patterns
644
+ seen_calls = set() # Track unique tool calls to prevent duplicates
645
+
646
+ # Look for JSON tool call patterns
647
+ json_patterns = [
648
+ r'\{"tool_name":\s*"([^"]+)",\s*"arguments":\s*' r"(\{[^}]*\})\}",
649
+ r'\{\s*"tool_name"\s*:\s*"([^"]+)"\s*,\s*"arguments"' r"\s*:\s*(\{[^}]*\})\s*\}",
650
+ ]
651
+
652
+ for pattern in json_patterns:
653
+ matches = re.finditer(pattern, text_content, re.IGNORECASE)
654
+ for match in matches:
655
+ tool_name = match.group(1)
656
+ try:
657
+ arguments = json.loads(match.group(2))
658
+
659
+ # Create a unique identifier for this tool call
660
+ # Based on tool name and arguments content
661
+ call_signature = (tool_name, json.dumps(arguments, sort_keys=True))
662
+
663
+ # Only add if we haven't seen this exact call before
664
+ if call_signature not in seen_calls:
665
+ seen_calls.add(call_signature)
666
+ tool_calls.append(
667
+ {
668
+ "id": f"call_{uuid.uuid4().hex[:8]}",
669
+ "type": "function",
670
+ "function": {"name": tool_name, "arguments": arguments},
671
+ },
672
+ )
673
+ except json.JSONDecodeError:
674
+ continue
675
+
676
+ return tool_calls
677
+
678
def _build_claude_options(self, **options_kwargs) -> ClaudeAgentOptions:
    """Build ClaudeAgentOptions from the provided parameters.

    The working directory always comes from the filesystem manager's current
    workspace (and is mirrored into ``self._cwd``). ``permission_mode``
    defaults to ``"acceptEdits"`` and ``allowed_tools`` to the full builtin
    tool list. Remaining keyword arguments are forwarded unless they are in
    the excluded-parameter set.

    ``mcp_servers`` may be given in list format
    (``[{"name": "server1", "type": "stdio", ...}, ...]``); it is converted to
    the dict format (``{"server1": {"type": "stdio", ...}, ...}``). A list
    that yields no usable entries is dropped rather than forwarded as a list.

    When no ``system_prompt`` is supplied, the ``claude_code`` preset is
    requested explicitly. A ``can_use_tool`` callback is always installed; it
    allows every tool whose name starts with ``mcp__`` and returns ``None``
    for all other tools.

    Returns:
        ClaudeAgentOptions configured from the parameters above.
    """
    permission_mode = options_kwargs.get("permission_mode", "acceptEdits")
    allowed_tools = options_kwargs.get("allowed_tools", self.get_supported_builtin_tools())

    # Filter out parameters handled separately or not for ClaudeAgentOptions
    excluded_params = self.get_base_excluded_config_params() | {
        # Claude Code specific exclusions
        "api_key",
        "allowed_tools",
        "permission_mode",
    }

    # The workspace is the single source of truth for cwd.
    cwd_option = Path(str(self.filesystem_manager.get_current_workspace())).resolve()
    self._cwd = str(cwd_option)

    # Get hooks configuration from filesystem manager
    hooks_config = self.filesystem_manager.get_claude_code_hooks_config()

    # Convert mcp_servers from list format to dict format.
    raw_mcp_servers = options_kwargs.get("mcp_servers")
    mcp_servers_dict = {}
    if isinstance(raw_mcp_servers, list):
        for server in raw_mcp_servers:
            if isinstance(server, dict) and "name" in server:
                # Copy the entry without its "name" key; the name becomes the dict key.
                mcp_servers_dict[server["name"]] = {k: v for k, v in server.items() if k != "name"}
    elif isinstance(raw_mcp_servers, dict):
        # Already in dict format
        mcp_servers_dict = raw_mcp_servers

    options = {
        "cwd": cwd_option,
        "resume": self.get_current_session_id(),
        "permission_mode": permission_mode,
        "allowed_tools": allowed_tools,
        **{k: v for k, v in options_kwargs.items() if k not in excluded_params},
    }

    if mcp_servers_dict:
        options["mcp_servers"] = mcp_servers_dict
    elif isinstance(raw_mcp_servers, list):
        # The list had no convertible entries; never forward the raw list.
        options.pop("mcp_servers", None)

    # Request the Claude Code preset when the caller gave no system prompt
    # (the SDK would otherwise default to an empty system prompt).
    if "system_prompt" not in options:
        options["system_prompt"] = {"type": "preset", "preset": "claude_code"}

    # Add hooks if available
    if hooks_config:
        options["hooks"] = hooks_config

    async def can_use_tool(tool_name: str, tool_args: dict, context):
        """Auto-grant permissions for MCP tools."""
        # MCP tools are identified by their "mcp__" name prefix.
        if tool_name.startswith("mcp__"):
            return PermissionResultAllow(updated_input=tool_args)
        # Non-MCP tools: return None to fall back to the default permission mode.
        return None

    options["can_use_tool"] = can_use_tool

    return ClaudeAgentOptions(**options)
765
+
766
def create_client(self, **options_kwargs) -> ClaudeSDKClient:
    """Create a ClaudeSDKClient and remember it on the backend.

    Args:
        **options_kwargs: Parameters forwarded to ``_build_claude_options``.

    Returns:
        The newly created client, which is also stored in ``self._client``.
    """
    # Options are resolved first, then handed to the client constructor.
    self._client = ClaudeSDKClient(self._build_claude_options(**options_kwargs))
    return self._client
782
+
783
async def stream_with_tools(self, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]], **kwargs) -> AsyncGenerator[StreamChunk, None]:
    """
    Stream a response from Claude Code, converted to MassGen StreamChunks.

    On the first call a ClaudeSDKClient is created: default
    ``disallowed_tools`` are applied, Bash-related tools are disabled in
    docker mode, and the system message plus workflow-tool instructions are
    combined into the system prompt. Later calls reuse the stored client.
    All user messages are joined into one query; assistant messages are
    rejected.

    Args:
        messages: List of conversation messages
        tools: List of available tools (includes workflow tools)
        **kwargs: Additional options for client configuration; stream
            kwargs take priority over the constructor config

    Yields:
        StreamChunk objects: ``debug``, ``content``, ``tool_calls``,
        ``complete_message``, ``backend_status``, ``complete_response`` and
        ``done`` chunks on success; a single ``error`` chunk on failure.
    """
    # Extract agent_id from kwargs if provided
    agent_id = kwargs.get("agent_id", None)

    log_backend_activity(
        self.get_provider_name(),
        "Starting stream_with_tools",
        {"num_messages": len(messages), "num_tools": len(tools) if tools else 0},
        agent_id=agent_id,
    )
    # Merge constructor config with stream kwargs (stream kwargs take priority)
    all_params = {**self.config, **kwargs}

    # Built only when a new client is created. It must still be bound when an
    # existing client is reused, because it is read again further down.
    workflow_system_prompt = None

    if self._client is not None:
        client = self._client
    else:
        # Set default disallowed_tools if not provided
        if "disallowed_tools" not in all_params:
            all_params["disallowed_tools"] = [
                "Bash(rm*)",
                "Bash(sudo*)",
                "Bash(su*)",
                "Bash(chmod*)",
                "Bash(chown*)",
            ]

        # Disable Bash tool entirely when docker mode is enabled
        # In docker mode, execute_command MCP tool provides all command execution
        command_line_execution_mode = all_params.get("command_line_execution_mode", "local")
        if command_line_execution_mode == "docker":
            disallowed_tools = list(all_params.get("disallowed_tools", []))
            for tool in ["Bash", "BashOutput", "KillShell"]:
                if tool not in disallowed_tools:
                    disallowed_tools.append(tool)
            all_params["disallowed_tools"] = disallowed_tools

        # Extract system message from messages for append mode
        system_msg = next((msg for msg in messages if msg.get("role") == "system"), None)
        system_content = system_msg.get("content", "") if system_msg else ""

        # Build system prompt with tools information
        workflow_system_prompt = self._build_system_prompt_with_workflow_tools(tools or [], system_content)

        # Windows-specific handling: detect complex prompts that cause subprocess hang
        if sys.platform == "win32" and len(workflow_system_prompt) > 200:
            # Windows with complex prompt: use post-connection delivery to avoid hang
            print("[ClaudeCodeBackend] Windows detected complex system prompt, using post-connection delivery")
            clean_params = {k: v for k, v in all_params.items() if k not in ["system_prompt"]}
            client = self.create_client(**clean_params)
            self._pending_system_prompt = workflow_system_prompt

        else:
            # Original approach for Mac/Linux and Windows with simple prompts
            try:
                # Use Claude Code preset with append for workflow system prompt
                # This maintains Claude Code's default behavior while adding MassGen tools
                system_prompt_config = {
                    "type": "preset",
                    "preset": "claude_code",
                    "append": workflow_system_prompt,
                }
                client = self.create_client(**{**all_params, "system_prompt": system_prompt_config})
                self._pending_system_prompt = None

            except Exception:
                # Fallback for unexpected failures
                if sys.platform == "win32":
                    clean_params = {k: v for k, v in all_params.items() if k not in ["system_prompt"]}
                    client = self.create_client(**clean_params)
                    self._pending_system_prompt = workflow_system_prompt
                else:
                    # On Mac/Linux, re-raise the error since this shouldn't happen
                    raise

    # Connect client if not already connected
    if not client._transport:
        try:
            await client.connect()

            # If we have a pending system prompt, deliver it using the /system command
            pending_system_prompt = getattr(self, "_pending_system_prompt", None)
            if pending_system_prompt:
                try:
                    system_command = f"/system {pending_system_prompt}"
                    await client.query(system_command)

                    # Consume the system response
                    async for response in client.receive_response():
                        if hasattr(response, "subtype") and response.subtype == "init":
                            # This is the system initialization response
                            break

                    yield StreamChunk(
                        type="content",
                        content="[SYSTEM] Applied system instructions at system level\n",
                        source="claude_code",
                    )

                    # Clear the pending prompt
                    self._pending_system_prompt = None

                except Exception as sys_e:
                    # Delivery failure is reported but does not abort the stream.
                    yield StreamChunk(
                        type="content",
                        content=f"[SYSTEM] Warning: System-level delivery failed: {str(sys_e)}\n",
                        source="claude_code",
                    )

        except Exception as e:
            yield StreamChunk(
                type="error",
                error=f"Failed to connect to Claude Code: {str(e)}",
                source="claude_code",
            )
            return

    # Log backend inputs when a system prompt was built in this call
    if workflow_system_prompt is not None:
        async for debug_chunk in self._log_backend_input(messages, workflow_system_prompt, tools, kwargs):
            yield debug_chunk

    # Format the messages for Claude Code
    if not messages:
        log_stream_chunk(
            "backend.claude_code",
            "error",
            "No messages provided to stream_with_tools",
            agent_id,
        )
        # No messages to process - yield error
        yield StreamChunk(
            type="error",
            error="No messages provided to stream_with_tools",
            source="claude_code",
        )
        return

    # Validate messages - should only contain user messages for Claude Code
    user_messages = [msg for msg in messages if msg.get("role") == "user"]
    assistant_messages = [msg for msg in messages if msg.get("role") == "assistant"]

    if assistant_messages:
        log_stream_chunk(
            "backend.claude_code",
            "error",
            "Claude Code backend cannot accept assistant messages - it maintains its own conversation history",
            agent_id,
        )
        yield StreamChunk(
            type="error",
            error="Claude Code backend cannot accept assistant messages - it maintains its own conversation history",
            source="claude_code",
        )
        return

    if not user_messages:
        log_stream_chunk(
            "backend.claude_code",
            "error",
            "No user messages found to send to Claude Code",
            agent_id,
        )
        yield StreamChunk(
            type="error",
            error="No user messages found to send to Claude Code",
            source="claude_code",
        )
        return

    # Combine all non-empty user messages into a single query
    user_contents = []
    for user_msg in user_messages:
        content = user_msg.get("content", "").strip()
        if content:
            user_contents.append(content)

    if not user_contents:
        log_stream_chunk("backend.claude_code", "error", "All user messages were empty", agent_id)
        yield StreamChunk(type="error", error="All user messages were empty", source="claude_code")
        return

    # Join multiple user messages with blank lines
    combined_query = "\n\n".join(user_contents)
    log_backend_agent_message(
        agent_id or "default",
        "SEND",
        # "system" is None when an existing client is reused.
        {"system": workflow_system_prompt, "user": combined_query},
        backend_name=self.get_provider_name(),
    )
    await client.query(combined_query)

    # Stream response and convert to MassGen StreamChunks
    accumulated_content = ""
    try:
        async for message in client.receive_response():
            if isinstance(message, (AssistantMessage, UserMessage)):
                # Process message content block by block
                for block in message.content:
                    if isinstance(block, TextBlock):
                        accumulated_content += block.text

                        # Yield content chunk
                        log_backend_agent_message(
                            agent_id or "default",
                            "RECV",
                            {"content": block.text},
                            backend_name=self.get_provider_name(),
                        )
                        log_stream_chunk("backend.claude_code", "content", block.text, agent_id)
                        yield StreamChunk(type="content", content=block.text, source="claude_code")

                    elif isinstance(block, ToolUseBlock):
                        # Claude Code's builtin tool usage
                        log_backend_activity(
                            self.get_provider_name(),
                            f"Builtin tool called: {block.name}",
                            {"tool_id": block.id},
                            agent_id=agent_id,
                        )
                        log_stream_chunk(
                            "backend.claude_code",
                            "tool_use",
                            {"name": block.name, "input": block.input},
                            agent_id,
                        )
                        yield StreamChunk(
                            type="content",
                            content=f"🔧 {block.name}({block.input})",
                            source="claude_code",
                        )

                    elif isinstance(block, ToolResultBlock):
                        # Tool result from Claude Code - use simple content format
                        status = "❌ Error" if block.is_error else "✅ Result"
                        log_stream_chunk(
                            "backend.claude_code",
                            "tool_result",
                            {"is_error": block.is_error, "content": block.content},
                            agent_id,
                        )
                        yield StreamChunk(
                            type="content",
                            content=f"🔧 Tool {status}: {block.content}",
                            source="claude_code",
                        )

                # Parse workflow tool calls from accumulated content
                workflow_tool_calls = self._parse_workflow_tool_calls(accumulated_content)
                if workflow_tool_calls:
                    log_stream_chunk(
                        "backend.claude_code",
                        "tool_calls",
                        workflow_tool_calls,
                        agent_id,
                    )
                    yield StreamChunk(
                        type="tool_calls",
                        tool_calls=workflow_tool_calls,
                        source="claude_code",
                    )

                # Yield complete message (only a 200-character preview is logged)
                log_stream_chunk(
                    "backend.claude_code",
                    "complete_message",
                    accumulated_content[:200],
                    agent_id,
                )
                yield StreamChunk(
                    type="complete_message",
                    complete_message={
                        "role": "assistant",
                        "content": accumulated_content,
                    },
                    source="claude_code",
                )

            elif isinstance(message, SystemMessage):
                # System status updates
                self._track_session_info(message=message)
                log_stream_chunk(
                    "backend.claude_code",
                    "backend_status",
                    {"subtype": message.subtype, "data": message.data},
                    agent_id,
                )
                yield StreamChunk(
                    type="backend_status",
                    status=message.subtype,
                    content=json.dumps(message.data),
                    source="claude_code",
                )

            elif isinstance(message, ResultMessage):
                # Track session ID from server response
                self._track_session_info(message)

                # Update token usage using ResultMessage data
                self.update_token_usage_from_result_message(message)

                # Yield completion
                log_stream_chunk(
                    "backend.claude_code",
                    "complete_response",
                    {
                        "session_id": message.session_id,
                        "cost_usd": message.total_cost_usd,
                    },
                    agent_id,
                )
                yield StreamChunk(
                    type="complete_response",
                    complete_message={
                        "session_id": message.session_id,
                        "duration_ms": message.duration_ms,
                        "cost_usd": message.total_cost_usd,
                        "usage": message.usage,
                        "is_error": message.is_error,
                    },
                    source="claude_code",
                )

                # Final done signal; the result message ends the turn
                log_stream_chunk("backend.claude_code", "done", None, agent_id)
                yield StreamChunk(type="done", source="claude_code")
                break

    except Exception as e:
        error_msg = str(e)

        # Provide helpful Windows-specific guidance
        if "git-bash" in error_msg.lower() or "bash.exe" in error_msg.lower():
            error_msg += (
                "\n\nWindows Setup Required:\n"
                "1. Install Git Bash: https://git-scm.com/downloads/win\n"
                "2. Ensure git-bash is in PATH, or set: "
                "CLAUDE_CODE_GIT_BASH_PATH=C:\\Program Files\\Git\\bin\\bash.exe"
            )
        elif "exit code 1" in error_msg and sys.platform == "win32":
            error_msg += "\n\nThis may indicate missing git-bash on Windows. Please install Git Bash from https://git-scm.com/downloads/win"

        log_stream_chunk("backend.claude_code", "error", error_msg, agent_id)
        yield StreamChunk(
            type="error",
            error=f"Claude Code streaming error: {error_msg}",
            source="claude_code",
        )
1149
+
1150
def _track_session_info(self, message) -> None:
    """Record session information carried by a Claude Code message.

    A ``ResultMessage`` with a non-empty ``session_id`` replaces the stored
    session ID. A ``SystemMessage`` whose ``data`` is a dict may update the
    stored session ID (``data["session_id"]``, announced with a print when it
    changes) and the working directory (``data["cwd"]``). Any other message
    is ignored.

    Args:
        message: Message from Claude Code (ResultMessage or SystemMessage)
            potentially containing session information
    """
    # The "is not None" guards tolerate the message classes being unavailable.
    if ResultMessage is not None and isinstance(message, ResultMessage):
        if hasattr(message, "session_id") and message.session_id:
            self._current_session_id = message.session_id

    elif SystemMessage is not None and isinstance(message, SystemMessage):
        if hasattr(message, "data") and isinstance(message.data, dict):
            data = message.data

            # Extract session ID from system message data
            session_id = data.get("session_id")
            if session_id:
                old_session_id = self._current_session_id
                self._current_session_id = session_id
                if old_session_id != self._current_session_id:
                    print(f"[ClaudeCodeBackend] Session ID from SystemMessage: {old_session_id} → {self._current_session_id}")

            # Extract working directory from system message data
            cwd = data.get("cwd")
            if cwd:
                self._cwd = cwd
1180
+
1181
async def disconnect(self):
    """Disconnect the ClaudeSDKClient, if any, and reset session state.

    When a client exists, its ``disconnect()`` is awaited; errors raised by
    it are ignored, and afterwards both ``self._client`` and
    ``self._current_session_id`` are set to ``None``. When no client exists
    nothing is changed.
    """
    if self._client is None:
        return

    try:
        await self._client.disconnect()
    except Exception:
        # Cleanup is best-effort; a failed disconnect is not reported.
        pass
    finally:
        self._client = None
        self._current_session_id = None
1195
+
1196
+ def __del__(self):
1197
+ """Cleanup on destruction.
1198
+
1199
+ Note: This won't work for async cleanup in practice.
1200
+ Use explicit disconnect() calls for proper resource cleanup.
1201
+ """
1202
+ # Note: This won't work for async cleanup, but serves as documentation
1203
+ # Real cleanup should be done via explicit disconnect() calls