kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. kolega_code/__init__.py +151 -0
  2. kolega_code/agent/__init__.py +42 -0
  3. kolega_code/agent/baseagent.py +998 -0
  4. kolega_code/agent/browseragent.py +123 -0
  5. kolega_code/agent/coder.py +157 -0
  6. kolega_code/agent/common.py +41 -0
  7. kolega_code/agent/compression.py +81 -0
  8. kolega_code/agent/context.py +112 -0
  9. kolega_code/agent/conversation.py +408 -0
  10. kolega_code/agent/generalagent.py +146 -0
  11. kolega_code/agent/investigationagent.py +123 -0
  12. kolega_code/agent/planningagent.py +187 -0
  13. kolega_code/agent/prompt_provider.py +196 -0
  14. kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
  15. kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
  16. kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
  17. kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
  18. kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
  19. kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
  20. kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
  21. kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
  22. kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
  23. kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
  24. kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
  25. kolega_code/agent/prompts.py +192 -0
  26. kolega_code/agent/tests/__init__.py +0 -0
  27. kolega_code/agent/tests/llm/__init__.py +0 -0
  28. kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
  29. kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
  30. kolega_code/agent/tests/llm/test_client.py +773 -0
  31. kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
  32. kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
  33. kolega_code/agent/tests/llm/test_exceptions.py +249 -0
  34. kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
  35. kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
  36. kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
  37. kolega_code/agent/tests/llm/test_model_specs.py +17 -0
  38. kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
  39. kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
  40. kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
  41. kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
  42. kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
  43. kolega_code/agent/tests/services/__init__.py +1 -0
  44. kolega_code/agent/tests/services/test_browser.py +447 -0
  45. kolega_code/agent/tests/services/test_browser_parity.py +353 -0
  46. kolega_code/agent/tests/services/test_file_system.py +699 -0
  47. kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
  48. kolega_code/agent/tests/services/test_terminal.py +154 -0
  49. kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
  50. kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
  51. kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
  52. kolega_code/agent/tests/test_base_agent.py +1942 -0
  53. kolega_code/agent/tests/test_coder_attachments.py +330 -0
  54. kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
  55. kolega_code/agent/tests/test_commands.py +179 -0
  56. kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
  57. kolega_code/agent/tests/test_empty_message_handling.py +48 -0
  58. kolega_code/agent/tests/test_general_agent.py +242 -0
  59. kolega_code/agent/tests/test_html.py +320 -0
  60. kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
  61. kolega_code/agent/tests/test_planning_agent.py +227 -0
  62. kolega_code/agent/tests/test_prompt_provider.py +271 -0
  63. kolega_code/agent/tests/test_tool_registry.py +102 -0
  64. kolega_code/agent/tests/test_tools.py +549 -0
  65. kolega_code/agent/tests/tool_backend/__init__.py +0 -0
  66. kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
  67. kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
  68. kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
  69. kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
  70. kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
  71. kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
  72. kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
  73. kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
  74. kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
  75. kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
  76. kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
  77. kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
  78. kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
  79. kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
  80. kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
  81. kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
  82. kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
  83. kolega_code/agent/tool_backend/agent_tool.py +414 -0
  84. kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
  85. kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
  86. kolega_code/agent/tool_backend/base_tool.py +217 -0
  87. kolega_code/agent/tool_backend/browser_tool.py +271 -0
  88. kolega_code/agent/tool_backend/build_tool.py +93 -0
  89. kolega_code/agent/tool_backend/create_file_tool.py +52 -0
  90. kolega_code/agent/tool_backend/glob_tool.py +323 -0
  91. kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
  92. kolega_code/agent/tool_backend/memory_tool.py +79 -0
  93. kolega_code/agent/tool_backend/read_file_tool.py +119 -0
  94. kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
  95. kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
  96. kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
  97. kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
  98. kolega_code/agent/tool_backend/streaming_tool.py +47 -0
  99. kolega_code/agent/tool_backend/terminal_tool.py +643 -0
  100. kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
  101. kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
  102. kolega_code/agent/tools.py +1704 -0
  103. kolega_code/agent/utils/commands.py +94 -0
  104. kolega_code/cli/__init__.py +1 -0
  105. kolega_code/cli/app.py +2756 -0
  106. kolega_code/cli/config.py +280 -0
  107. kolega_code/cli/connection.py +49 -0
  108. kolega_code/cli/file_index.py +147 -0
  109. kolega_code/cli/main.py +564 -0
  110. kolega_code/cli/mentions.py +155 -0
  111. kolega_code/cli/messages.py +89 -0
  112. kolega_code/cli/provider_registry.py +96 -0
  113. kolega_code/cli/session_store.py +207 -0
  114. kolega_code/cli/settings.py +87 -0
  115. kolega_code/cli/skills.py +409 -0
  116. kolega_code/cli/slash_commands.py +108 -0
  117. kolega_code/cli/tests/__init__.py +1 -0
  118. kolega_code/cli/tests/test_app.py +4251 -0
  119. kolega_code/cli/tests/test_cli_config.py +171 -0
  120. kolega_code/cli/tests/test_connection.py +26 -0
  121. kolega_code/cli/tests/test_file_index.py +103 -0
  122. kolega_code/cli/tests/test_main.py +455 -0
  123. kolega_code/cli/tests/test_mentions.py +108 -0
  124. kolega_code/cli/tests/test_session_store.py +67 -0
  125. kolega_code/cli/tests/test_settings.py +62 -0
  126. kolega_code/cli/tests/test_skills.py +157 -0
  127. kolega_code/cli/tests/test_slash_commands.py +88 -0
  128. kolega_code/cli/theme.py +180 -0
  129. kolega_code/config.py +154 -0
  130. kolega_code/events.py +202 -0
  131. kolega_code/llm/client.py +300 -0
  132. kolega_code/llm/exceptions.py +285 -0
  133. kolega_code/llm/instrumented_client.py +520 -0
  134. kolega_code/llm/models.py +1368 -0
  135. kolega_code/llm/providers/__init__.py +0 -0
  136. kolega_code/llm/providers/anthropic.py +387 -0
  137. kolega_code/llm/providers/base.py +71 -0
  138. kolega_code/llm/providers/google.py +157 -0
  139. kolega_code/llm/providers/models.py +37 -0
  140. kolega_code/llm/providers/openai.py +363 -0
  141. kolega_code/llm/ratelimit.py +40 -0
  142. kolega_code/llm/specs.py +67 -0
  143. kolega_code/llm/tool_execution_ids.py +18 -0
  144. kolega_code/models/__init__.py +9 -0
  145. kolega_code/models/sandbox_terminal_state.py +47 -0
  146. kolega_code/runtime.py +50 -0
  147. kolega_code/sandbox/README.md +200 -0
  148. kolega_code/sandbox/__init__.py +21 -0
  149. kolega_code/sandbox/async_filesystem.py +475 -0
  150. kolega_code/sandbox/base.py +297 -0
  151. kolega_code/sandbox/browser.py +25 -0
  152. kolega_code/sandbox/event_loop.py +43 -0
  153. kolega_code/sandbox/filesystem.py +341 -0
  154. kolega_code/sandbox/local.py +118 -0
  155. kolega_code/sandbox/serializer.py +175 -0
  156. kolega_code/sandbox/terminal.py +868 -0
  157. kolega_code/sandbox/utils.py +216 -0
  158. kolega_code/services/base.py +255 -0
  159. kolega_code/services/browser.py +444 -0
  160. kolega_code/services/file_system.py +749 -0
  161. kolega_code/services/html.py +221 -0
  162. kolega_code/services/terminal.py +903 -0
  163. kolega_code/tools/__init__.py +22 -0
  164. kolega_code/tools/core.py +33 -0
  165. kolega_code/tools/definitions.py +81 -0
  166. kolega_code/tools/registry.py +73 -0
  167. kolega_code-0.1.0.dist-info/METADATA +157 -0
  168. kolega_code-0.1.0.dist-info/RECORD +171 -0
  169. kolega_code-0.1.0.dist-info/WHEEL +4 -0
  170. kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
  171. kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,291 @@
1
+ """Tests for parallel tool-call execution and task-local tool-call IDs."""
2
+
3
+ import asyncio
4
+ import os
5
+ import uuid
6
+ from types import SimpleNamespace
7
+ from unittest.mock import AsyncMock
8
+
9
+ import pytest
10
+ from dotenv import load_dotenv
11
+
12
+ from kolega_code.agent.baseagent import BaseAgent
13
+ from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
14
+ from kolega_code.events import AgentConnectionManager
15
+ from kolega_code.llm.models import ToolCall
16
+
17
+ # Load environment variables
18
+ load_dotenv()
19
+
20
+
21
@pytest.fixture
def agent_config():
    """Build an AgentConfig whose three model slots all use the same Anthropic model.

    The Anthropic key is read from the ANTHROPIC_API_KEY environment variable,
    falling back to the placeholder "test_key".
    """

    def haiku(**overrides):
        # Each slot receives its own ModelConfig and RateLimitConfig instance.
        return ModelConfig(
            provider=ModelProvider.ANTHROPIC,
            model="claude-haiku-4-5-20251001",
            rate_limits=RateLimitConfig(),
            **overrides,
        )

    anthropic_key = os.getenv("ANTHROPIC_API_KEY", "test_key")
    return AgentConfig(
        anthropic_api_key=anthropic_key,
        openai_api_key="test-key",
        long_context_config=haiku(),
        fast_config=haiku(),
        thinking_config=haiku(thinking_tokens=1024),
    )
43
+
44
+
45
@pytest.fixture
def mock_connection_manager():
    """Provide an AsyncMock restricted to the AgentConnectionManager interface."""
    manager = AsyncMock(spec=AgentConnectionManager)
    return manager
48
+
49
+
50
@pytest.fixture
def base_agent(tmp_path, mock_connection_manager, agent_config):
    """Create a BaseAgent rooted at tmp_path with its chat and log methods mocked out."""
    agent = BaseAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
    )
    # Replace the outbound messaging/logging coroutines so tests never emit real traffic.
    for method_name in ("send_chat_message", "log_info", "log_error"):
        setattr(agent, method_name, AsyncMock())
    return agent
63
+
64
+
65
def make_tool_call(name: str, index: int) -> ToolCall:
    """Build a ToolCall for tool *name* whose ids and task text embed *index*.

    The call id is ``"<name>_<index>"``, the execution id is that id prefixed
    with ``"exec_"``, and the input is ``{"task": "task <index>"}``.
    """
    call_id = f"{name}_{index}"
    payload = {"task": f"task {index}"}
    return ToolCall(
        id=call_id,
        name=name,
        input=payload,
        execution_id=f"exec_{call_id}",
    )
72
+
73
+
74
class ConcurrencyTracker:
    """Tracks how many fake tool executions overlap."""

    def __init__(self):
        # Number of run() calls currently between entry and exit.
        self.active = 0
        # Highest value `active` has reached so far.
        self.max_active = 0

    async def run(self, duration: float = 0.01):
        """Register one in-flight execution, sleep for *duration* seconds, then unregister.

        The decrement sits in a ``finally`` so that a cancelled or timed-out
        sleep cannot leave ``active`` permanently raised and skew later readings.
        """
        self.active += 1
        self.max_active = max(self.max_active, self.active)
        try:
            await asyncio.sleep(duration)
        finally:
            self.active -= 1
86
+
87
+
88
class FakeToolCollection:
    """Test stand-in for ToolCollection: builds a registry from get_tool_list + methods."""

    def registry(self):
        """Return a ToolRegistry holding one Tool per entry of ``self.get_tool_list()``.

        Each tool's handler is the attribute of this object with the same name.
        A tool is marked parallel-safe when its name appears in ToolCollection's
        ``read_only_tools`` or ``agent_dispatch_tools``.
        """
        from kolega_code.agent.tools import ToolCollection
        from kolega_code.llm.models import ToolDefinition
        from kolega_code.tools import Tool, ToolRegistry

        parallel_names = set(ToolCollection.read_only_tools).union(ToolCollection.agent_dispatch_tools)
        tool_registry = ToolRegistry()
        for spec in self.get_tool_list():
            tool_name = spec.name
            definition = ToolDefinition(name=tool_name, description="", parameters=[])
            handler = getattr(self, tool_name)
            tool_registry.add(
                Tool(
                    name=tool_name,
                    definition=definition,
                    handler=handler,
                    parallel_safe=tool_name in parallel_names,
                )
            )
        return tool_registry
108
+
109
+
110
class TestParallelToolCalls:
    """Checks on how BaseAgent.process_tool_calls overlaps tool handlers and tracks per-call IDs."""

    @pytest.mark.asyncio
    async def test_dispatch_tools_run_concurrently(self, base_agent):
        """Two dispatch calls that each wait on the other only finish if they overlap."""
        first_started = asyncio.Event()
        second_started = asyncio.Event()

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                if "task 0" in task:
                    first_started.set()
                    # Times out unless the second call runs concurrently
                    await asyncio.wait_for(second_started.wait(), timeout=2)
                    return "first done"
                second_started.set()
                await asyncio.wait_for(first_started.wait(), timeout=2)
                return "second done"

        base_agent.tool_collection = Tools()
        blocks = [make_tool_call("dispatch_general_agent", i) for i in range(2)]

        results = await asyncio.wait_for(base_agent.process_tool_calls(blocks), timeout=5)

        assert [r.content for r in results] == ["first done", "second done"]
        assert [r.tool_use_id for r in results] == ["dispatch_general_agent_0", "dispatch_general_agent_1"]
        assert not any(r.is_error for r in results)

    @pytest.mark.asyncio
    async def test_mixed_read_only_and_dispatch_parallelize(self, base_agent):
        """A batch mixing read_entire_file and dispatch calls overlaps and yields one result per call."""
        tracker = ConcurrencyTracker()

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [
                    SimpleNamespace(name="dispatch_general_agent"),
                    SimpleNamespace(name="read_entire_file"),
                ]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "dispatched"

            async def read_entire_file(self, task: str):
                await tracker.run()
                return "file contents"

        base_agent.tool_collection = Tools()
        blocks = [
            make_tool_call("read_entire_file", 0),
            make_tool_call("dispatch_general_agent", 1),
            make_tool_call("read_entire_file", 2),
        ]

        results = await base_agent.process_tool_calls(blocks)

        assert tracker.max_active > 1
        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_write_tool_forces_sequential(self, base_agent):
        """When the batch contains create_file, peak concurrency stays at one."""
        tracker = ConcurrencyTracker()

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [
                    SimpleNamespace(name="dispatch_general_agent"),
                    SimpleNamespace(name="create_file"),
                ]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "dispatched"

            async def create_file(self, task: str):
                await tracker.run()
                return "created"

        base_agent.tool_collection = Tools()
        blocks = [
            make_tool_call("dispatch_general_agent", 0),
            make_tool_call("create_file", 1),
        ]

        results = await base_agent.process_tool_calls(blocks)

        assert tracker.max_active == 1
        assert len(results) == 2

    @pytest.mark.asyncio
    async def test_semaphore_caps_concurrency(self, base_agent):
        """With more calls than PARALLEL_TOOL_LIMIT, peak concurrency exceeds one but not the limit."""
        tracker = ConcurrencyTracker()
        total = BaseAgent.PARALLEL_TOOL_LIMIT + 4

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "done"

        base_agent.tool_collection = Tools()
        blocks = [make_tool_call("dispatch_general_agent", i) for i in range(total)]

        results = await base_agent.process_tool_calls(blocks)

        assert len(results) == total
        assert tracker.max_active > 1
        assert tracker.max_active <= BaseAgent.PARALLEL_TOOL_LIMIT

    @pytest.mark.asyncio
    async def test_contextvar_isolation_under_gather(self, base_agent):
        """Each overlapping handler sees its own execution id; all current-ID attributes are None afterwards."""
        captured: dict[str, str] = {}

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                # Yield so concurrent executions interleave before reading the ID
                await asyncio.sleep(0.01)
                captured[task] = base_agent.current_tool_execution_id
                return "done"

        base_agent.tool_collection = Tools()
        blocks = [make_tool_call("dispatch_general_agent", i) for i in range(3)]

        await base_agent.process_tool_calls(blocks)

        assert captured == {
            "task 0": "exec_dispatch_general_agent_0",
            "task 1": "exec_dispatch_general_agent_1",
            "task 2": "exec_dispatch_general_agent_2",
        }
        assert base_agent.current_tool_call_id is None
        assert base_agent.current_tool_execution_id is None
        assert base_agent.current_provider_tool_call_id is None

    @pytest.mark.asyncio
    async def test_nested_agent_does_not_clobber_parent_ids(
        self, tmp_path, mock_connection_manager, agent_config, base_agent
    ):
        """A tool executed by a second agent inside the handler leaves the parent's execution id intact."""
        nested_agent = BaseAgent(
            project_path=tmp_path,
            workspace_id="test_workspace",
            thread_id=str(uuid.uuid4()),
            connection_manager=mock_connection_manager,
            config=agent_config,
        )
        # Mock the same outbound methods the base_agent fixture mocks.
        nested_agent.send_chat_message = AsyncMock()
        nested_agent.log_info = AsyncMock()
        nested_agent.log_error = AsyncMock()

        class NestedTools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="read_entire_file")]

            async def read_entire_file(self, task: str):
                return "nested contents"

        nested_agent.tool_collection = NestedTools()
        observed = {}

        class ParentTools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                # Simulate a sub-agent running its own tool within the same asyncio task
                await nested_agent.execute_single_tool(make_tool_call("read_entire_file", 99))
                observed["parent_id"] = base_agent.current_tool_execution_id
                return "done"

        base_agent.tool_collection = ParentTools()

        await base_agent.process_tool_calls([make_tool_call("dispatch_general_agent", 0)])

        assert observed["parent_id"] == "exec_dispatch_general_agent_0"
@@ -0,0 +1,227 @@
1
+ import base64
2
+ import uuid
3
+ from unittest.mock import AsyncMock, Mock
4
+
5
+ import pytest
6
+
7
+ from kolega_code.agent.baseagent import BaseAgent
8
+ from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
9
+ from kolega_code.events import AgentConnectionManager
10
+ from kolega_code.llm.models import Message, TextBlock, ToolCall
11
+ from kolega_code.llm.providers.models import TokenCount
12
+ from kolega_code.agent.planningagent import PlanningAgent
13
+ from kolega_code.agent.prompt_provider import AgentMode
14
+ from kolega_code.agent.tools import ToolExtension
15
+
16
+
17
def _deepseek_config() -> AgentConfig:
    """Return an AgentConfig in which every model slot shares one DeepSeek ModelConfig."""
    shared = ModelConfig(
        provider=ModelProvider.DEEPSEEK,
        model="deepseek-v4-pro",
        rate_limits=RateLimitConfig(),
    )
    slot_names = ("long_context_config", "fast_config", "edit_model_config", "thinking_config")
    # The same ModelConfig object is passed for each slot.
    slots = {slot: shared for slot in slot_names}
    return AgentConfig(deepseek_api_key="test-key", **slots)
30
+
31
+
32
def _image_attachment() -> dict:
    """Return an image attachment dict whose data is base64 text of placeholder bytes."""
    encoded = base64.b64encode(b"fake-image-data").decode("utf-8")
    attachment = {
        "type": "image",
        "media_type": "image/png",
        "data": encoded,
        "filename": "test-image.png",
    }
    return attachment
39
+
40
+
41
class _EmptyStream:
    """Async stream stub: yields no items and reports a fixed final assistant message."""

    async def __aenter__(self):
        """Enter the async context and hand back the stream itself."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Leave the async context; returning False means exceptions are not suppressed."""
        return False

    def __aiter__(self):
        """The stream is its own async iterator."""
        return self

    async def __anext__(self):
        """Signal exhaustion immediately, so iteration produces nothing."""
        raise StopAsyncIteration

    async def get_final_message(self):
        """Return an assistant Message with a single "done" text block and stop_reason "end_turn"."""
        final_text = TextBlock("done")
        return Message("assistant", [final_text], stop_reason="end_turn")
56
+
57
+
58
@pytest.fixture
def mock_connection_manager():
    """Provide a Mock spec'd to AgentConnectionManager with a workspace id and an async send_message."""
    connection = Mock(spec=AgentConnectionManager)
    # The two assignments are independent of each other.
    connection.send_message = AsyncMock()
    connection.workspace_id = "test_workspace"
    return connection
64
+
65
+
66
@pytest.fixture
def agent_config():
    """Provide a Mock spec'd to AgentConfig with Anthropic long-context settings and placeholder keys."""
    config = Mock(spec=AgentConfig)
    config.long_context_config = Mock()

    long_context = config.long_context_config
    long_context.provider = "anthropic"
    long_context.model = "claude-sonnet-4-5-20250929"
    long_context.thinking_tokens = None

    for attribute, value in (
        ("openai_api_key", "test_key"),
        ("anthropic_api_key", "test_key"),
        ("browser_use_headless", True),
    ):
        setattr(config, attribute, value)
    return config
77
+
78
+
79
@pytest.mark.asyncio
async def test_planning_agent_uses_host_task_list_extension(tmp_path, mock_connection_manager, agent_config):
    """Host-supplied task-list tools are callable via the tool collection and listed next to write_plan."""
    stored = ""

    async def update_task_list(task_list_markdown: str) -> str:
        nonlocal stored
        stored = task_list_markdown.strip()
        return "Task list updated."

    async def get_task_list() -> str:
        if stored:
            return stored
        return "No task list has been set."

    host_extension = ToolExtension(
        name="host-task-list",
        tools={"update_task_list": update_task_list, "get_task_list": get_task_list},
        tool_groups={"planning_tools": ["update_task_list", "get_task_list"]},
    )
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
        tool_extensions=[host_extension],
    )
    collection = agent.tool_collection

    result = await collection.update_task_list("- [ ] inspect CLI\n- [x] choose tool shape")
    tool_names = {tool.name for tool in collection.get_tool_list()}

    assert result == "Task list updated."
    assert stored == "- [ ] inspect CLI\n- [x] choose tool shape"
    assert await collection.get_task_list() == stored
    assert {"write_plan", "get_task_list", "update_task_list"} <= tool_names
    # File-mutating tools must not be listed for the planning agent.
    for forbidden in ("replace_entire_file", "create_file"):
        assert forbidden not in tool_names
116
+
117
+
118
def test_planning_agent_only_exposes_write_plan_without_host_task_tools(
    tmp_path, mock_connection_manager, agent_config
):
    """Without a host extension the tool list has write_plan but neither task-list tool."""
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )

    exposed = set()
    for tool in agent.tool_collection.get_tool_list():
        exposed.add(tool.name)

    assert "write_plan" in exposed
    for host_tool in ("update_task_list", "get_task_list"):
        assert host_tool not in exposed
135
+
136
+
137
@pytest.mark.asyncio
async def test_planning_agent_rejects_unavailable_file_edit_tool(
    tmp_path, mock_connection_manager, agent_config
):
    """Executing replace_entire_file returns an error result and leaves the file unchanged."""
    notes_file = tmp_path / "notes.txt"
    notes_file.write_text("original\n", encoding="utf-8")
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )
    edit_call = ToolCall(
        id="tool-call-1",
        name="replace_entire_file",
        input={"relative_path": "notes.txt", "content": "mutated\n"},
    )

    outcome = await agent.execute_single_tool(edit_call)

    assert outcome.is_error is True
    assert outcome.content == "Tool 'replace_entire_file' is not available in this mode."
    assert notes_file.read_text(encoding="utf-8") == "original\n"
163
+
164
+
165
@pytest.mark.asyncio
async def test_planning_agent_write_plan_is_consumable(tmp_path, mock_connection_manager, agent_config):
    """A plan passed to write_plan is returned once by consume_completed_plan, then None."""
    plan_text = "# Plan\n\nImplement planning mode."
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )

    acknowledgement = await agent.tool_collection.write_plan(plan_text)

    assert acknowledgement == "Plan captured."
    first_read = agent.consume_completed_plan()
    assert first_read == "# Plan\n\nImplement planning mode."
    second_read = agent.consume_completed_plan()
    assert second_read is None
181
+
182
+
183
@pytest.mark.asyncio
async def test_planning_agent_rejects_deepseek_image_without_llm_call(tmp_path, mock_connection_manager):
    """An image sent under the DeepSeek config yields one complete rejection chunk and no LLM stream call."""
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=_deepseek_config(),
        agent_mode=AgentMode.CLI,
    )
    agent.llm = Mock()

    chunks = []
    async for chunk in agent.process_message_stream("Plan from this screenshot", [_image_attachment()]):
        chunks.append(chunk)

    assert len(chunks) == 1
    only_chunk = chunks[0]
    assert only_chunk["type"] == "response"
    assert only_chunk["content"] == BaseAgent.deepseek_image_unsupported_message
    assert only_chunk["complete"] is True
    # Nothing was added to the history and the LLM was never streamed.
    assert agent.history == []
    agent.llm.stream.assert_not_called()
206
+
207
+
208
@pytest.mark.asyncio
async def test_planning_agent_does_not_print_context_token_counts(
    tmp_path, mock_connection_manager, capsys
):
    """Streaming a message writes nothing to stdout, and the last chunk is marked complete."""
    agent = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=_deepseek_config(),
        agent_mode=AgentMode.CLI,
    )
    token_count = TokenCount(input_tokens=42)
    agent.count_current_context = AsyncMock(return_value=token_count)
    agent.llm = Mock()
    agent.llm.stream = AsyncMock(return_value=_EmptyStream())

    chunks = []
    async for chunk in agent.process_message_stream("hello"):
        chunks.append(chunk)

    assert chunks[-1]["complete"] is True
    printed = capsys.readouterr().out
    assert printed == ""