kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. kolega_code/__init__.py +151 -0
  2. kolega_code/agent/__init__.py +42 -0
  3. kolega_code/agent/baseagent.py +998 -0
  4. kolega_code/agent/browseragent.py +123 -0
  5. kolega_code/agent/coder.py +157 -0
  6. kolega_code/agent/common.py +41 -0
  7. kolega_code/agent/compression.py +81 -0
  8. kolega_code/agent/context.py +112 -0
  9. kolega_code/agent/conversation.py +408 -0
  10. kolega_code/agent/generalagent.py +146 -0
  11. kolega_code/agent/investigationagent.py +123 -0
  12. kolega_code/agent/planningagent.py +187 -0
  13. kolega_code/agent/prompt_provider.py +196 -0
  14. kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
  15. kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
  16. kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
  17. kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
  18. kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
  19. kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
  20. kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
  21. kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
  22. kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
  23. kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
  24. kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
  25. kolega_code/agent/prompts.py +192 -0
  26. kolega_code/agent/tests/__init__.py +0 -0
  27. kolega_code/agent/tests/llm/__init__.py +0 -0
  28. kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
  29. kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
  30. kolega_code/agent/tests/llm/test_client.py +773 -0
  31. kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
  32. kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
  33. kolega_code/agent/tests/llm/test_exceptions.py +249 -0
  34. kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
  35. kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
  36. kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
  37. kolega_code/agent/tests/llm/test_model_specs.py +17 -0
  38. kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
  39. kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
  40. kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
  41. kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
  42. kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
  43. kolega_code/agent/tests/services/__init__.py +1 -0
  44. kolega_code/agent/tests/services/test_browser.py +447 -0
  45. kolega_code/agent/tests/services/test_browser_parity.py +353 -0
  46. kolega_code/agent/tests/services/test_file_system.py +699 -0
  47. kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
  48. kolega_code/agent/tests/services/test_terminal.py +154 -0
  49. kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
  50. kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
  51. kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
  52. kolega_code/agent/tests/test_base_agent.py +1942 -0
  53. kolega_code/agent/tests/test_coder_attachments.py +330 -0
  54. kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
  55. kolega_code/agent/tests/test_commands.py +179 -0
  56. kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
  57. kolega_code/agent/tests/test_empty_message_handling.py +48 -0
  58. kolega_code/agent/tests/test_general_agent.py +242 -0
  59. kolega_code/agent/tests/test_html.py +320 -0
  60. kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
  61. kolega_code/agent/tests/test_planning_agent.py +227 -0
  62. kolega_code/agent/tests/test_prompt_provider.py +271 -0
  63. kolega_code/agent/tests/test_tool_registry.py +102 -0
  64. kolega_code/agent/tests/test_tools.py +549 -0
  65. kolega_code/agent/tests/tool_backend/__init__.py +0 -0
  66. kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
  67. kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
  68. kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
  69. kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
  70. kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
  71. kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
  72. kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
  73. kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
  74. kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
  75. kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
  76. kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
  77. kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
  78. kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
  79. kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
  80. kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
  81. kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
  82. kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
  83. kolega_code/agent/tool_backend/agent_tool.py +414 -0
  84. kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
  85. kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
  86. kolega_code/agent/tool_backend/base_tool.py +217 -0
  87. kolega_code/agent/tool_backend/browser_tool.py +271 -0
  88. kolega_code/agent/tool_backend/build_tool.py +93 -0
  89. kolega_code/agent/tool_backend/create_file_tool.py +52 -0
  90. kolega_code/agent/tool_backend/glob_tool.py +323 -0
  91. kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
  92. kolega_code/agent/tool_backend/memory_tool.py +79 -0
  93. kolega_code/agent/tool_backend/read_file_tool.py +119 -0
  94. kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
  95. kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
  96. kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
  97. kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
  98. kolega_code/agent/tool_backend/streaming_tool.py +47 -0
  99. kolega_code/agent/tool_backend/terminal_tool.py +643 -0
  100. kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
  101. kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
  102. kolega_code/agent/tools.py +1704 -0
  103. kolega_code/agent/utils/commands.py +94 -0
  104. kolega_code/cli/__init__.py +1 -0
  105. kolega_code/cli/app.py +2756 -0
  106. kolega_code/cli/config.py +280 -0
  107. kolega_code/cli/connection.py +49 -0
  108. kolega_code/cli/file_index.py +147 -0
  109. kolega_code/cli/main.py +564 -0
  110. kolega_code/cli/mentions.py +155 -0
  111. kolega_code/cli/messages.py +89 -0
  112. kolega_code/cli/provider_registry.py +96 -0
  113. kolega_code/cli/session_store.py +207 -0
  114. kolega_code/cli/settings.py +87 -0
  115. kolega_code/cli/skills.py +409 -0
  116. kolega_code/cli/slash_commands.py +108 -0
  117. kolega_code/cli/tests/__init__.py +1 -0
  118. kolega_code/cli/tests/test_app.py +4251 -0
  119. kolega_code/cli/tests/test_cli_config.py +171 -0
  120. kolega_code/cli/tests/test_connection.py +26 -0
  121. kolega_code/cli/tests/test_file_index.py +103 -0
  122. kolega_code/cli/tests/test_main.py +455 -0
  123. kolega_code/cli/tests/test_mentions.py +108 -0
  124. kolega_code/cli/tests/test_session_store.py +67 -0
  125. kolega_code/cli/tests/test_settings.py +62 -0
  126. kolega_code/cli/tests/test_skills.py +157 -0
  127. kolega_code/cli/tests/test_slash_commands.py +88 -0
  128. kolega_code/cli/theme.py +180 -0
  129. kolega_code/config.py +154 -0
  130. kolega_code/events.py +202 -0
  131. kolega_code/llm/client.py +300 -0
  132. kolega_code/llm/exceptions.py +285 -0
  133. kolega_code/llm/instrumented_client.py +520 -0
  134. kolega_code/llm/models.py +1368 -0
  135. kolega_code/llm/providers/__init__.py +0 -0
  136. kolega_code/llm/providers/anthropic.py +387 -0
  137. kolega_code/llm/providers/base.py +71 -0
  138. kolega_code/llm/providers/google.py +157 -0
  139. kolega_code/llm/providers/models.py +37 -0
  140. kolega_code/llm/providers/openai.py +363 -0
  141. kolega_code/llm/ratelimit.py +40 -0
  142. kolega_code/llm/specs.py +67 -0
  143. kolega_code/llm/tool_execution_ids.py +18 -0
  144. kolega_code/models/__init__.py +9 -0
  145. kolega_code/models/sandbox_terminal_state.py +47 -0
  146. kolega_code/runtime.py +50 -0
  147. kolega_code/sandbox/README.md +200 -0
  148. kolega_code/sandbox/__init__.py +21 -0
  149. kolega_code/sandbox/async_filesystem.py +475 -0
  150. kolega_code/sandbox/base.py +297 -0
  151. kolega_code/sandbox/browser.py +25 -0
  152. kolega_code/sandbox/event_loop.py +43 -0
  153. kolega_code/sandbox/filesystem.py +341 -0
  154. kolega_code/sandbox/local.py +118 -0
  155. kolega_code/sandbox/serializer.py +175 -0
  156. kolega_code/sandbox/terminal.py +868 -0
  157. kolega_code/sandbox/utils.py +216 -0
  158. kolega_code/services/base.py +255 -0
  159. kolega_code/services/browser.py +444 -0
  160. kolega_code/services/file_system.py +749 -0
  161. kolega_code/services/html.py +221 -0
  162. kolega_code/services/terminal.py +903 -0
  163. kolega_code/tools/__init__.py +22 -0
  164. kolega_code/tools/core.py +33 -0
  165. kolega_code/tools/definitions.py +81 -0
  166. kolega_code/tools/registry.py +73 -0
  167. kolega_code-0.1.0.dist-info/METADATA +157 -0
  168. kolega_code-0.1.0.dist-info/RECORD +171 -0
  169. kolega_code-0.1.0.dist-info/WHEEL +4 -0
  170. kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
  171. kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,262 @@
1
+ """Unit tests for terminal state serialization."""
2
+
3
+ import pytest
4
+ from datetime import datetime
5
+ from unittest.mock import Mock, MagicMock
6
+ from kolega_code.sandbox.serializer import TerminalStateSerializer
7
+ from kolega_code.models.sandbox_terminal_state import SandboxTerminalState, TerminalInfo, TerminalOutput
8
+
9
+
10
class TestTerminalStateSerializer:
    """Checks for serializing, restoring and formatting terminal state."""

    def test_serialize_empty_terminal_manager(self):
        """A manager with no terminals and no outputs yields an empty state."""
        empty_manager = Mock(terminals={}, outputs={})

        snapshot = TerminalStateSerializer.serialize_to_model(empty_manager, "workspace-123", "sandbox-456")

        assert snapshot.workspace_id == "workspace-123"
        assert snapshot.sandbox_id == "sandbox-456"
        assert len(snapshot.terminals) == 0
        assert len(snapshot.outputs) == 0
        assert snapshot.total_output_size == 0

    def test_serialize_with_terminals(self):
        """Two terminals with recorded output show up in the serialized state."""
        terminal_records = {
            "term1": {
                "created_at": datetime.now(),
                "cwd": "/home/user/workspace",
                "env": {"PATH": "/usr/bin"},
                "last_command": "ls -la",
                "last_command_purpose": "List files",
            },
            "term2": {
                "created_at": datetime.now(),
                "cwd": "/tmp",
                "env": {},
                "last_command": "pwd",
                "last_command_purpose": "Check directory",
            },
        }
        output_records = {
            "term1": [
                {"type": "command", "data": "ls -la", "timestamp": datetime.now(), "purpose": "List files"},
                {
                    "type": "stdout",
                    "data": "total 24\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 .\n",
                    "timestamp": datetime.now(),
                },
            ],
            "term2": [
                {"type": "command", "data": "pwd", "timestamp": datetime.now(), "purpose": "Check directory"},
                {"type": "stdout", "data": "/tmp\n", "timestamp": datetime.now()},
            ],
        }
        manager = Mock(terminals=terminal_records, outputs=output_records)
        manager._default_terminal_id = "term1"

        snapshot = TerminalStateSerializer.serialize_to_model(manager, "workspace-123", "sandbox-456")

        # Terminal metadata
        assert len(snapshot.terminals) == 2
        for terminal_id in ("term1", "term2"):
            assert terminal_id in snapshot.terminals
        assert snapshot.terminals["term1"].last_command == "ls -la"
        assert snapshot.terminals["term2"].cwd == "/tmp"
        assert snapshot.default_terminal_id == "term1"

        # Recorded outputs
        assert len(snapshot.outputs["term1"]) == 2
        assert len(snapshot.outputs["term2"]) == 2
        first_entry, second_entry = snapshot.outputs["term1"][0], snapshot.outputs["term1"][1]
        assert first_entry.type == "command"
        assert second_entry.type == "stdout"
        assert snapshot.total_output_size > 0

    def test_serialize_with_size_limits(self):
        """Oversized output produces a truncation entry and stays within the size cap."""
        terminal_records = {
            "term1": {
                "created_at": datetime.now(),
                "cwd": "/home/user",
                "env": {},
                "last_command": "cat large_file.txt",
                "last_command_purpose": "View file",
            }
        }

        # 300KB payload, exceeds 256KB limit
        oversized_payload = "x" * 300000
        output_records = {
            "term1": [
                {"type": "command", "data": "cat large_file.txt", "timestamp": datetime.now(), "purpose": "View file"},
                {"type": "stdout", "data": oversized_payload, "timestamp": datetime.now()},
            ]
        }
        manager = Mock(terminals=terminal_records, outputs=output_records)

        snapshot = TerminalStateSerializer.serialize_to_model(manager, "workspace-123", "sandbox-456")

        # A truncation notice must be among the serialized entries.
        entry_types = [entry.type for entry in snapshot.outputs["term1"]]
        assert "truncation" in entry_types
        # The reported total must not exceed the model's own cap.
        assert snapshot.total_output_size <= snapshot.MAX_OUTPUT_SIZE

    def test_restore_from_model(self):
        """Restoring from a state model repopulates terminals, outputs and the default id."""
        saved_terminal = TerminalInfo(
            terminal_id="term1",
            created_at=datetime.now(),
            cwd="/home/user/workspace",
            env={"FOO": "bar"},
            last_command="echo hello",
            last_command_purpose="Test echo",
        )
        saved_outputs = [
            TerminalOutput(type="command", data="echo hello", timestamp=datetime.now(), purpose="Test echo"),
            TerminalOutput(type="stdout", data="hello\n", timestamp=datetime.now()),
        ]
        saved_state = SandboxTerminalState(
            workspace_id="workspace-123",
            sandbox_id="sandbox-456",
            terminals={"term1": saved_terminal},
            outputs={"term1": saved_outputs},
            default_terminal_id="term1",
        )

        # Start from a manager that holds nothing.
        manager = Mock(terminals={}, outputs={})
        manager._default_terminal_id = None

        TerminalStateSerializer.restore_from_model(manager, saved_state)

        # Terminal metadata
        assert len(manager.terminals) == 1
        assert "term1" in manager.terminals
        restored_terminal = manager.terminals["term1"]
        assert restored_terminal["cwd"] == "/home/user/workspace"
        assert restored_terminal["env"]["FOO"] == "bar"
        assert restored_terminal["last_command"] == "echo hello"
        assert restored_terminal["process"] is None  # Can't restore process

        # Outputs
        restored_outputs = manager.outputs["term1"]
        assert len(restored_outputs) == 2
        assert restored_outputs[0]["type"] == "command"
        assert restored_outputs[1]["data"] == "hello\n"

        # Default terminal
        assert manager._default_terminal_id == "term1"

    def test_to_frontend_format(self):
        """The frontend payload lists each terminal with its rendered content."""
        terminal_infos = {
            "term1": TerminalInfo(
                terminal_id="term1",
                created_at=datetime.now(),
                cwd="/home/user",
                env={},
                last_command="ls",
                last_command_purpose="",
            ),
            "term2": TerminalInfo(
                terminal_id="term2",
                created_at=datetime.now(),
                cwd="/tmp",
                env={},
                last_command="pwd",
                last_command_purpose="",
            ),
        }
        terminal_outputs = {
            "term1": [
                TerminalOutput(type="command", data="ls", timestamp=datetime.now()),
                TerminalOutput(type="stdout", data="file1.txt\nfile2.txt\n", timestamp=datetime.now()),
                TerminalOutput(
                    type="exit", data="Process exited with code 0", timestamp=datetime.now(), exit_code=0
                ),
            ],
            "term2": [
                TerminalOutput(type="command", data="pwd", timestamp=datetime.now()),
                TerminalOutput(type="stdout", data="/tmp", timestamp=datetime.now()),
            ],
        }
        state = SandboxTerminalState(
            workspace_id="workspace-123",
            sandbox_id="sandbox-456",
            terminals=terminal_infos,
            outputs=terminal_outputs,
        )

        payload = TerminalStateSerializer.to_frontend_format(state)

        assert "terminals" in payload
        assert len(payload["terminals"]) == 2

        def entry_for(terminal_id):
            """Return the first payload entry whose id matches *terminal_id*."""
            return next(item for item in payload["terminals"] if item["id"] == terminal_id)

        term1_entry = entry_for("term1")
        term2_entry = entry_for("term2")

        # Each terminal's content must contain its command, output and exit text.
        for fragment in ("$ ls", "file1.txt\nfile2.txt", "Process exited with code 0"):
            assert fragment in term1_entry["content"]

        for fragment in ("$ pwd", "/tmp"):
            assert fragment in term2_entry["content"]

    def test_get_recent_outputs(self):
        """Limiting recent outputs by line count returns fewer entries than were supplied."""
        # Twenty command/stdout pairs; every stdout entry carries ten lines.
        history = [
            entry
            for index in range(20)
            for entry in (
                {"type": "command", "data": f"echo line{index}", "timestamp": datetime.now()},
                {"type": "stdout", "data": f"line{index}\n" * 10, "timestamp": datetime.now()},
            )
        ]

        recent = TerminalStateSerializer.get_recent_outputs(history, max_lines=50)

        # Fewer entries must come back than went in.
        assert len(recent) < len(history)

        # Commands count as one line; stdout/stderr count their newlines plus one;
        # any other entry type contributes nothing.
        line_total = sum(
            1 if entry["type"] == "command" else entry["data"].count("\n") + 1
            for entry in recent
            if entry["type"] in ("command", "stdout", "stderr")
        )

        # Some buffer above the 50-line limit is tolerated for partial outputs.
        assert line_total <= 60

    def test_serialize_handles_missing_terminal_manager_attrs(self):
        """A manager exposing no attributes at all still serializes to an empty state."""
        # spec=[] gives a mock with no attributes (e.g., local terminal manager
        # without a 'terminals' attribute).
        bare_manager = Mock(spec=[])

        snapshot = TerminalStateSerializer.serialize_to_model(bare_manager, "workspace-123", "sandbox-456")

        assert snapshot.workspace_id == "workspace-123"
        assert snapshot.sandbox_id == "sandbox-456"
        assert len(snapshot.terminals) == 0
        assert len(snapshot.outputs) == 0

    def test_restore_handles_none_state(self):
        """Restoring from ``None`` neither raises nor removes existing entries."""
        manager = Mock(terminals={"existing": {}}, outputs={"existing": []})

        # Must complete without raising.
        TerminalStateSerializer.restore_from_model(manager, None)

        # Pre-existing entries must still be present afterwards.
        assert "existing" in manager.terminals
        assert "existing" in manager.outputs
@@ -0,0 +1,267 @@
1
+ """Tool inventory checks for shared agent classes."""
2
+
3
+ import uuid
4
+ from pathlib import Path
5
+ from unittest.mock import AsyncMock, Mock
6
+
7
+ import pytest
8
+
9
+ from kolega_code.agent.browseragent import BrowserAgent
10
+ from kolega_code.agent.coder import CoderAgent
11
+ from kolega_code.config import AgentConfig
12
+ from kolega_code.events import AgentConnectionManager
13
+ from kolega_code.agent.generalagent import GeneralAgent
14
+ from kolega_code.agent.investigationagent import InvestigationAgent
15
+ from kolega_code.agent.planningagent import PlanningAgent
16
+ from kolega_code.agent.prompt_provider import AgentMode, PromptProvider
17
+
18
+
19
+ @pytest.fixture
20
+ def mock_connection_manager():
21
+ """Create a mock connection manager."""
22
+ manager = Mock(spec=AgentConnectionManager)
23
+ manager.workspace_id = "test_workspace"
24
+ manager.send_message = AsyncMock()
25
+ return manager
26
+
27
+
28
+ @pytest.fixture
29
+ def agent_config():
30
+ """Create a mock agent configuration."""
31
+ config = Mock(spec=AgentConfig)
32
+ config.long_context_config = Mock()
33
+ config.long_context_config.provider = "anthropic"
34
+ config.long_context_config.model = "claude-sonnet-4-5-20250929"
35
+ config.openai_api_key = "test_key"
36
+ config.anthropic_api_key = "test_key"
37
+ config.browser_use_headless = True
38
+ return config
39
+
40
+
41
+ @pytest.fixture
42
+ def project_path(tmp_path):
43
+ """Create a temporary project path."""
44
+ return str(tmp_path)
45
+
46
+
47
+ def hosted_prompt_provider(project_path):
48
+ template_dir = Path(project_path) / "prompt_templates"
49
+ agents_dir = template_dir / "agents"
50
+ agents_dir.mkdir(parents=True, exist_ok=True)
51
+ (agents_dir / "coder_code_mode.j2").write_text("Private hosted test prompt.", encoding="utf-8")
52
+ return PromptProvider(template_dirs=[template_dir])
53
+
54
+
55
+ def test_browser_agent_tools(project_path, mock_connection_manager, agent_config):
56
+ """BrowserAgent exposes only browser tools."""
57
+ agent = BrowserAgent(
58
+ project_path=project_path,
59
+ workspace_id="test_workspace",
60
+ thread_id=str(uuid.uuid4()),
61
+ connection_manager=mock_connection_manager,
62
+ config=agent_config,
63
+ )
64
+
65
+ tools = agent.tool_collection.get_tool_list()
66
+ tool_names = [tool.name for tool in tools]
67
+
68
+ expected_tools = [
69
+ "close_browser",
70
+ "get_browser_console_logs",
71
+ "get_browser_interactive_elements",
72
+ "interact_with_browser",
73
+ "launch_browser",
74
+ "list_browsers",
75
+ "set_browser_select_value",
76
+ "take_browser_screenshot",
77
+ ]
78
+
79
+ assert len(tools) == len(expected_tools)
80
+ assert set(tool_names) == set(expected_tools)
81
+
82
+
83
+ def test_investigation_agent_tools(project_path, mock_connection_manager, agent_config):
84
+ """InvestigationAgent exposes read-only investigation tools."""
85
+ agent = InvestigationAgent(
86
+ project_path=project_path,
87
+ workspace_id="test_workspace",
88
+ thread_id=str(uuid.uuid4()),
89
+ connection_manager=mock_connection_manager,
90
+ config=agent_config,
91
+ )
92
+
93
+ tools = agent.tool_collection.get_tool_list()
94
+ tool_names = [tool.name for tool in tools]
95
+
96
+ expected_tools = [
97
+ "find_files_by_pattern",
98
+ "list_directory",
99
+ "read_entire_file",
100
+ "read_file_section",
101
+ "search_codebase",
102
+ "sleep",
103
+ "think_hard",
104
+ "web_fetch",
105
+ ]
106
+
107
+ assert len(tools) == len(expected_tools)
108
+ assert set(tool_names) == set(expected_tools)
109
+
110
+
111
+ def test_cli_coder_agent_does_not_expose_manifest_build_tools(project_path, mock_connection_manager, agent_config):
112
+ """CLI CoderAgent does not expose platform-only manifest build tools."""
113
+ agent = CoderAgent(
114
+ project_path=project_path,
115
+ workspace_id="test_workspace",
116
+ thread_id=str(uuid.uuid4()),
117
+ connection_manager=mock_connection_manager,
118
+ config=agent_config,
119
+ agent_mode=AgentMode.CLI,
120
+ )
121
+
122
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
123
+
124
+ assert "build_backend" not in tool_names
125
+ assert "build_frontend" not in tool_names
126
+
127
+
128
+ def test_non_cli_coder_agent_keeps_manifest_build_tools(project_path, mock_connection_manager, agent_config):
129
+ """Non-CLI CoderAgent keeps manifest build tools for platform use."""
130
+ agent = CoderAgent(
131
+ project_path=project_path,
132
+ workspace_id="test_workspace",
133
+ thread_id=str(uuid.uuid4()),
134
+ connection_manager=mock_connection_manager,
135
+ config=agent_config,
136
+ agent_mode=AgentMode.CODE,
137
+ prompt_provider=hosted_prompt_provider(project_path),
138
+ )
139
+
140
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
141
+
142
+ assert "build_backend" in tool_names
143
+ assert "build_frontend" in tool_names
144
+
145
+
146
+ def test_coder_agent_exposes_dispatch_general_agent(project_path, mock_connection_manager, agent_config):
147
+ """CoderAgent can dispatch general sub-agents but still not coding agents."""
148
+ agent = CoderAgent(
149
+ project_path=project_path,
150
+ workspace_id="test_workspace",
151
+ thread_id=str(uuid.uuid4()),
152
+ connection_manager=mock_connection_manager,
153
+ config=agent_config,
154
+ agent_mode=AgentMode.CLI,
155
+ )
156
+
157
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
158
+
159
+ assert "dispatch_general_agent" in tool_names
160
+ assert "dispatch_investigation_agent" in tool_names
161
+ assert "dispatch_coding_agent" not in tool_names
162
+
163
+
164
+ def test_sub_agent_coder_cannot_dispatch_general_agent(project_path, mock_connection_manager, agent_config):
165
+ """A dispatched CoderAgent must not fan out into further sub-agents."""
166
+ agent = CoderAgent(
167
+ project_path=project_path,
168
+ workspace_id="test_workspace",
169
+ thread_id=str(uuid.uuid4()),
170
+ connection_manager=mock_connection_manager,
171
+ config=agent_config,
172
+ agent_mode=AgentMode.CLI,
173
+ sub_agent=True,
174
+ )
175
+
176
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
177
+
178
+ assert "dispatch_general_agent" not in tool_names
179
+
180
+
181
+ def test_general_agent_tool_inventory(project_path, mock_connection_manager, agent_config):
182
+ """GeneralAgent has the full toolset but cannot dispatch sub-agents."""
183
+ agent = GeneralAgent(
184
+ project_path=project_path,
185
+ workspace_id="test_workspace",
186
+ thread_id=str(uuid.uuid4()),
187
+ connection_manager=mock_connection_manager,
188
+ config=agent_config,
189
+ )
190
+
191
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
192
+
193
+ # Full read/write/terminal access
194
+ assert "read_entire_file" in tool_names
195
+ assert "search_codebase" in tool_names
196
+ assert "create_file" in tool_names
197
+ assert "replace_entire_file" in tool_names
198
+ assert "run_command_tracked" in tool_names
199
+ # Recursion guard: no dispatch tools at all
200
+ assert not any(name.startswith("dispatch_") for name in tool_names)
201
+
202
+
203
+ def test_cli_general_agent_excludes_manifest_build_tools(project_path, mock_connection_manager, agent_config):
204
+ """GeneralAgent inherits the CLI-mode exclusion of platform build tools."""
205
+ agent = GeneralAgent(
206
+ project_path=project_path,
207
+ workspace_id="test_workspace",
208
+ thread_id=str(uuid.uuid4()),
209
+ connection_manager=mock_connection_manager,
210
+ config=agent_config,
211
+ agent_mode=AgentMode.CLI,
212
+ )
213
+
214
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
215
+
216
+ assert "build_backend" not in tool_names
217
+ assert "build_frontend" not in tool_names
218
+
219
+
220
+ def test_planning_agent_exposes_read_only_and_planning_tools(project_path, mock_connection_manager, agent_config):
221
+ """PlanningAgent cannot edit files and can capture a final plan."""
222
+ agent = PlanningAgent(
223
+ project_path=project_path,
224
+ workspace_id="test_workspace",
225
+ thread_id=str(uuid.uuid4()),
226
+ connection_manager=mock_connection_manager,
227
+ config=agent_config,
228
+ agent_mode=AgentMode.CLI,
229
+ )
230
+
231
+ tool_names = {tool.name for tool in agent.tool_collection.get_tool_list()}
232
+ expected_planning_tools = {"write_plan"}
233
+
234
+ assert expected_planning_tools.issubset(tool_names)
235
+ assert "get_task_list" not in tool_names
236
+ assert "update_task_list" not in tool_names
237
+ assert "create_file" not in tool_names
238
+ assert "replace_entire_file" not in tool_names
239
+ assert "apply_patch" not in tool_names
240
+ assert "run_command_tracked" not in tool_names
241
+ assert tool_names - expected_planning_tools <= set(agent.tool_collection.read_only_tools)
242
+
243
+
244
+ def test_shared_tool_names_are_well_formed(project_path, mock_connection_manager, agent_config):
245
+ """Shared agent tool definitions have valid names and descriptions."""
246
+ agents = [
247
+ BrowserAgent(
248
+ project_path=project_path,
249
+ workspace_id="test_workspace",
250
+ thread_id=str(uuid.uuid4()),
251
+ connection_manager=mock_connection_manager,
252
+ config=agent_config,
253
+ ),
254
+ InvestigationAgent(
255
+ project_path=project_path,
256
+ workspace_id="test_workspace",
257
+ thread_id=str(uuid.uuid4()),
258
+ connection_manager=mock_connection_manager,
259
+ config=agent_config,
260
+ ),
261
+ ]
262
+
263
+ for agent in agents:
264
+ for tool in agent.tool_collection.get_tool_list():
265
+ assert tool.name.replace("_", "").isalnum()
266
+ assert tool.name.islower() or tool.name.replace("_", "").isalnum()
267
+ assert tool.description