kolega-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolega_code/__init__.py +151 -0
- kolega_code/agent/__init__.py +42 -0
- kolega_code/agent/baseagent.py +998 -0
- kolega_code/agent/browseragent.py +123 -0
- kolega_code/agent/coder.py +157 -0
- kolega_code/agent/common.py +41 -0
- kolega_code/agent/compression.py +81 -0
- kolega_code/agent/context.py +112 -0
- kolega_code/agent/conversation.py +408 -0
- kolega_code/agent/generalagent.py +146 -0
- kolega_code/agent/investigationagent.py +123 -0
- kolega_code/agent/planningagent.py +187 -0
- kolega_code/agent/prompt_provider.py +196 -0
- kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
- kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
- kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
- kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
- kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
- kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
- kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
- kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
- kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
- kolega_code/agent/prompts.py +192 -0
- kolega_code/agent/tests/__init__.py +0 -0
- kolega_code/agent/tests/llm/__init__.py +0 -0
- kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
- kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
- kolega_code/agent/tests/llm/test_client.py +773 -0
- kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
- kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
- kolega_code/agent/tests/llm/test_exceptions.py +249 -0
- kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
- kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
- kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
- kolega_code/agent/tests/llm/test_model_specs.py +17 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
- kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
- kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
- kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
- kolega_code/agent/tests/services/__init__.py +1 -0
- kolega_code/agent/tests/services/test_browser.py +447 -0
- kolega_code/agent/tests/services/test_browser_parity.py +353 -0
- kolega_code/agent/tests/services/test_file_system.py +699 -0
- kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
- kolega_code/agent/tests/services/test_terminal.py +154 -0
- kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
- kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
- kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
- kolega_code/agent/tests/test_base_agent.py +1942 -0
- kolega_code/agent/tests/test_coder_attachments.py +330 -0
- kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
- kolega_code/agent/tests/test_commands.py +179 -0
- kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
- kolega_code/agent/tests/test_empty_message_handling.py +48 -0
- kolega_code/agent/tests/test_general_agent.py +242 -0
- kolega_code/agent/tests/test_html.py +320 -0
- kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
- kolega_code/agent/tests/test_planning_agent.py +227 -0
- kolega_code/agent/tests/test_prompt_provider.py +271 -0
- kolega_code/agent/tests/test_tool_registry.py +102 -0
- kolega_code/agent/tests/test_tools.py +549 -0
- kolega_code/agent/tests/tool_backend/__init__.py +0 -0
- kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
- kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
- kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
- kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
- kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
- kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
- kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
- kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
- kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
- kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
- kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
- kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
- kolega_code/agent/tool_backend/agent_tool.py +414 -0
- kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
- kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
- kolega_code/agent/tool_backend/base_tool.py +217 -0
- kolega_code/agent/tool_backend/browser_tool.py +271 -0
- kolega_code/agent/tool_backend/build_tool.py +93 -0
- kolega_code/agent/tool_backend/create_file_tool.py +52 -0
- kolega_code/agent/tool_backend/glob_tool.py +323 -0
- kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
- kolega_code/agent/tool_backend/memory_tool.py +79 -0
- kolega_code/agent/tool_backend/read_file_tool.py +119 -0
- kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
- kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
- kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
- kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
- kolega_code/agent/tool_backend/streaming_tool.py +47 -0
- kolega_code/agent/tool_backend/terminal_tool.py +643 -0
- kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
- kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
- kolega_code/agent/tools.py +1704 -0
- kolega_code/agent/utils/commands.py +94 -0
- kolega_code/cli/__init__.py +1 -0
- kolega_code/cli/app.py +2756 -0
- kolega_code/cli/config.py +280 -0
- kolega_code/cli/connection.py +49 -0
- kolega_code/cli/file_index.py +147 -0
- kolega_code/cli/main.py +564 -0
- kolega_code/cli/mentions.py +155 -0
- kolega_code/cli/messages.py +89 -0
- kolega_code/cli/provider_registry.py +96 -0
- kolega_code/cli/session_store.py +207 -0
- kolega_code/cli/settings.py +87 -0
- kolega_code/cli/skills.py +409 -0
- kolega_code/cli/slash_commands.py +108 -0
- kolega_code/cli/tests/__init__.py +1 -0
- kolega_code/cli/tests/test_app.py +4251 -0
- kolega_code/cli/tests/test_cli_config.py +171 -0
- kolega_code/cli/tests/test_connection.py +26 -0
- kolega_code/cli/tests/test_file_index.py +103 -0
- kolega_code/cli/tests/test_main.py +455 -0
- kolega_code/cli/tests/test_mentions.py +108 -0
- kolega_code/cli/tests/test_session_store.py +67 -0
- kolega_code/cli/tests/test_settings.py +62 -0
- kolega_code/cli/tests/test_skills.py +157 -0
- kolega_code/cli/tests/test_slash_commands.py +88 -0
- kolega_code/cli/theme.py +180 -0
- kolega_code/config.py +154 -0
- kolega_code/events.py +202 -0
- kolega_code/llm/client.py +300 -0
- kolega_code/llm/exceptions.py +285 -0
- kolega_code/llm/instrumented_client.py +520 -0
- kolega_code/llm/models.py +1368 -0
- kolega_code/llm/providers/__init__.py +0 -0
- kolega_code/llm/providers/anthropic.py +387 -0
- kolega_code/llm/providers/base.py +71 -0
- kolega_code/llm/providers/google.py +157 -0
- kolega_code/llm/providers/models.py +37 -0
- kolega_code/llm/providers/openai.py +363 -0
- kolega_code/llm/ratelimit.py +40 -0
- kolega_code/llm/specs.py +67 -0
- kolega_code/llm/tool_execution_ids.py +18 -0
- kolega_code/models/__init__.py +9 -0
- kolega_code/models/sandbox_terminal_state.py +47 -0
- kolega_code/runtime.py +50 -0
- kolega_code/sandbox/README.md +200 -0
- kolega_code/sandbox/__init__.py +21 -0
- kolega_code/sandbox/async_filesystem.py +475 -0
- kolega_code/sandbox/base.py +297 -0
- kolega_code/sandbox/browser.py +25 -0
- kolega_code/sandbox/event_loop.py +43 -0
- kolega_code/sandbox/filesystem.py +341 -0
- kolega_code/sandbox/local.py +118 -0
- kolega_code/sandbox/serializer.py +175 -0
- kolega_code/sandbox/terminal.py +868 -0
- kolega_code/sandbox/utils.py +216 -0
- kolega_code/services/base.py +255 -0
- kolega_code/services/browser.py +444 -0
- kolega_code/services/file_system.py +749 -0
- kolega_code/services/html.py +221 -0
- kolega_code/services/terminal.py +903 -0
- kolega_code/tools/__init__.py +22 -0
- kolega_code/tools/core.py +33 -0
- kolega_code/tools/definitions.py +81 -0
- kolega_code/tools/registry.py +73 -0
- kolega_code-0.1.0.dist-info/METADATA +157 -0
- kolega_code-0.1.0.dist-info/RECORD +171 -0
- kolega_code-0.1.0.dist-info/WHEEL +4 -0
- kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
- kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""Tests for parallel tool-call execution and task-local tool-call IDs."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import uuid
|
|
6
|
+
from types import SimpleNamespace
|
|
7
|
+
from unittest.mock import AsyncMock
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
12
|
+
from kolega_code.agent.baseagent import BaseAgent
|
|
13
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
14
|
+
from kolega_code.events import AgentConnectionManager
|
|
15
|
+
from kolega_code.llm.models import ToolCall
|
|
16
|
+
|
|
17
|
+
# Load environment variables (python-dotenv) at import time so that the
# os.getenv("ANTHROPIC_API_KEY", ...) lookup in the agent_config fixture
# can see a locally configured key; the fixture falls back to a placeholder
# when the variable is not set.
load_dotenv()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.fixture
def agent_config():
    """Build an AgentConfig whose model slots all use the same Anthropic Haiku model.

    The Anthropic key is read from ANTHROPIC_API_KEY and falls back to
    "test_key"; the OpenAI key is a fixed placeholder. Only the thinking
    slot sets ``thinking_tokens``.
    """
    haiku_model = "claude-haiku-4-5-20251001"

    def build_model(**overrides):
        # Each call creates a fresh ModelConfig with its own RateLimitConfig.
        return ModelConfig(
            provider=ModelProvider.ANTHROPIC,
            model=haiku_model,
            rate_limits=RateLimitConfig(),
            **overrides,
        )

    return AgentConfig(
        anthropic_api_key=os.getenv("ANTHROPIC_API_KEY", "test_key"),
        openai_api_key="test-key",
        long_context_config=build_model(),
        fast_config=build_model(),
        thinking_config=build_model(thinking_tokens=1024),
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.fixture
def mock_connection_manager():
    """Provide an AsyncMock restricted to the AgentConnectionManager interface."""
    manager = AsyncMock(spec=AgentConnectionManager)
    return manager
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture
def base_agent(tmp_path, mock_connection_manager, agent_config):
    """Create a BaseAgent rooted at ``tmp_path`` with its messaging/logging hooks mocked.

    ``send_chat_message``, ``log_info`` and ``log_error`` are each replaced
    by a separate AsyncMock on the instance.
    """
    init_kwargs = {
        "project_path": tmp_path,
        "workspace_id": "test_workspace",
        "thread_id": str(uuid.uuid4()),
        "connection_manager": mock_connection_manager,
        "config": agent_config,
    }
    agent = BaseAgent(**init_kwargs)
    for hook_name in ("send_chat_message", "log_info", "log_error"):
        setattr(agent, hook_name, AsyncMock())
    return agent
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def make_tool_call(name: str, index: int) -> ToolCall:
    """Build a ToolCall for tool ``name`` whose ids and input are derived from ``index``.

    The call id is ``"<name>_<index>"``, the execution id is the call id
    prefixed with ``"exec_"``, and the input is ``{"task": "task <index>"}``.
    """
    call_id = f"{name}_{index}"
    payload = {"task": f"task {index}"}
    return ToolCall(
        id=call_id,
        name=name,
        input=payload,
        execution_id=f"exec_{call_id}",
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ConcurrencyTracker:
    """Tracks how many fake tool executions overlap.

    ``active`` is the number of ``run()`` calls currently in flight and
    ``max_active`` is the highest value ``active`` has reached.
    """

    def __init__(self):
        # Number of run() calls currently between entry and exit.
        self.active = 0
        # High-water mark of `active`.
        self.max_active = 0

    async def run(self, duration: float = 0.01):
        """Count this call as active while sleeping for ``duration`` seconds.

        Args:
            duration: Seconds passed to ``asyncio.sleep`` while the call is
                counted as active.
        """
        self.active += 1
        self.max_active = max(self.max_active, self.active)
        try:
            await asyncio.sleep(duration)
        finally:
            # Decrement even when the sleep is cancelled (e.g. by a timeout),
            # otherwise `active` stays inflated and skews later max_active
            # readings.
            self.active -= 1
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class FakeToolCollection:
    """Test stand-in for ToolCollection: builds a registry from get_tool_list + methods.

    Subclasses provide ``get_tool_list()`` (objects with a ``name`` attribute)
    and a handler attribute named after each listed tool.
    """

    def registry(self):
        """Return a ToolRegistry holding one Tool per entry of ``get_tool_list()``.

        A tool is flagged ``parallel_safe`` when its name appears in
        ``ToolCollection.read_only_tools`` or
        ``ToolCollection.agent_dispatch_tools``.
        """
        from kolega_code.agent.tools import ToolCollection
        from kolega_code.llm.models import ToolDefinition
        from kolega_code.tools import Tool, ToolRegistry

        parallel_safe_names = set(ToolCollection.read_only_tools).union(ToolCollection.agent_dispatch_tools)
        tool_registry = ToolRegistry()
        for spec in self.get_tool_list():
            tool_name = spec.name
            # Definitions are deliberately minimal: no description, no parameters.
            definition = ToolDefinition(name=tool_name, description="", parameters=[])
            tool = Tool(
                name=tool_name,
                definition=definition,
                handler=getattr(self, tool_name),
                parallel_safe=tool_name in parallel_safe_names,
            )
            tool_registry.add(tool)
        return tool_registry
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class TestParallelToolCalls:
    """Tests for how BaseAgent.process_tool_calls schedules calls and tracks per-call IDs."""

    @pytest.mark.asyncio
    async def test_dispatch_tools_run_concurrently(self, base_agent):
        """Two dispatch calls each wait for the other's start signal, so both must be in flight at once."""
        call_zero_started = asyncio.Event()
        call_one_started = asyncio.Event()

        class Tools(FakeToolCollection):
            def __init__(self):
                self.calls = 0

            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                if "task 0" not in task:
                    call_one_started.set()
                    await asyncio.wait_for(call_zero_started.wait(), timeout=2)
                    return "second done"
                call_zero_started.set()
                # Cannot finish unless the second call is running concurrently
                await asyncio.wait_for(call_one_started.wait(), timeout=2)
                return "first done"

        base_agent.tool_collection = Tools()
        tool_calls = [make_tool_call("dispatch_general_agent", position) for position in range(2)]

        outcomes = await asyncio.wait_for(base_agent.process_tool_calls(tool_calls), timeout=5)

        contents = [outcome.content for outcome in outcomes]
        use_ids = [outcome.tool_use_id for outcome in outcomes]
        assert contents == ["first done", "second done"]
        assert use_ids == ["dispatch_general_agent_0", "dispatch_general_agent_1"]
        assert not any(outcome.is_error for outcome in outcomes)

    @pytest.mark.asyncio
    async def test_mixed_read_only_and_dispatch_parallelize(self, base_agent):
        """A batch mixing read_entire_file and dispatch_general_agent calls must overlap."""
        tracker = ConcurrencyTracker()

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                tool_names = ("dispatch_general_agent", "read_entire_file")
                return [SimpleNamespace(name=tool_name) for tool_name in tool_names]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "dispatched"

            async def read_entire_file(self, task: str):
                await tracker.run()
                return "file contents"

        base_agent.tool_collection = Tools()
        call_order = ("read_entire_file", "dispatch_general_agent", "read_entire_file")
        tool_calls = [make_tool_call(tool_name, position) for position, tool_name in enumerate(call_order)]

        outcomes = await base_agent.process_tool_calls(tool_calls)

        assert tracker.max_active > 1
        assert len(outcomes) == 3

    @pytest.mark.asyncio
    async def test_write_tool_forces_sequential(self, base_agent):
        """A batch that includes create_file must never have two executions active at once."""
        tracker = ConcurrencyTracker()

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                tool_names = ("dispatch_general_agent", "create_file")
                return [SimpleNamespace(name=tool_name) for tool_name in tool_names]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "dispatched"

            async def create_file(self, task: str):
                await tracker.run()
                return "created"

        base_agent.tool_collection = Tools()
        call_order = ("dispatch_general_agent", "create_file")
        tool_calls = [make_tool_call(tool_name, position) for position, tool_name in enumerate(call_order)]

        outcomes = await base_agent.process_tool_calls(tool_calls)

        assert tracker.max_active == 1
        assert len(outcomes) == 2

    @pytest.mark.asyncio
    async def test_semaphore_caps_concurrency(self, base_agent):
        """With more calls than PARALLEL_TOOL_LIMIT, overlap happens but never exceeds the limit."""
        tracker = ConcurrencyTracker()
        call_count = BaseAgent.PARALLEL_TOOL_LIMIT + 4

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                await tracker.run()
                return "done"

        base_agent.tool_collection = Tools()
        tool_calls = [make_tool_call("dispatch_general_agent", position) for position in range(call_count)]

        outcomes = await base_agent.process_tool_calls(tool_calls)

        assert len(outcomes) == call_count
        assert tracker.max_active > 1
        assert tracker.max_active <= BaseAgent.PARALLEL_TOOL_LIMIT

    @pytest.mark.asyncio
    async def test_contextvar_isolation_under_gather(self, base_agent):
        """Each concurrent call sees its own execution id, and all current-id attributes are None afterwards."""
        seen_ids: dict[str, str] = {}

        class Tools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                # Sleep first so the concurrent executions interleave before the ID is read
                await asyncio.sleep(0.01)
                seen_ids[task] = base_agent.current_tool_execution_id
                return "done"

        base_agent.tool_collection = Tools()
        tool_calls = [make_tool_call("dispatch_general_agent", position) for position in range(3)]

        await base_agent.process_tool_calls(tool_calls)

        expected_ids = {f"task {position}": f"exec_dispatch_general_agent_{position}" for position in range(3)}
        assert seen_ids == expected_ids
        assert base_agent.current_tool_call_id is None
        assert base_agent.current_tool_execution_id is None
        assert base_agent.current_provider_tool_call_id is None

    @pytest.mark.asyncio
    async def test_nested_agent_does_not_clobber_parent_ids(
        self, tmp_path, mock_connection_manager, agent_config, base_agent
    ):
        """A tool run by a second agent inside a parent tool call leaves the parent's execution id intact."""
        child_agent = BaseAgent(
            project_path=tmp_path,
            workspace_id="test_workspace",
            thread_id=str(uuid.uuid4()),
            connection_manager=mock_connection_manager,
            config=agent_config,
        )
        child_agent.send_chat_message = AsyncMock()
        child_agent.log_info = AsyncMock()

        class NestedTools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="read_entire_file")]

            async def read_entire_file(self, task: str):
                return "nested contents"

        child_agent.tool_collection = NestedTools()
        observed = {}

        class ParentTools(FakeToolCollection):
            def get_tool_list(self):
                return [SimpleNamespace(name="dispatch_general_agent")]

            async def dispatch_general_agent(self, task: str):
                # Simulate a sub-agent running its own tool within the same asyncio task
                nested_call = make_tool_call("read_entire_file", 99)
                await child_agent.execute_single_tool(nested_call)
                observed["parent_id"] = base_agent.current_tool_execution_id
                return "done"

        base_agent.tool_collection = ParentTools()

        parent_call = make_tool_call("dispatch_general_agent", 0)
        await base_agent.process_tool_calls([parent_call])

        assert observed["parent_id"] == "exec_dispatch_general_agent_0"
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import uuid
|
|
3
|
+
from unittest.mock import AsyncMock, Mock
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from kolega_code.agent.baseagent import BaseAgent
|
|
8
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
9
|
+
from kolega_code.events import AgentConnectionManager
|
|
10
|
+
from kolega_code.llm.models import Message, TextBlock, ToolCall
|
|
11
|
+
from kolega_code.llm.providers.models import TokenCount
|
|
12
|
+
from kolega_code.agent.planningagent import PlanningAgent
|
|
13
|
+
from kolega_code.agent.prompt_provider import AgentMode
|
|
14
|
+
from kolega_code.agent.tools import ToolExtension
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _deepseek_config() -> AgentConfig:
    """Return an AgentConfig whose four model slots share one DeepSeek ModelConfig."""
    shared_model = ModelConfig(
        provider=ModelProvider.DEEPSEEK,
        model="deepseek-v4-pro",
        rate_limits=RateLimitConfig(),
    )
    # The same ModelConfig instance is passed for every slot.
    slot_names = ("long_context_config", "fast_config", "edit_model_config", "thinking_config")
    slot_kwargs = {slot: shared_model for slot in slot_names}
    return AgentConfig(deepseek_api_key="test-key", **slot_kwargs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _image_attachment() -> dict:
    """Return a PNG image attachment dict whose ``data`` is base64 text of placeholder bytes."""
    encoded = base64.b64encode(b"fake-image-data")
    attachment = {
        "type": "image",
        "media_type": "image/png",
        "data": encoded.decode("utf-8"),
        "filename": "test-image.png",
    }
    return attachment
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class _EmptyStream:
    """Stream stub: an async context manager and async iterator that yields nothing.

    ``get_final_message`` returns a fixed assistant message with text "done".
    """

    async def __aenter__(self):
        """Enter the context and hand back the stream itself."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Exit the context without suppressing any exception."""
        return False

    def __aiter__(self):
        """Return the stream as its own async iterator."""
        return self

    async def __anext__(self):
        """End iteration immediately; the stream has no chunks."""
        raise StopAsyncIteration()

    async def get_final_message(self):
        """Return the fixed final assistant message (stop reason "end_turn")."""
        content_blocks = [TextBlock("done")]
        return Message("assistant", content_blocks, stop_reason="end_turn")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest.fixture
def mock_connection_manager():
    """Provide a Mock specced on AgentConnectionManager with a workspace id and an async ``send_message``."""
    manager = Mock(spec=AgentConnectionManager)
    manager.configure_mock(
        workspace_id="test_workspace",
        send_message=AsyncMock(),
    )
    return manager
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.fixture
def agent_config():
    """Provide a Mock specced on AgentConfig with an Anthropic long-context model and placeholder keys."""
    config = Mock(spec=AgentConfig)

    # The long-context slot is an unspecced Mock carrying only the fields set here.
    config.long_context_config = Mock()
    long_context = config.long_context_config
    long_context.provider = "anthropic"
    long_context.model = "claude-sonnet-4-5-20250929"
    long_context.thinking_tokens = None

    for key_name in ("openai_api_key", "anthropic_api_key"):
        setattr(config, key_name, "test_key")
    config.browser_use_headless = True
    return config
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pytest.mark.asyncio
async def test_planning_agent_uses_host_task_list_extension(tmp_path, mock_connection_manager, agent_config):
    """Host-provided task-list tools are callable via the tool collection and listed next to write_plan."""
    task_list = ""

    async def update_task_list(task_list_markdown: str) -> str:
        nonlocal task_list
        task_list = task_list_markdown.strip()
        return "Task list updated."

    async def get_task_list() -> str:
        return task_list or "No task list has been set."

    host_extension = ToolExtension(
        name="host-task-list",
        tools={"update_task_list": update_task_list, "get_task_list": get_task_list},
        tool_groups={"planning_tools": ["update_task_list", "get_task_list"]},
    )
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
        tool_extensions=[host_extension],
    )
    tools = planner.tool_collection

    update_reply = await tools.update_task_list("- [ ] inspect CLI\n- [x] choose tool shape")
    exposed_names = {tool.name for tool in tools.get_tool_list()}

    assert update_reply == "Task list updated."
    assert task_list == "- [ ] inspect CLI\n- [x] choose tool shape"
    assert await tools.get_task_list() == task_list
    assert {"write_plan", "get_task_list", "update_task_list"}.issubset(exposed_names)
    # File-mutating tools must not be listed.
    assert "replace_entire_file" not in exposed_names
    assert "create_file" not in exposed_names
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_planning_agent_only_exposes_write_plan_without_host_task_tools(
    tmp_path, mock_connection_manager, agent_config
):
    """Without a host extension, write_plan is listed but the task-list tools are not."""
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )

    listed_tools = planner.tool_collection.get_tool_list()
    exposed_names = {tool.name for tool in listed_tools}

    assert "write_plan" in exposed_names
    assert "update_task_list" not in exposed_names
    assert "get_task_list" not in exposed_names
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@pytest.mark.asyncio
async def test_planning_agent_rejects_unavailable_file_edit_tool(
    tmp_path, mock_connection_manager, agent_config
):
    """A replace_entire_file call returns an error result and leaves the target file unchanged."""
    notes_file = tmp_path / "notes.txt"
    notes_file.write_text("original\n", encoding="utf-8")
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )
    edit_call = ToolCall(
        id="tool-call-1",
        name="replace_entire_file",
        input={"relative_path": "notes.txt", "content": "mutated\n"},
    )

    outcome = await planner.execute_single_tool(edit_call)

    assert outcome.is_error is True
    assert outcome.content == "Tool 'replace_entire_file' is not available in this mode."
    assert notes_file.read_text(encoding="utf-8") == "original\n"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@pytest.mark.asyncio
async def test_planning_agent_write_plan_is_consumable(tmp_path, mock_connection_manager, agent_config):
    """A plan passed to write_plan is returned once by consume_completed_plan, then None."""
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=agent_config,
        agent_mode=AgentMode.CLI,
    )

    write_reply = await planner.tool_collection.write_plan("# Plan\n\nImplement planning mode.")

    assert write_reply == "Plan captured."
    first_read = planner.consume_completed_plan()
    assert first_read == "# Plan\n\nImplement planning mode."
    # A second read must come back empty: the plan is consumed by the first one.
    second_read = planner.consume_completed_plan()
    assert second_read is None
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@pytest.mark.asyncio
async def test_planning_agent_rejects_deepseek_image_without_llm_call(tmp_path, mock_connection_manager):
    """An image attachment on a DeepSeek config yields one "unsupported" response and no LLM stream call."""
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=_deepseek_config(),
        agent_mode=AgentMode.CLI,
    )
    planner.llm = Mock()

    stream = planner.process_message_stream("Plan from this screenshot", [_image_attachment()])
    chunks = [chunk async for chunk in stream]

    assert len(chunks) == 1
    only_chunk = chunks[0]
    assert only_chunk["type"] == "response"
    assert only_chunk["content"] == BaseAgent.deepseek_image_unsupported_message
    assert only_chunk["complete"] is True
    assert planner.history == []
    planner.llm.stream.assert_not_called()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@pytest.mark.asyncio
async def test_planning_agent_does_not_print_context_token_counts(
    tmp_path, mock_connection_manager, capsys
):
    """Processing a message with a stubbed LLM stream completes and writes nothing to stdout."""
    planner = PlanningAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=str(uuid.uuid4()),
        connection_manager=mock_connection_manager,
        config=_deepseek_config(),
        agent_mode=AgentMode.CLI,
    )
    # Stub out token counting and the LLM so no real provider is contacted.
    planner.count_current_context = AsyncMock(return_value=TokenCount(input_tokens=42))
    planner.llm = Mock()
    planner.llm.stream = AsyncMock(return_value=_EmptyStream())

    chunks = [chunk async for chunk in planner.process_message_stream("hello")]

    final_chunk = chunks[-1]
    assert final_chunk["complete"] is True
    assert capsys.readouterr().out == ""
|