kolega-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolega_code/__init__.py +151 -0
- kolega_code/agent/__init__.py +42 -0
- kolega_code/agent/baseagent.py +998 -0
- kolega_code/agent/browseragent.py +123 -0
- kolega_code/agent/coder.py +157 -0
- kolega_code/agent/common.py +41 -0
- kolega_code/agent/compression.py +81 -0
- kolega_code/agent/context.py +112 -0
- kolega_code/agent/conversation.py +408 -0
- kolega_code/agent/generalagent.py +146 -0
- kolega_code/agent/investigationagent.py +123 -0
- kolega_code/agent/planningagent.py +187 -0
- kolega_code/agent/prompt_provider.py +196 -0
- kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
- kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
- kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
- kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
- kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
- kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
- kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
- kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
- kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
- kolega_code/agent/prompts.py +192 -0
- kolega_code/agent/tests/__init__.py +0 -0
- kolega_code/agent/tests/llm/__init__.py +0 -0
- kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
- kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
- kolega_code/agent/tests/llm/test_client.py +773 -0
- kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
- kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
- kolega_code/agent/tests/llm/test_exceptions.py +249 -0
- kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
- kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
- kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
- kolega_code/agent/tests/llm/test_model_specs.py +17 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
- kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
- kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
- kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
- kolega_code/agent/tests/services/__init__.py +1 -0
- kolega_code/agent/tests/services/test_browser.py +447 -0
- kolega_code/agent/tests/services/test_browser_parity.py +353 -0
- kolega_code/agent/tests/services/test_file_system.py +699 -0
- kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
- kolega_code/agent/tests/services/test_terminal.py +154 -0
- kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
- kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
- kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
- kolega_code/agent/tests/test_base_agent.py +1942 -0
- kolega_code/agent/tests/test_coder_attachments.py +330 -0
- kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
- kolega_code/agent/tests/test_commands.py +179 -0
- kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
- kolega_code/agent/tests/test_empty_message_handling.py +48 -0
- kolega_code/agent/tests/test_general_agent.py +242 -0
- kolega_code/agent/tests/test_html.py +320 -0
- kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
- kolega_code/agent/tests/test_planning_agent.py +227 -0
- kolega_code/agent/tests/test_prompt_provider.py +271 -0
- kolega_code/agent/tests/test_tool_registry.py +102 -0
- kolega_code/agent/tests/test_tools.py +549 -0
- kolega_code/agent/tests/tool_backend/__init__.py +0 -0
- kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
- kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
- kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
- kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
- kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
- kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
- kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
- kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
- kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
- kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
- kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
- kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
- kolega_code/agent/tool_backend/agent_tool.py +414 -0
- kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
- kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
- kolega_code/agent/tool_backend/base_tool.py +217 -0
- kolega_code/agent/tool_backend/browser_tool.py +271 -0
- kolega_code/agent/tool_backend/build_tool.py +93 -0
- kolega_code/agent/tool_backend/create_file_tool.py +52 -0
- kolega_code/agent/tool_backend/glob_tool.py +323 -0
- kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
- kolega_code/agent/tool_backend/memory_tool.py +79 -0
- kolega_code/agent/tool_backend/read_file_tool.py +119 -0
- kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
- kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
- kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
- kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
- kolega_code/agent/tool_backend/streaming_tool.py +47 -0
- kolega_code/agent/tool_backend/terminal_tool.py +643 -0
- kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
- kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
- kolega_code/agent/tools.py +1704 -0
- kolega_code/agent/utils/commands.py +94 -0
- kolega_code/cli/__init__.py +1 -0
- kolega_code/cli/app.py +2756 -0
- kolega_code/cli/config.py +280 -0
- kolega_code/cli/connection.py +49 -0
- kolega_code/cli/file_index.py +147 -0
- kolega_code/cli/main.py +564 -0
- kolega_code/cli/mentions.py +155 -0
- kolega_code/cli/messages.py +89 -0
- kolega_code/cli/provider_registry.py +96 -0
- kolega_code/cli/session_store.py +207 -0
- kolega_code/cli/settings.py +87 -0
- kolega_code/cli/skills.py +409 -0
- kolega_code/cli/slash_commands.py +108 -0
- kolega_code/cli/tests/__init__.py +1 -0
- kolega_code/cli/tests/test_app.py +4251 -0
- kolega_code/cli/tests/test_cli_config.py +171 -0
- kolega_code/cli/tests/test_connection.py +26 -0
- kolega_code/cli/tests/test_file_index.py +103 -0
- kolega_code/cli/tests/test_main.py +455 -0
- kolega_code/cli/tests/test_mentions.py +108 -0
- kolega_code/cli/tests/test_session_store.py +67 -0
- kolega_code/cli/tests/test_settings.py +62 -0
- kolega_code/cli/tests/test_skills.py +157 -0
- kolega_code/cli/tests/test_slash_commands.py +88 -0
- kolega_code/cli/theme.py +180 -0
- kolega_code/config.py +154 -0
- kolega_code/events.py +202 -0
- kolega_code/llm/client.py +300 -0
- kolega_code/llm/exceptions.py +285 -0
- kolega_code/llm/instrumented_client.py +520 -0
- kolega_code/llm/models.py +1368 -0
- kolega_code/llm/providers/__init__.py +0 -0
- kolega_code/llm/providers/anthropic.py +387 -0
- kolega_code/llm/providers/base.py +71 -0
- kolega_code/llm/providers/google.py +157 -0
- kolega_code/llm/providers/models.py +37 -0
- kolega_code/llm/providers/openai.py +363 -0
- kolega_code/llm/ratelimit.py +40 -0
- kolega_code/llm/specs.py +67 -0
- kolega_code/llm/tool_execution_ids.py +18 -0
- kolega_code/models/__init__.py +9 -0
- kolega_code/models/sandbox_terminal_state.py +47 -0
- kolega_code/runtime.py +50 -0
- kolega_code/sandbox/README.md +200 -0
- kolega_code/sandbox/__init__.py +21 -0
- kolega_code/sandbox/async_filesystem.py +475 -0
- kolega_code/sandbox/base.py +297 -0
- kolega_code/sandbox/browser.py +25 -0
- kolega_code/sandbox/event_loop.py +43 -0
- kolega_code/sandbox/filesystem.py +341 -0
- kolega_code/sandbox/local.py +118 -0
- kolega_code/sandbox/serializer.py +175 -0
- kolega_code/sandbox/terminal.py +868 -0
- kolega_code/sandbox/utils.py +216 -0
- kolega_code/services/base.py +255 -0
- kolega_code/services/browser.py +444 -0
- kolega_code/services/file_system.py +749 -0
- kolega_code/services/html.py +221 -0
- kolega_code/services/terminal.py +903 -0
- kolega_code/tools/__init__.py +22 -0
- kolega_code/tools/core.py +33 -0
- kolega_code/tools/definitions.py +81 -0
- kolega_code/tools/registry.py +73 -0
- kolega_code-0.1.0.dist-info/METADATA +157 -0
- kolega_code-0.1.0.dist-info/RECORD +171 -0
- kolega_code-0.1.0.dist-info/WHEEL +4 -0
- kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
- kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
"""Test suite for the think_hard tool with streaming implementation."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from unittest.mock import AsyncMock, Mock, patch
|
|
5
|
+
|
|
6
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
7
|
+
from kolega_code.events import AgentConnectionManager
|
|
8
|
+
from kolega_code.llm.models import Message, TextBlock, ThinkingBlock
|
|
9
|
+
from kolega_code.agent.tool_backend.think_hard_tool import ThinkHardTool
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MockStreamWrapper:
    """Test double that simulates the AnthropicStreamWrapper behavior.

    The object is both an async context manager and its own async iterator.
    Iteration walks ``self.chunks`` (empty unless a test assigns a list), and
    the final message is only handed out while the context is entered.
    """

    def __init__(self, final_message: Message):
        self.chunk_index = 0
        self.chunks = []  # nothing queued by default, so iteration ends at once
        self._entered = False
        self.final_message = final_message

    async def __aenter__(self):
        self._entered = True
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self._entered = False
        return None

    def __aiter__(self):
        """Return self so the wrapper can be used in ``async for``."""
        return self

    async def __anext__(self):
        """Hand back the next queued chunk, or stop once all were consumed."""
        position = self.chunk_index
        if position < len(self.chunks):
            item = self.chunks[position]
            self.chunk_index = position + 1
            return item
        raise StopAsyncIteration

    async def get_final_message(self) -> Message:
        """Return the stored final message; raises RuntimeError outside ``async with``."""
        if self._entered:
            return self.final_message
        raise RuntimeError("Must use 'async with' before getting final message")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class MockStreamChunk:
    """Minimal stand-in for one streamed chunk, holding ``thinking`` and ``text`` strings."""

    def __init__(self, thinking: str = "", text: str = ""):
        self.thinking, self.text = thinking, text
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@pytest.fixture
def mock_config():
    """Provide an AgentConfig whose thinking model is an Anthropic model with fixed rate limits."""
    limits = RateLimitConfig(requests_per_minute=10, tokens_per_minute=100000, max_retries=3)
    thinking = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model="claude-3-7-sonnet-20250131",
        rate_limits=limits,
        thinking_tokens=5000,
    )
    return AgentConfig(
        anthropic_api_key="test-key",
        openai_api_key="test-openai-key",  # Required for edit_model_config
        thinking_config=thinking,
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pytest.fixture
def mock_connection_manager():
    """Provide an AsyncMock restricted to the AgentConnectionManager spec."""
    manager = AsyncMock(spec=AgentConnectionManager)
    return manager
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@pytest.fixture
def mock_caller():
    """Provide a Mock caller (base agent) with an agent name, user id and user email set."""
    return Mock(
        agent_name="test_agent",
        user_id="user-123",
        user_email="user@example.com",
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@pytest.fixture
def think_hard_tool(mock_config, mock_connection_manager, mock_caller):
    """Build a ThinkHardTool from the mocked fixtures with logging and streaming stubbed out."""
    instance = ThinkHardTool(
        project_path="/test/path",
        workspace_id="test_workspace",
        thread_id="test_thread",
        connection_manager=mock_connection_manager,
        config=mock_config,
        caller=mock_caller,
    )

    # Replace the log methods and the streaming update method with AsyncMocks
    # so tests can inspect how they were awaited.
    for method_name in ("log_info", "log_error", "send_streaming_update"):
        setattr(instance, method_name, AsyncMock())

    return instance
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@pytest.mark.asyncio
async def test_think_hard_streaming_with_thinking_and_text(think_hard_tool, mock_connection_manager):
    """Test think_hard with both thinking and text content using streaming.

    Checks how the LLMClient is constructed, that ``stream`` is awaited exactly
    once, that the result has a thinking section followed by the final
    analysis, and that a single info log entry names the problem statement.
    """

    # Final message carrying two thinking blocks followed by two text blocks
    final_message = Message(
        role="assistant",
        content=[
            ThinkingBlock(thinking="This is deep thinking about the problem..."),
            ThinkingBlock(thinking="Additional thinking process..."),
            TextBlock(text="This is the final analysis."),
            TextBlock(text="Additional insights."),
        ],
    )

    mock_stream = MockStreamWrapper(final_message)

    with patch("kolega_code.agent.tool_backend.think_hard_tool.LLMClient") as mock_llm_class:
        with patch("kolega_code.agent.tool_backend.think_hard_tool.get_model_specs") as mock_get_specs:
            mock_get_specs.return_value = {"max_completion_tokens": 8192}

            mock_llm_instance = mock_llm_class.return_value

            # AsyncMock rather than a hand-written coroutine function: awaiting
            # stream(...) still yields the wrapper, and the await is recorded so
            # it can be asserted on below.
            mock_llm_instance.stream = AsyncMock(return_value=mock_stream)

            result = await think_hard_tool.think_hard("Test problem statement")

            # Verify the LLMClient was created with correct parameters
            mock_llm_class.assert_called_once_with(
                provider="anthropic",
                api_key="test-key",
                max_retries=3,
                requests_per_minute=10,
                tokens_per_minute=100000,
            )

            # Verify the stream was actually requested, exactly once
            mock_llm_instance.stream.assert_awaited_once()

            # Verify the result format is correct
            expected_result = (
                "# Extended Thinking Process\n\n"
                "This is deep thinking about the problem...\n"
                "Additional thinking process...\n\n"
                "# Final Analysis\n\n"
                "This is the final analysis.\n"
                "Additional insights."
            )
            assert result == expected_result

            # Verify logging
            think_hard_tool.log_info.assert_called_once()
            assert "Thinking hard about: Test problem statement" in think_hard_tool.log_info.call_args[0][0]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@pytest.mark.asyncio
async def test_think_hard_streaming_updates_use_append_mode_for_live_deltas(think_hard_tool):
    """Check that in-progress streaming updates use append mode and the last update replaces."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    think_hard_tool.caller.current_tool_call_id = "tool-1"

    wrapper = MockStreamWrapper(
        Message(
            role="assistant",
            content=[
                ThinkingBlock(thinking="T" * 60),
                TextBlock(text="A" * 60),
            ],
        )
    )
    # Queue one thinking chunk and one text chunk for the tool to iterate over.
    wrapper.chunks = [
        MockStreamChunk(thinking="T" * 60),
        MockStreamChunk(text="A" * 60),
    ]

    async def fake_stream(*args, **kwargs):
        return wrapper

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        await think_hard_tool.think_hard("Test problem statement")

    updates = think_hard_tool.send_streaming_update.await_args_list
    partial_updates = [update for update in updates if update.kwargs.get("is_complete") is False]

    assert partial_updates
    for update in partial_updates:
        assert update.kwargs["stream_mode"] == "append"

    last_update = updates[-1]
    assert last_update.kwargs["is_complete"] is True
    assert last_update.kwargs["stream_mode"] == "replace"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@pytest.mark.asyncio
async def test_think_hard_streaming_only_text(think_hard_tool, mock_connection_manager):
    """Check the result when the final message holds only a text block: no thinking section."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    text_only = Message(role="assistant", content=[TextBlock(text="Direct response without extended thinking.")])
    wrapper = MockStreamWrapper(text_only)

    # Awaiting stream(...) resolves to the mock stream wrapper.
    async def fake_stream(*args, **kwargs):
        return wrapper

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        outcome = await think_hard_tool.think_hard("Simple question")

    # Only the final-analysis heading and the text are expected.
    assert outcome == (
        "# Final Analysis\n\n"
        "Direct response without extended thinking."
    )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@pytest.mark.asyncio
async def test_think_hard_streaming_error_handling(think_hard_tool, mock_connection_manager):
    """Check that an exception raised by ``stream`` becomes an error string and one error log."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    # Awaiting stream(...) raises instead of producing a stream wrapper.
    async def exploding_stream(*args, **kwargs):
        raise Exception("API Error: Streaming failed")

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = exploding_stream

        outcome = await think_hard_tool.think_hard("Test problem")

    assert outcome == "Error during extended thinking: API Error: Streaming failed"
    think_hard_tool.log_error.assert_called_once()
    logged_message = think_hard_tool.log_error.call_args[0][0]
    assert "API Error" in logged_message
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@pytest.mark.asyncio
async def test_think_hard_stream_context_manager_error(think_hard_tool, mock_connection_manager):
    """Check that a stream whose ``__aenter__`` raises yields an error string and one error log."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    class BrokenStream:
        """Async context manager that fails on entry."""

        async def __aenter__(self):
            raise Exception("Stream initialization failed")

        async def __aexit__(self, exc_type, exc_val, exc_tb):
            return None

    broken = BrokenStream()

    # Awaiting stream(...) resolves to the failing stream wrapper.
    async def fake_stream(*args, **kwargs):
        return broken

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        outcome = await think_hard_tool.think_hard("Test problem")

    assert outcome == "Error during extended thinking: Stream initialization failed"
    think_hard_tool.log_error.assert_called_once()
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@pytest.mark.asyncio
async def test_think_hard_mixed_content_blocks(think_hard_tool, mock_connection_manager):
    """Check that a tool call placed among thinking and text blocks is left out of the result."""

    from kolega_code.llm.models import ToolCall

    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    blocks = [
        ThinkingBlock(thinking="Analyzing the problem..."),
        TextBlock(text="Here's my analysis:"),
        ToolCall(id="tool_1", name="some_tool", input={"arg": "value"}),  # Should be ignored
        TextBlock(text="Conclusion based on analysis."),
    ]
    wrapper = MockStreamWrapper(Message(role="assistant", content=blocks))

    # Awaiting stream(...) resolves to the mock stream wrapper.
    async def fake_stream(*args, **kwargs):
        return wrapper

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        outcome = await think_hard_tool.think_hard("Complex problem")

    # Only the thinking and text blocks should appear.
    assert outcome == (
        "# Extended Thinking Process\n\n"
        "Analyzing the problem...\n\n"
        "# Final Analysis\n\n"
        "Here's my analysis:\n"
        "Conclusion based on analysis."
    )
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@pytest.mark.asyncio
async def test_think_hard_empty_response(think_hard_tool, mock_connection_manager):
    """Check the result when the final message has no content blocks at all."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    wrapper = MockStreamWrapper(Message(role="assistant", content=[]))

    # Awaiting stream(...) resolves to the mock stream wrapper.
    async def fake_stream(*args, **kwargs):
        return wrapper

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        outcome = await think_hard_tool.think_hard("Empty response test")

    # With empty content only the final-analysis heading is expected.
    assert outcome == "# Final Analysis\n\n"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
@pytest.mark.asyncio
async def test_think_hard_large_thinking_content(think_hard_tool, mock_connection_manager):
    """Check that a 100-line thinking block and the final text both appear in the result."""
    llm_target = "kolega_code.agent.tool_backend.think_hard_tool.LLMClient"
    specs_target = "kolega_code.agent.tool_backend.think_hard_tool.get_model_specs"

    # 100 lines, each a step label followed by 100 "x" characters.
    padding = "x" * 100
    large_thinking = "\n".join(f"Thinking step {i}: " + padding for i in range(100))

    wrapper = MockStreamWrapper(
        Message(
            role="assistant",
            content=[
                ThinkingBlock(thinking=large_thinking),
                TextBlock(text="Final conclusion after extensive thinking."),
            ],
        )
    )

    # Awaiting stream(...) resolves to the mock stream wrapper.
    async def fake_stream(*args, **kwargs):
        return wrapper

    with patch(llm_target) as llm_cls, patch(specs_target) as specs_fn:
        specs_fn.return_value = {"max_completion_tokens": 8192}
        llm_cls.return_value.stream = fake_stream

        outcome = await think_hard_tool.think_hard("Complex problem requiring extensive thinking")

    # Both headings, the full thinking text and the conclusion must be present.
    assert "# Extended Thinking Process\n\n" in outcome
    assert large_thinking in outcome
    assert "# Final Analysis\n\n" in outcome
    assert "Final conclusion after extensive thinking." in outcome
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@pytest.mark.asyncio
async def test_think_hard_model_specs_usage(think_hard_tool, mock_connection_manager):
    """Test that model specs are looked up for the configured thinking model.

    Also asserts that ``stream`` is awaited exactly once. The arguments passed
    to ``stream`` are not inspected here.
    """

    final_message = Message(role="assistant", content=[TextBlock(text="Response")])

    mock_stream = MockStreamWrapper(final_message)

    with patch("kolega_code.agent.tool_backend.think_hard_tool.LLMClient") as mock_llm_class:
        with patch("kolega_code.agent.tool_backend.think_hard_tool.get_model_specs") as mock_get_specs:
            mock_get_specs.return_value = {"max_completion_tokens": 8192}

            mock_llm_instance = mock_llm_class.return_value

            # AsyncMock rather than a hand-written coroutine function: awaiting
            # stream(...) still yields the wrapper, and the await is recorded.
            mock_llm_instance.stream = AsyncMock(return_value=mock_stream)

            await think_hard_tool.think_hard("Test")

            # Verify get_model_specs was called correctly
            mock_get_specs.assert_called_once_with(ModelProvider.ANTHROPIC, "claude-3-7-sonnet-20250131")

            # Verify the stream was actually requested, exactly once
            mock_llm_instance.stream.assert_awaited_once()
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from unittest.mock import AsyncMock, Mock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
6
|
+
from kolega_code.agent.tool_backend.web_fetch_tool import WebFetchTool
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
def mock_connection_manager():
    """Provide a bare AsyncMock to stand in for the connection manager."""
    manager = AsyncMock()
    return manager
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def project_path(tmp_path):
    """Expose pytest's ``tmp_path`` under the name ``project_path``."""
    project_root = tmp_path
    return project_root
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def agent_config():
    """Provide an AgentConfig with long-context, fast and thinking model configs, all Anthropic."""
    long_context = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model="long-model",
        rate_limits=RateLimitConfig(),
    )
    fast = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model="haiku-model",
        rate_limits=RateLimitConfig(),
    )
    thinking = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model="think-model",
        rate_limits=RateLimitConfig(),
        thinking_tokens=512,
    )
    return AgentConfig(
        anthropic_api_key="test_key",
        openai_api_key="test_key",
        long_context_config=long_context,
        fast_config=fast,
        thinking_config=thinking,
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pytest.fixture
def mock_caller():
    """Provide a Mock caller with agent, workspace, thread and user attributes preset."""
    preset_attributes = {
        "agent_name": "coder",
        "current_tool_call_id": None,
        "workspace_id": "test_workspace",
        "thread_id": "test_thread",
        "llm": None,
        "user_id": "user-123",
        "user_email": "user@example.com",
    }
    return Mock(**preset_attributes)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.fixture
def web_fetch_tool(project_path, mock_connection_manager, agent_config, mock_caller):
    """Build a WebFetchTool from the fixtures above, passing every argument positionally."""
    constructor_args = (
        project_path,
        "test_workspace",
        "test_thread",
        mock_connection_manager,
        agent_config,
        mock_caller,
    )
    return WebFetchTool(*constructor_args)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestWebFetchTool:
    """Tests for ``WebFetchTool.web_fetch`` with download, extraction, model specs and the LLM client patched."""

    # Patch targets; every one is looked up inside the module under test.
    _MODULE = "kolega_code.agent.tool_backend.web_fetch_tool"
    _FETCH_URL = _MODULE + ".trafilatura.fetch_url"
    _EXTRACT = _MODULE + ".trafilatura.extract"
    _MODEL_SPECS = _MODULE + ".get_model_specs"
    _LLM_CLIENT = _MODULE + ".LLMClient"

    @classmethod
    def _page_patchers(cls, max_completion_tokens):
        """Return un-entered patchers for (fetch_url, extract, get_model_specs, LLMClient), in that order."""
        return (
            patch(cls._FETCH_URL, return_value="<html>content</html>"),
            patch(cls._EXTRACT, return_value="Extracted content"),
            patch(cls._MODEL_SPECS, return_value={"max_completion_tokens": max_completion_tokens}),
            patch(cls._LLM_CLIENT),
        )

    @staticmethod
    def _stub_llm_reply(llm_class, text):
        """Make ``llm_class()`` yield a client whose awaited ``generate`` returns a reply with ``text``.

        Returns the client mock so callers can inspect how ``generate`` was awaited.
        """
        reply = Mock()
        reply.get_text_content.return_value = text
        client = llm_class.return_value
        client.generate = AsyncMock(return_value=reply)
        return client

    @pytest.mark.asyncio
    async def test_web_fetch_success(self, web_fetch_tool, agent_config):
        """The model's text is returned and ``generate`` is awaited once with the fast model and spec'd token limit."""
        fetch_patch, extract_patch, specs_patch, llm_patch = self._page_patchers(1024)

        with fetch_patch as fetch_url, extract_patch as extract, specs_patch as model_specs, llm_patch as llm_class:
            client = self._stub_llm_reply(llm_class, "Summarized answer")

            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize the page")

            assert answer == "Summarized answer"
            fetch_url.assert_called_once_with("https://example.com")
            extract.assert_called_once()
            model_specs.assert_called_once()
            client.generate.assert_awaited_once()

            _, generate_kwargs = client.generate.await_args
            assert generate_kwargs["model"] == agent_config.fast_config.model
            assert generate_kwargs["max_completion_tokens"] == 1024

    @pytest.mark.asyncio
    async def test_web_fetch_applies_char_limit(self, web_fetch_tool):
        """With ``DEFAULT_RESPONSE_CHAR_LIMIT`` patched to 10, a longer model reply comes back as ``"Alpha…"``."""
        limit_patch = patch.object(WebFetchTool, "DEFAULT_RESPONSE_CHAR_LIMIT", 10)
        fetch_patch, extract_patch, specs_patch, llm_patch = self._page_patchers(1024)

        # The limit patch is entered first, matching the original ordering of the context managers.
        with limit_patch, fetch_patch, extract_patch, specs_patch, llm_patch as llm_class:
            self._stub_llm_reply(llm_class, "Alpha Beta Gamma Delta")

            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize")

            assert answer == "Alpha…"

    @pytest.mark.asyncio
    async def test_web_fetch_caps_large_model_token_limit(self, web_fetch_tool):
        """A spec'd limit of 384000 tokens is replaced by ``WEB_FETCH_MAX_COMPLETION_TOKENS`` in the LLM call."""
        fetch_patch, extract_patch, specs_patch, llm_patch = self._page_patchers(384000)

        with fetch_patch, extract_patch, specs_patch, llm_patch as llm_class:
            client = self._stub_llm_reply(llm_class, "Summarized answer")

            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize")

            assert answer == "Summarized answer"
            _, generate_kwargs = client.generate.await_args
            assert generate_kwargs["max_completion_tokens"] == WebFetchTool.WEB_FETCH_MAX_COMPLETION_TOKENS

    @pytest.mark.asyncio
    async def test_web_fetch_preserves_smaller_model_token_limit(self, web_fetch_tool):
        """A spec'd limit of 512 tokens is passed to the LLM call unchanged."""
        fetch_patch, extract_patch, specs_patch, llm_patch = self._page_patchers(512)

        with fetch_patch, extract_patch, specs_patch, llm_patch as llm_class:
            client = self._stub_llm_reply(llm_class, "Summarized answer")

            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize")

            assert answer == "Summarized answer"
            _, generate_kwargs = client.generate.await_args
            assert generate_kwargs["max_completion_tokens"] == 512

    @pytest.mark.asyncio
    async def test_web_fetch_reports_empty_model_response(self, web_fetch_tool):
        """An empty string from the model is reported as an error after a single ``generate`` await."""
        fetch_patch, extract_patch, specs_patch, llm_patch = self._page_patchers(1024)

        with fetch_patch, extract_patch, specs_patch, llm_patch as llm_class:
            client = self._stub_llm_reply(llm_class, "")

            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize")

            assert answer == "Error: Fast model returned an empty response for fetched content."
            client.generate.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_web_fetch_invalid_url(self, web_fetch_tool):
        """An ``ftp://`` URL produces the "Provide a valid http(s) URL." error."""
        answer = await web_fetch_tool.web_fetch("ftp://example.com", "Summarize")
        assert answer.startswith("Error: Provide a valid http(s) URL.")

    @pytest.mark.asyncio
    async def test_web_fetch_no_content_downloaded(self, web_fetch_tool):
        """When ``fetch_url`` returns ``None`` an error is returned and ``LLMClient`` is never instantiated."""
        fetch_patch = patch(self._FETCH_URL, return_value=None)
        llm_patch = patch(self._LLM_CLIENT)

        with fetch_patch, llm_patch as llm_class:
            answer = await web_fetch_tool.web_fetch("https://example.com", "Summarize")

            assert answer.startswith("Error: No content retrieved from https://example.com")
            llm_class.assert_not_called()
|