kolega-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolega_code/__init__.py +151 -0
- kolega_code/agent/__init__.py +42 -0
- kolega_code/agent/baseagent.py +998 -0
- kolega_code/agent/browseragent.py +123 -0
- kolega_code/agent/coder.py +157 -0
- kolega_code/agent/common.py +41 -0
- kolega_code/agent/compression.py +81 -0
- kolega_code/agent/context.py +112 -0
- kolega_code/agent/conversation.py +408 -0
- kolega_code/agent/generalagent.py +146 -0
- kolega_code/agent/investigationagent.py +123 -0
- kolega_code/agent/planningagent.py +187 -0
- kolega_code/agent/prompt_provider.py +196 -0
- kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
- kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
- kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
- kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
- kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
- kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
- kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
- kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
- kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
- kolega_code/agent/prompts.py +192 -0
- kolega_code/agent/tests/__init__.py +0 -0
- kolega_code/agent/tests/llm/__init__.py +0 -0
- kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
- kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
- kolega_code/agent/tests/llm/test_client.py +773 -0
- kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
- kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
- kolega_code/agent/tests/llm/test_exceptions.py +249 -0
- kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
- kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
- kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
- kolega_code/agent/tests/llm/test_model_specs.py +17 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
- kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
- kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
- kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
- kolega_code/agent/tests/services/__init__.py +1 -0
- kolega_code/agent/tests/services/test_browser.py +447 -0
- kolega_code/agent/tests/services/test_browser_parity.py +353 -0
- kolega_code/agent/tests/services/test_file_system.py +699 -0
- kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
- kolega_code/agent/tests/services/test_terminal.py +154 -0
- kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
- kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
- kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
- kolega_code/agent/tests/test_base_agent.py +1942 -0
- kolega_code/agent/tests/test_coder_attachments.py +330 -0
- kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
- kolega_code/agent/tests/test_commands.py +179 -0
- kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
- kolega_code/agent/tests/test_empty_message_handling.py +48 -0
- kolega_code/agent/tests/test_general_agent.py +242 -0
- kolega_code/agent/tests/test_html.py +320 -0
- kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
- kolega_code/agent/tests/test_planning_agent.py +227 -0
- kolega_code/agent/tests/test_prompt_provider.py +271 -0
- kolega_code/agent/tests/test_tool_registry.py +102 -0
- kolega_code/agent/tests/test_tools.py +549 -0
- kolega_code/agent/tests/tool_backend/__init__.py +0 -0
- kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
- kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
- kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
- kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
- kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
- kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
- kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
- kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
- kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
- kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
- kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
- kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
- kolega_code/agent/tool_backend/agent_tool.py +414 -0
- kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
- kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
- kolega_code/agent/tool_backend/base_tool.py +217 -0
- kolega_code/agent/tool_backend/browser_tool.py +271 -0
- kolega_code/agent/tool_backend/build_tool.py +93 -0
- kolega_code/agent/tool_backend/create_file_tool.py +52 -0
- kolega_code/agent/tool_backend/glob_tool.py +323 -0
- kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
- kolega_code/agent/tool_backend/memory_tool.py +79 -0
- kolega_code/agent/tool_backend/read_file_tool.py +119 -0
- kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
- kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
- kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
- kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
- kolega_code/agent/tool_backend/streaming_tool.py +47 -0
- kolega_code/agent/tool_backend/terminal_tool.py +643 -0
- kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
- kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
- kolega_code/agent/tools.py +1704 -0
- kolega_code/agent/utils/commands.py +94 -0
- kolega_code/cli/__init__.py +1 -0
- kolega_code/cli/app.py +2756 -0
- kolega_code/cli/config.py +280 -0
- kolega_code/cli/connection.py +49 -0
- kolega_code/cli/file_index.py +147 -0
- kolega_code/cli/main.py +564 -0
- kolega_code/cli/mentions.py +155 -0
- kolega_code/cli/messages.py +89 -0
- kolega_code/cli/provider_registry.py +96 -0
- kolega_code/cli/session_store.py +207 -0
- kolega_code/cli/settings.py +87 -0
- kolega_code/cli/skills.py +409 -0
- kolega_code/cli/slash_commands.py +108 -0
- kolega_code/cli/tests/__init__.py +1 -0
- kolega_code/cli/tests/test_app.py +4251 -0
- kolega_code/cli/tests/test_cli_config.py +171 -0
- kolega_code/cli/tests/test_connection.py +26 -0
- kolega_code/cli/tests/test_file_index.py +103 -0
- kolega_code/cli/tests/test_main.py +455 -0
- kolega_code/cli/tests/test_mentions.py +108 -0
- kolega_code/cli/tests/test_session_store.py +67 -0
- kolega_code/cli/tests/test_settings.py +62 -0
- kolega_code/cli/tests/test_skills.py +157 -0
- kolega_code/cli/tests/test_slash_commands.py +88 -0
- kolega_code/cli/theme.py +180 -0
- kolega_code/config.py +154 -0
- kolega_code/events.py +202 -0
- kolega_code/llm/client.py +300 -0
- kolega_code/llm/exceptions.py +285 -0
- kolega_code/llm/instrumented_client.py +520 -0
- kolega_code/llm/models.py +1368 -0
- kolega_code/llm/providers/__init__.py +0 -0
- kolega_code/llm/providers/anthropic.py +387 -0
- kolega_code/llm/providers/base.py +71 -0
- kolega_code/llm/providers/google.py +157 -0
- kolega_code/llm/providers/models.py +37 -0
- kolega_code/llm/providers/openai.py +363 -0
- kolega_code/llm/ratelimit.py +40 -0
- kolega_code/llm/specs.py +67 -0
- kolega_code/llm/tool_execution_ids.py +18 -0
- kolega_code/models/__init__.py +9 -0
- kolega_code/models/sandbox_terminal_state.py +47 -0
- kolega_code/runtime.py +50 -0
- kolega_code/sandbox/README.md +200 -0
- kolega_code/sandbox/__init__.py +21 -0
- kolega_code/sandbox/async_filesystem.py +475 -0
- kolega_code/sandbox/base.py +297 -0
- kolega_code/sandbox/browser.py +25 -0
- kolega_code/sandbox/event_loop.py +43 -0
- kolega_code/sandbox/filesystem.py +341 -0
- kolega_code/sandbox/local.py +118 -0
- kolega_code/sandbox/serializer.py +175 -0
- kolega_code/sandbox/terminal.py +868 -0
- kolega_code/sandbox/utils.py +216 -0
- kolega_code/services/base.py +255 -0
- kolega_code/services/browser.py +444 -0
- kolega_code/services/file_system.py +749 -0
- kolega_code/services/html.py +221 -0
- kolega_code/services/terminal.py +903 -0
- kolega_code/tools/__init__.py +22 -0
- kolega_code/tools/core.py +33 -0
- kolega_code/tools/definitions.py +81 -0
- kolega_code/tools/registry.py +73 -0
- kolega_code-0.1.0.dist-info/METADATA +157 -0
- kolega_code-0.1.0.dist-info/RECORD +171 -0
- kolega_code-0.1.0.dist-info/WHEEL +4 -0
- kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
- kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test duplicate tool result prevention logic.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
8
|
+
from kolega_code.agent.baseagent import BaseAgent
|
|
9
|
+
from kolega_code.llm.models import Message, TextBlock, ToolCall, ToolResult
|
|
10
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
11
|
+
from kolega_code.services.file_system import LocalFileSystem
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestDuplicateToolResultPrevention:
|
|
15
|
+
"""Test that duplicate tool results are properly handled."""
|
|
16
|
+
|
|
17
|
+
@pytest.fixture
|
|
18
|
+
def base_agent(self):
    """Build a ``BaseAgent`` whose external collaborators are all mocked.

    The connection manager, model-spec lookup, terminal manager, browser
    manager, LLM client, tool collection and filesystem class are patched
    only while the agent is being constructed; the patches are undone when
    the ``with`` block exits on return.

    Returns:
        The constructed agent, with its ``llm`` attribute replaced by a
        ``Mock``.
    """

    def _anthropic_model(**extra):
        # Every call builds fresh ModelConfig / RateLimitConfig objects so
        # the three configs below never share an instance.
        return ModelConfig(
            provider=ModelProvider.ANTHROPIC,
            model="test-model",
            rate_limits=RateLimitConfig(),
            **extra,
        )

    agent_config = AgentConfig(
        anthropic_api_key="test-key",
        openai_api_key="test-key",
        long_context_config=_anthropic_model(),
        fast_config=_anthropic_model(),
        thinking_config=_anthropic_model(thinking_tokens=1024),
    )

    with patch("kolega_code.agent.baseagent.AgentConnectionManager"), \
            patch("kolega_code.agent.baseagent.get_model_specs") as specs_lookup, \
            patch("kolega_code.agent.context.LocalTerminalManager"), \
            patch("kolega_code.agent.context.PlaywrightBrowserManager"), \
            patch("kolega_code.agent.context.LLMClient"), \
            patch("kolega_code.agent.baseagent.ToolCollection"), \
            patch("kolega_code.agent.context.LocalFileSystem") as filesystem_cls:

        # Canned model limits handed back by the patched spec lookup.
        specs_lookup.return_value = {"context_length": 100000, "max_completion_tokens": 4096}

        # Filesystem double that reports every path as an existing directory.
        filesystem_cls.return_value = Mock(
            **{"exists.return_value": True, "is_dir.return_value": True}
        )

        built_agent = BaseAgent(
            project_path="/test/path",
            workspace_id="test-workspace",
            thread_id="test-thread",
            connection_manager=Mock(),
            config=agent_config,
        )
        built_agent.llm = Mock()
        return built_agent
|
|
67
|
+
|
|
68
|
+
def test_replace_dummy_result_with_real_result(self, base_agent):
    """A real result appended for a pending tool call is the only result stored.

    After the append, the latest user message must hold exactly one block:
    the real, non-error result for ``tool_123``.
    """
    # Seed the conversation: a user request followed by an unanswered tool call.
    opening_request = Message(role="user", content=[TextBlock(text="Do something")])
    pending_call = Message(
        role="assistant",
        content=[ToolCall(id="tool_123", name="test_tool", input={})],
    )
    base_agent.history.extend([opening_request, pending_call])

    # Deliver the genuine result for that call.
    # NOTE(review): whether append_user_message inserts and then swaps out a
    # placeholder internally is not observable here; only the final history
    # is checked.
    base_agent.append_user_message(
        [
            ToolResult(
                tool_use_id="tool_123",
                name="test_tool",
                content="Real tool execution result",
                is_error=False,
            )
        ]
    )

    # Two user-role messages are expected: the opening request and the
    # message that carries the tool result.
    user_side = list(filter(lambda entry: entry.role == "user", base_agent.history))
    assert len(user_side) == 2

    # The most recent user message carries exactly the real result.
    result_blocks = user_side[-1].content
    assert len(result_blocks) == 1
    assert result_blocks[0].tool_use_id == "tool_123"
    assert result_blocks[0].content == "Real tool execution result"
    assert result_blocks[0].is_error is False
|
|
96
|
+
|
|
97
|
+
def test_multiple_tool_calls_with_partial_real_results(self, base_agent):
    """Missing results are back-filled only by ``fix_incomplete_tool_calls``.

    Three tool calls are issued but only two results are appended. The
    stored history must stay as provided (and be reported invalid), while
    the fixed copy must contain both real results plus an error placeholder
    for the unanswered call.
    """
    # One assistant turn issuing three tool calls.
    issued_calls = [
        ToolCall(id="tool_1", name="tool1", input={}),
        ToolCall(id="tool_2", name="tool2", input={}),
        ToolCall(id="tool_3", name="tool3", input={}),
    ]
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do multiple things")]),
            Message(role="assistant", content=issued_calls),
        ]
    )

    # Only tool_1 and tool_3 report back; tool_2 never answers.
    base_agent.append_user_message(
        [
            ToolResult(tool_use_id="tool_1", name="tool1", content="Tool 1 completed successfully", is_error=False),
            ToolResult(tool_use_id="tool_3", name="tool3", content="Tool 3 completed successfully", is_error=False),
        ]
    )

    # Appending alone leaves the history invalid.
    assert not base_agent._is_history_valid_for_anthropic()

    # The stored message contains exactly what was handed in.
    stored_user_msgs = [entry for entry in base_agent.history if entry.role == "user"]
    assert len(stored_user_msgs[-1].content) == 2

    # Fixing a copy of the history fills the gap for tool_2.
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    repaired_user_msgs = [entry for entry in repaired if entry.role == "user"]
    repaired_blocks = repaired_user_msgs[-1].content

    # Two real results plus one placeholder.
    assert len(repaired_blocks) == 3

    by_call_id = {block.tool_use_id: block for block in repaired_blocks}

    # tool_1 keeps its real outcome.
    first = by_call_id["tool_1"]
    assert first.content == "Tool 1 completed successfully"
    assert first.is_error is False

    # tool_2 receives the interruption placeholder, flagged as an error.
    second = by_call_id["tool_2"]
    assert "Operation was interrupted" in second.content
    assert second.is_error is True

    # tool_3 keeps its real outcome.
    third = by_call_id["tool_3"]
    assert third.content == "Tool 3 completed successfully"
    assert third.is_error is False
|
|
156
|
+
|
|
157
|
+
def test_immediate_real_result_replaces_dummy_same_operation(self, base_agent):
    """A single append for a pending call leaves only the real result behind.

    Whatever placeholder handling happens during the append, the history
    afterwards must contain exactly one ``ToolResult`` and it must be the
    real one, not an "Operation was interrupted" placeholder.
    """
    # Conversation with one unanswered tool call.
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do something")]),
            Message(
                role="assistant",
                content=[ToolCall(id="immediate_tool", name="test_tool", input={})],
            ),
        ]
    )

    # Scenario: the tool finishes (possibly after a brief interruption) and
    # its result is appended in one call.
    # NOTE(review): the original comments describe a placeholder being
    # created and then replaced inside this call; that internal sequence is
    # not verifiable from this test - confirm against append_user_message.
    genuine = ToolResult(
        tool_use_id="immediate_tool",
        name="test_tool",
        content="Real execution result",
        is_error=False,
    )
    base_agent.append_user_message([genuine])

    # Collect every ToolResult held in list-valued user messages.
    found = [
        block
        for entry in base_agent.history
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult)
    ]

    assert len(found) == 1
    sole = found[0]
    assert sole.tool_use_id == "immediate_tool"
    assert sole.content == "Real execution result"
    assert sole.is_error is False
    # Explicit guard against the placeholder text.
    assert "Operation was interrupted" not in sole.content
|
|
197
|
+
|
|
198
|
+
def test_delayed_real_result_replaces_dummy(self, base_agent):
    """A late real result supersedes a placeholder already in the history.

    The history is first repaired so that it holds an "Operation was
    interrupted" placeholder. After a further assistant turn the real
    result is appended, and only that real result may remain for the call.
    """
    # Conversation with one unanswered tool call.
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do something")]),
            Message(
                role="assistant",
                content=[ToolCall(id="delayed_tool", name="slow_tool", input={})],
            ),
        ]
    )

    # A plain text message does not make the history valid.
    base_agent.append_user_message([TextBlock(text="Status check")])
    assert not base_agent._is_history_valid_for_anthropic()

    # Drop that text message again before repairing.
    base_agent.history = base_agent.history[:-1]

    # Repair a copy, as would be done before the history is sent out.
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))

    # The repaired copy must contain exactly one placeholder result.
    placeholders = [
        block
        for entry in repaired
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult)
    ]
    assert len(placeholders) == 1
    assert placeholders[0].tool_use_id == "delayed_tool"
    assert "Operation was interrupted" in placeholders[0].content
    assert placeholders[0].is_error is True

    # Adopt the repaired history and let the conversation move on.
    base_agent.history = repaired
    base_agent.history.append(
        Message(role="assistant", content=[TextBlock(text="Let me continue...")])
    )

    # The genuine result finally shows up.
    late_result = ToolResult(
        tool_use_id="delayed_tool",
        name="slow_tool",
        content="Finally completed!",
        is_error=False,
    )
    base_agent.append_user_message([late_result])

    # Exactly one result may exist for the call, across all user messages.
    survivors = [
        block
        for entry in base_agent.history
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult) and block.tool_use_id == "delayed_tool"
    ]

    # The real result took the placeholder's place.
    assert len(survivors) == 1
    assert survivors[0].content == "Finally completed!"
    assert survivors[0].is_error is False
|
|
259
|
+
|
|
260
|
+
def test_no_duplicate_when_all_results_provided_immediately(self, base_agent):
    """Supplying every result in one append yields one result per tool call."""
    # One assistant turn issuing two tool calls.
    both_calls = [
        ToolCall(id="immediate_1", name="tool1", input={}),
        ToolCall(id="immediate_2", name="tool2", input={}),
    ]
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do something")]),
            Message(role="assistant", content=both_calls),
        ]
    )

    # Both results arrive together.
    base_agent.append_user_message(
        [
            ToolResult(tool_use_id="immediate_1", name="tool1", content="Result 1", is_error=False),
            ToolResult(tool_use_id="immediate_2", name="tool2", content="Result 2", is_error=False),
        ]
    )

    # Collect every ToolResult held in list-valued user messages.
    stored = [
        block
        for entry in base_agent.history
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult)
    ]

    # Two results in total, one per call id.
    assert len(stored) == 2
    seen_ids = {block.tool_use_id for block in stored}
    assert seen_ids == {"immediate_1", "immediate_2"}
|
|
295
|
+
|
|
296
|
+
def test_real_error_result_not_replaced(self, base_agent):
    """A later success result replaces an earlier real error for the same call.

    NOTE(review): the test name says "not replaced", but the assertions
    below require the error to BE replaced by the success result. The name
    is kept so the test id stays stable.
    """

    def results_for_error_tool():
        # Every ToolResult for "error_tool" found in list-valued user messages.
        return [
            block
            for entry in base_agent.history
            if entry.role == "user" and isinstance(entry.content, list)
            for block in entry.content
            if isinstance(block, ToolResult) and block.tool_use_id == "error_tool"
        ]

    # Conversation with one unanswered tool call.
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do something")]),
            Message(
                role="assistant",
                content=[ToolCall(id="error_tool", name="failing_tool", input={})],
            ),
        ]
    )

    # A genuine failure (not an interruption placeholder) is reported first.
    genuine_failure = ToolResult(
        tool_use_id="error_tool",
        name="failing_tool",
        content="FileNotFoundError: The file does not exist",
        is_error=True,
    )
    base_agent.append_user_message([genuine_failure])

    # The error is stored exactly once.
    after_failure = results_for_error_tool()
    assert len(after_failure) == 1
    assert after_failure[0].is_error is True

    # The assistant responds before the retry result arrives.
    base_agent.history.append(
        Message(role="assistant", content=[TextBlock(text="Let me try again")])
    )

    # A success for the very same tool_use_id is appended.
    retry_success = ToolResult(
        tool_use_id="error_tool",
        name="failing_tool",
        content="Success after retry",
        is_error=False,
    )
    base_agent.append_user_message([retry_success])

    # Only the success may remain for that call id.
    after_retry = results_for_error_tool()
    assert len(after_retry) == 1
    assert after_retry[0].content == "Success after retry"
    assert after_retry[0].is_error is False
|
|
349
|
+
|
|
350
|
+
def test_duplicate_success_results_prevented(self, base_agent):
    """A second success for an already-answered call is not stored.

    The first success result must be the one that survives.
    """
    # Conversation with one unanswered tool call.
    base_agent.history.extend(
        [
            Message(role="user", content=[TextBlock(text="Do something")]),
            Message(
                role="assistant",
                content=[ToolCall(id="success_tool", name="test_tool", input={})],
            ),
        ]
    )

    # First success for the call.
    base_agent.append_user_message(
        [
            ToolResult(
                tool_use_id="success_tool",
                name="test_tool",
                content="First successful execution",
                is_error=False,
            )
        ]
    )

    # The assistant replies in between.
    base_agent.history.append(
        Message(role="assistant", content=[TextBlock(text="Continuing...")])
    )

    # A second success for the same tool_use_id is attempted.
    base_agent.append_user_message(
        [
            ToolResult(
                tool_use_id="success_tool",
                name="test_tool",
                content="Second successful execution",
                is_error=False,
            )
        ]
    )

    # Gather all stored results for that call id.
    kept = [
        block
        for entry in base_agent.history
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult) and block.tool_use_id == "success_tool"
    ]

    # Exactly one result remains, and it is the first one.
    assert len(kept) == 1
    assert kept[0].content == "First successful execution"
    assert kept[0].is_error is False
|
|
389
|
+
|
|
390
|
+
def test_cross_message_tool_results_during_restoration(self, base_agent):
    """A tool result separated from its call is moved directly after the call.

    The input has a user text message between the assistant's tool call and
    the matching result. The fixed output must be ordered call, result,
    text, with the result present exactly once.
    """
    call_turn = Message(
        role="assistant",
        content=[
            TextBlock(text="I'll check that file."),
            ToolCall(id="toolu_test123", name="read_file", input={"path": "test.py"}),
        ],
    )
    # Sits between the call and its result in the input.
    interjection = Message(role="user", content=[TextBlock(text="Please hurry up!")])
    # The result arrives one message later than it should.
    misplaced_result = Message(
        role="user",
        content=[
            ToolResult(
                tool_use_id="toolu_test123",
                content="File contents: print('hello')",
                name="read_file",
                is_error=False,
            )
        ],
    )
    messages = [call_turn, interjection, misplaced_result]

    repaired = base_agent.fix_incomplete_tool_calls(messages)

    # Still three messages: call, relocated result, user text.
    assert len(repaired) == 3

    # Position 0: the assistant turn containing the tool call.
    assert repaired[0].role == "assistant"
    assert any(isinstance(block, ToolCall) for block in repaired[0].content)

    # Position 1: the tool result, now adjacent to its call.
    assert repaired[1].role == "user"
    adjacent_results = [
        block for block in repaired[1].content if isinstance(block, ToolResult)
    ]
    assert len(adjacent_results) == 1
    assert adjacent_results[0].tool_use_id == "toolu_test123"
    assert adjacent_results[0].content == "File contents: print('hello')"

    # Position 2: the user's text message.
    assert repaired[2].role == "user"
    assert repaired[2].content[0].text == "Please hurry up!"

    # The result must not be duplicated anywhere in the fixed history.
    every_result = [
        block
        for entry in repaired
        if entry.role == "user" and isinstance(entry.content, list)
        for block in entry.content
        if isinstance(block, ToolResult)
    ]
    assert len(every_result) == 1
|
|
446
|
+
|
|
447
|
+
def test_multiple_tool_calls_with_scattered_results(self, base_agent):
    """Results spread over several messages are gathered right after the call.

    Two tool calls are issued; one result follows immediately and the other
    only after an unrelated user message. In the fixed history both results
    must sit in the message directly after the assistant turn, and no later
    user message may still carry a tool result.
    """
    call_turn = Message(
        role="assistant",
        content=[
            TextBlock(text="I'll check both files."),
            ToolCall(id="toolu_001", name="read_file", input={"path": "file1.py"}),
            ToolCall(id="toolu_002", name="read_file", input={"path": "file2.py"}),
        ],
    )
    # Only the first result directly follows the call.
    first_result = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="toolu_001", content="File 1 contents", name="read_file", is_error=False)
        ],
    )
    # Unrelated user activity in between.
    interjection = Message(role="user", content=[TextBlock(text="Also check file2")])
    # The second result shows up afterwards.
    second_result = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="toolu_002", content="File 2 contents", name="read_file", is_error=False)
        ],
    )
    messages = [call_turn, first_result, interjection, second_result]

    repaired = base_agent.fix_incomplete_tool_calls(messages)

    # Position 0: the assistant turn with the tool calls.
    assert repaired[0].role == "assistant"

    # Position 1: a user message carrying BOTH results.
    assert repaired[1].role == "user"
    gathered = [block for block in repaired[1].content if isinstance(block, ToolResult)]
    assert len(gathered) == 2

    gathered_ids = {block.tool_use_id for block in gathered}
    assert gathered_ids == {"toolu_001", "toolu_002"}

    # Everything after that must be free of tool results.
    for later in repaired[2:]:
        if later.role == "user" and isinstance(later.content, list):
            assert not any(isinstance(block, ToolResult) for block in later.content)
|
|
496
|
+
|
|
497
|
+
def test_no_dummy_creation_when_providing_all_results(self, base_agent, monkeypatch):
    """Appending every tool result in one go must not invoke fix_incomplete_tool_calls.

    A spy is installed over ``base_agent.fix_incomplete_tool_calls``, the history is
    seeded with an assistant turn holding two tool calls, and both matching results
    are appended in a single user message. The spy must record no invocation and
    both results must be present in the history.
    """
    # Spy: records each invocation, then delegates to the real implementation.
    spy_invocations = []
    real_fix = base_agent.fix_incomplete_tool_calls

    def mock_fix(messages):
        spy_invocations.append(True)
        return real_fix(messages)

    monkeypatch.setattr(base_agent, "fix_incomplete_tool_calls", mock_fix)

    # Seed the history: a user prompt followed by an assistant turn with two tool calls.
    user_turn = Message(role="user", content=[TextBlock(text="Do something")])
    assistant_turn = Message(
        role="assistant",
        content=[
            ToolCall(id="tool_1", name="tool1", input={}),
            ToolCall(id="tool_2", name="tool2", input={}),
        ],
    )
    base_agent.history.extend([user_turn, assistant_turn])

    # Hand over both results at once - nothing is missing, so no repair pass is expected.
    base_agent.append_user_message(
        [
            ToolResult(tool_use_id="tool_1", name="tool1", content="Result 1", is_error=False),
            ToolResult(tool_use_id="tool_2", name="tool2", content="Result 2", is_error=False),
        ]
    )

    # The spy must not have fired.
    assert not spy_invocations, "fix_incomplete_tool_calls should not be called when all results are provided"

    # Gather every ToolResult found in list-content user messages of the history.
    recorded_results = [
        block
        for msg in base_agent.history
        if msg.role == "user" and isinstance(msg.content, list)
        for block in msg.content
        if isinstance(block, ToolResult)
    ]

    assert len(recorded_results) == 2
    assert {result.tool_use_id for result in recorded_results} == {"tool_1", "tool_2"}
|
|
545
|
+
|
|
546
|
+
# Note: When partial tool results are provided, the current implementation still creates
|
|
547
|
+
# dummy results for ALL missing tool calls first, then immediately replaces the ones we
|
|
548
|
+
# have results for. This results in log messages like:
|
|
549
|
+
# - "Adding placeholder result for missing tool call: X"
|
|
550
|
+
# - "Replaced tool result for tool_use_id: X"
|
|
551
|
+
# This is an acceptable trade-off for the simplicity of the implementation, and it only
|
|
552
|
+
# happens when some (but not all) tool results are provided, which is an edge case.
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
if __name__ == "__main__":
    # Allow running this test module directly. pytest.main returns the session's
    # exit code; raise it as SystemExit so a failing run yields a non-zero process
    # status instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Tests for Conversation empty message handling."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from kolega_code.agent.conversation import Conversation
|
|
6
|
+
from kolega_code.llm.models import Message, TextBlock
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_append_assistant_message_with_empty_content(caplog):
    """An assistant message with no content is stored with placeholder text and logs a warning."""
    convo = Conversation()
    blank_reply = Message(role="assistant", content=[])

    # Capture WARNING-level records from the conversation module's logger.
    with caplog.at_level(logging.WARNING, logger="kolega_code.agent.conversation"):
        convo.append_assistant(blank_reply)

    assert "Assistant message has empty content" in caplog.text

    # Exactly one message is stored, and it carries a single placeholder block.
    assert len(convo.history) == 1
    stored = convo.history[0]
    assert stored.role == "assistant"
    assert len(stored.content) == 1
    placeholder = stored.content[0]
    assert placeholder.text == "[Assistant returned no message content]"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_append_user_message_with_empty_content(caplog):
    """Appending an empty user content list stores placeholder text and logs a warning."""
    convo = Conversation()

    # Capture WARNING-level records from the conversation module's logger.
    with caplog.at_level(logging.WARNING, logger="kolega_code.agent.conversation"):
        convo.append_user([])

    assert "User message has empty content" in caplog.text

    # Exactly one user message is stored and its first block is the placeholder.
    assert len(convo.history) == 1
    stored = convo.history[0]
    assert stored.role == "user"
    first_block = stored.content[0]
    assert first_block.text == "[User provided no message content]"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_append_assistant_message_with_content_is_untouched():
    """A non-empty assistant message is appended as the very same object."""
    convo = Conversation()
    greeting = Message(role="assistant", content=[TextBlock(text="hello")])

    convo.append_assistant(greeting)

    # Identity check: the last history entry is the original message object itself.
    last_entry = convo.history[-1]
    assert last_entry is greeting
|