kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. kolega_code/__init__.py +151 -0
  2. kolega_code/agent/__init__.py +42 -0
  3. kolega_code/agent/baseagent.py +998 -0
  4. kolega_code/agent/browseragent.py +123 -0
  5. kolega_code/agent/coder.py +157 -0
  6. kolega_code/agent/common.py +41 -0
  7. kolega_code/agent/compression.py +81 -0
  8. kolega_code/agent/context.py +112 -0
  9. kolega_code/agent/conversation.py +408 -0
  10. kolega_code/agent/generalagent.py +146 -0
  11. kolega_code/agent/investigationagent.py +123 -0
  12. kolega_code/agent/planningagent.py +187 -0
  13. kolega_code/agent/prompt_provider.py +196 -0
  14. kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
  15. kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
  16. kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
  17. kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
  18. kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
  19. kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
  20. kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
  21. kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
  22. kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
  23. kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
  24. kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
  25. kolega_code/agent/prompts.py +192 -0
  26. kolega_code/agent/tests/__init__.py +0 -0
  27. kolega_code/agent/tests/llm/__init__.py +0 -0
  28. kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
  29. kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
  30. kolega_code/agent/tests/llm/test_client.py +773 -0
  31. kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
  32. kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
  33. kolega_code/agent/tests/llm/test_exceptions.py +249 -0
  34. kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
  35. kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
  36. kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
  37. kolega_code/agent/tests/llm/test_model_specs.py +17 -0
  38. kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
  39. kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
  40. kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
  41. kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
  42. kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
  43. kolega_code/agent/tests/services/__init__.py +1 -0
  44. kolega_code/agent/tests/services/test_browser.py +447 -0
  45. kolega_code/agent/tests/services/test_browser_parity.py +353 -0
  46. kolega_code/agent/tests/services/test_file_system.py +699 -0
  47. kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
  48. kolega_code/agent/tests/services/test_terminal.py +154 -0
  49. kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
  50. kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
  51. kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
  52. kolega_code/agent/tests/test_base_agent.py +1942 -0
  53. kolega_code/agent/tests/test_coder_attachments.py +330 -0
  54. kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
  55. kolega_code/agent/tests/test_commands.py +179 -0
  56. kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
  57. kolega_code/agent/tests/test_empty_message_handling.py +48 -0
  58. kolega_code/agent/tests/test_general_agent.py +242 -0
  59. kolega_code/agent/tests/test_html.py +320 -0
  60. kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
  61. kolega_code/agent/tests/test_planning_agent.py +227 -0
  62. kolega_code/agent/tests/test_prompt_provider.py +271 -0
  63. kolega_code/agent/tests/test_tool_registry.py +102 -0
  64. kolega_code/agent/tests/test_tools.py +549 -0
  65. kolega_code/agent/tests/tool_backend/__init__.py +0 -0
  66. kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
  67. kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
  68. kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
  69. kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
  70. kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
  71. kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
  72. kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
  73. kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
  74. kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
  75. kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
  76. kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
  77. kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
  78. kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
  79. kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
  80. kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
  81. kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
  82. kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
  83. kolega_code/agent/tool_backend/agent_tool.py +414 -0
  84. kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
  85. kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
  86. kolega_code/agent/tool_backend/base_tool.py +217 -0
  87. kolega_code/agent/tool_backend/browser_tool.py +271 -0
  88. kolega_code/agent/tool_backend/build_tool.py +93 -0
  89. kolega_code/agent/tool_backend/create_file_tool.py +52 -0
  90. kolega_code/agent/tool_backend/glob_tool.py +323 -0
  91. kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
  92. kolega_code/agent/tool_backend/memory_tool.py +79 -0
  93. kolega_code/agent/tool_backend/read_file_tool.py +119 -0
  94. kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
  95. kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
  96. kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
  97. kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
  98. kolega_code/agent/tool_backend/streaming_tool.py +47 -0
  99. kolega_code/agent/tool_backend/terminal_tool.py +643 -0
  100. kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
  101. kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
  102. kolega_code/agent/tools.py +1704 -0
  103. kolega_code/agent/utils/commands.py +94 -0
  104. kolega_code/cli/__init__.py +1 -0
  105. kolega_code/cli/app.py +2756 -0
  106. kolega_code/cli/config.py +280 -0
  107. kolega_code/cli/connection.py +49 -0
  108. kolega_code/cli/file_index.py +147 -0
  109. kolega_code/cli/main.py +564 -0
  110. kolega_code/cli/mentions.py +155 -0
  111. kolega_code/cli/messages.py +89 -0
  112. kolega_code/cli/provider_registry.py +96 -0
  113. kolega_code/cli/session_store.py +207 -0
  114. kolega_code/cli/settings.py +87 -0
  115. kolega_code/cli/skills.py +409 -0
  116. kolega_code/cli/slash_commands.py +108 -0
  117. kolega_code/cli/tests/__init__.py +1 -0
  118. kolega_code/cli/tests/test_app.py +4251 -0
  119. kolega_code/cli/tests/test_cli_config.py +171 -0
  120. kolega_code/cli/tests/test_connection.py +26 -0
  121. kolega_code/cli/tests/test_file_index.py +103 -0
  122. kolega_code/cli/tests/test_main.py +455 -0
  123. kolega_code/cli/tests/test_mentions.py +108 -0
  124. kolega_code/cli/tests/test_session_store.py +67 -0
  125. kolega_code/cli/tests/test_settings.py +62 -0
  126. kolega_code/cli/tests/test_skills.py +157 -0
  127. kolega_code/cli/tests/test_slash_commands.py +88 -0
  128. kolega_code/cli/theme.py +180 -0
  129. kolega_code/config.py +154 -0
  130. kolega_code/events.py +202 -0
  131. kolega_code/llm/client.py +300 -0
  132. kolega_code/llm/exceptions.py +285 -0
  133. kolega_code/llm/instrumented_client.py +520 -0
  134. kolega_code/llm/models.py +1368 -0
  135. kolega_code/llm/providers/__init__.py +0 -0
  136. kolega_code/llm/providers/anthropic.py +387 -0
  137. kolega_code/llm/providers/base.py +71 -0
  138. kolega_code/llm/providers/google.py +157 -0
  139. kolega_code/llm/providers/models.py +37 -0
  140. kolega_code/llm/providers/openai.py +363 -0
  141. kolega_code/llm/ratelimit.py +40 -0
  142. kolega_code/llm/specs.py +67 -0
  143. kolega_code/llm/tool_execution_ids.py +18 -0
  144. kolega_code/models/__init__.py +9 -0
  145. kolega_code/models/sandbox_terminal_state.py +47 -0
  146. kolega_code/runtime.py +50 -0
  147. kolega_code/sandbox/README.md +200 -0
  148. kolega_code/sandbox/__init__.py +21 -0
  149. kolega_code/sandbox/async_filesystem.py +475 -0
  150. kolega_code/sandbox/base.py +297 -0
  151. kolega_code/sandbox/browser.py +25 -0
  152. kolega_code/sandbox/event_loop.py +43 -0
  153. kolega_code/sandbox/filesystem.py +341 -0
  154. kolega_code/sandbox/local.py +118 -0
  155. kolega_code/sandbox/serializer.py +175 -0
  156. kolega_code/sandbox/terminal.py +868 -0
  157. kolega_code/sandbox/utils.py +216 -0
  158. kolega_code/services/base.py +255 -0
  159. kolega_code/services/browser.py +444 -0
  160. kolega_code/services/file_system.py +749 -0
  161. kolega_code/services/html.py +221 -0
  162. kolega_code/services/terminal.py +903 -0
  163. kolega_code/tools/__init__.py +22 -0
  164. kolega_code/tools/core.py +33 -0
  165. kolega_code/tools/definitions.py +81 -0
  166. kolega_code/tools/registry.py +73 -0
  167. kolega_code-0.1.0.dist-info/METADATA +157 -0
  168. kolega_code-0.1.0.dist-info/RECORD +171 -0
  169. kolega_code-0.1.0.dist-info/WHEEL +4 -0
  170. kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
  171. kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,556 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test duplicate tool result prevention logic.
4
+ """
5
+
6
+ import pytest
7
+ from unittest.mock import Mock, patch, MagicMock
8
+ from kolega_code.agent.baseagent import BaseAgent
9
+ from kolega_code.llm.models import Message, TextBlock, ToolCall, ToolResult
10
+ from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
11
+ from kolega_code.services.file_system import LocalFileSystem
12
+
13
+
14
class TestDuplicateToolResultPrevention:
    """Test that duplicate tool results are properly handled.

    Every test drives ``BaseAgent.append_user_message`` and/or
    ``BaseAgent.fix_incomplete_tool_calls`` and then asserts on the
    ``ToolResult`` blocks found in the resulting message list.
    """

    @staticmethod
    def _seed_pending_tool_calls(agent, prompt, *calls):
        """Extend ``agent.history`` with a user prompt and one assistant turn.

        Args:
            agent: The agent whose history is extended in place.
            prompt: Text of the leading user message.
            *calls: ``(tool_call_id, tool_name)`` pairs; each becomes a
                ``ToolCall`` with an empty ``input`` in the assistant message.
        """
        agent.history.extend(
            [
                Message(role="user", content=[TextBlock(text=prompt)]),
                Message(
                    role="assistant",
                    content=[ToolCall(id=call_id, name=tool_name, input={}) for call_id, tool_name in calls],
                ),
            ]
        )

    @staticmethod
    def _tool_results(messages, tool_use_id=None):
        """Return the ``ToolResult`` blocks carried by user messages.

        Args:
            messages: Iterable of messages to scan, in order.
            tool_use_id: When given, keep only results for this tool call id.

        Returns:
            A list of matching ``ToolResult`` blocks in encounter order.
            Messages that are not from the user, or whose content is not a
            list, are skipped.
        """
        found = []
        for msg in messages:
            if msg.role != "user" or not isinstance(msg.content, list):
                continue
            for block in msg.content:
                if not isinstance(block, ToolResult):
                    continue
                if tool_use_id is None or block.tool_use_id == tool_use_id:
                    found.append(block)
        return found

    @pytest.fixture
    def base_agent(self):
        """Create a base agent instance for testing.

        The agent's collaborators are patched while ``BaseAgent`` is being
        constructed; the patches are undone when the ``with`` block is left,
        i.e. as soon as the fixture returns the agent.
        """

        def anthropic_model(**extra):
            # All three model slots share the same provider/model; only the
            # thinking slot passes an extra keyword.
            return ModelConfig(
                provider=ModelProvider.ANTHROPIC, model="test-model", rate_limits=RateLimitConfig(), **extra
            )

        config = AgentConfig(
            anthropic_api_key="test-key",
            openai_api_key="test-key",
            long_context_config=anthropic_model(),
            fast_config=anthropic_model(),
            thinking_config=anthropic_model(thinking_tokens=1024),
        )

        with patch("kolega_code.agent.baseagent.AgentConnectionManager"), \
                patch("kolega_code.agent.baseagent.get_model_specs") as mock_get_model_specs, \
                patch("kolega_code.agent.context.LocalTerminalManager"), \
                patch("kolega_code.agent.context.PlaywrightBrowserManager"), \
                patch("kolega_code.agent.context.LLMClient"), \
                patch("kolega_code.agent.baseagent.ToolCollection"), \
                patch("kolega_code.agent.context.LocalFileSystem") as mock_filesystem_class:

            # Mock get_model_specs to return reasonable values
            mock_get_model_specs.return_value = {"context_length": 100000, "max_completion_tokens": 4096}

            # The patched filesystem class hands out a mock that reports the
            # project path as an existing directory.
            fake_filesystem = Mock()
            fake_filesystem.exists.return_value = True
            fake_filesystem.is_dir.return_value = True
            mock_filesystem_class.return_value = fake_filesystem

            agent = BaseAgent(
                project_path="/test/path",
                workspace_id="test-workspace",
                thread_id="test-thread",
                connection_manager=Mock(),
                config=config,
            )
            agent.llm = Mock()
            return agent

    def test_replace_dummy_result_with_real_result(self, base_agent):
        """A real result for the pending tool call is the only result stored for it."""
        self._seed_pending_tool_calls(base_agent, "Do something", ("tool_123", "test_tool"))

        # NOTE(review): the original comment said append_user_message triggers
        # fix_incomplete_tool_calls and then swaps the dummy for the real result.
        # test_no_dummy_creation_when_providing_all_results asserts that
        # fix_incomplete_tool_calls is NOT called when every result is supplied,
        # so only the observable outcome is described here - confirm against
        # BaseAgent.
        real_result = ToolResult(
            tool_use_id="tool_123", name="test_tool", content="Real tool execution result", is_error=False
        )
        base_agent.append_user_message([real_result])

        # Two user messages are expected: the original prompt and the tool result message
        user_messages = [msg for msg in base_agent.history if msg.role == "user"]
        assert len(user_messages) == 2

        # The tool result message carries exactly the real result
        tool_result_msg = user_messages[-1]
        assert len(tool_result_msg.content) == 1
        assert tool_result_msg.content[0].tool_use_id == "tool_123"
        assert tool_result_msg.content[0].content == "Real tool execution result"
        assert tool_result_msg.content[0].is_error is False

    def test_multiple_tool_calls_with_partial_real_results(self, base_agent):
        """Test handling multiple tool calls where some have real results and others are interrupted."""
        self._seed_pending_tool_calls(
            base_agent,
            "Do multiple things",
            ("tool_1", "tool1"),
            ("tool_2", "tool2"),
            ("tool_3", "tool3"),
        )

        # Append real results for tool_1 and tool_3, but tool_2 was interrupted
        real_results = [
            ToolResult(tool_use_id="tool_1", name="tool1", content="Tool 1 completed successfully", is_error=False),
            ToolResult(tool_use_id="tool_3", name="tool3", content="Tool 3 completed successfully", is_error=False),
        ]
        base_agent.append_user_message(real_results)

        # History is now invalid - append doesn't fix
        assert not base_agent._is_history_valid_for_anthropic()

        # The appended message holds only the 2 results that were provided
        user_messages = [msg for msg in base_agent.history if msg.role == "user"]
        tool_result_msg = user_messages[-1]
        assert len(tool_result_msg.content) == 2

        # But when we fix the history, it should have all 3
        fixed_history = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
        fixed_user_messages = [msg for msg in fixed_history if msg.role == "user"]
        fixed_tool_result_msg = fixed_user_messages[-1]

        # 3 results total: 2 real + 1 dummy for tool_2
        assert len(fixed_tool_result_msg.content) == 3
        tool_results_by_id = {r.tool_use_id: r for r in fixed_tool_result_msg.content}

        # tool_1 should have real result
        assert tool_results_by_id["tool_1"].content == "Tool 1 completed successfully"
        assert tool_results_by_id["tool_1"].is_error is False

        # tool_2 should have dummy result
        assert "Operation was interrupted" in tool_results_by_id["tool_2"].content
        assert tool_results_by_id["tool_2"].is_error is True

        # tool_3 should have real result
        assert tool_results_by_id["tool_3"].content == "Tool 3 completed successfully"
        assert tool_results_by_id["tool_3"].is_error is False

    def test_immediate_real_result_replaces_dummy_same_operation(self, base_agent):
        """Appending the real result right after the tool call leaves no dummy result behind."""
        self._seed_pending_tool_calls(base_agent, "Do something", ("immediate_tool", "test_tool"))

        # NOTE(review): the original comments described an internal sequence
        # (_needs_tool_call_fix() returns True, a dummy is created, the real
        # result then replaces it). That sequence is not visible from this test
        # and conflicts with test_no_dummy_creation_when_providing_all_results;
        # verify against BaseAgent.append_user_message.
        real_result = ToolResult(
            tool_use_id="immediate_tool", name="test_tool", content="Real execution result", is_error=False
        )
        base_agent.append_user_message([real_result])

        # Verify only the real result exists, not the dummy
        tool_results = self._tool_results(base_agent.history)
        assert len(tool_results) == 1
        assert tool_results[0].tool_use_id == "immediate_tool"
        assert tool_results[0].content == "Real execution result"
        assert tool_results[0].is_error is False
        # Ensure it's not the dummy
        assert "Operation was interrupted" not in tool_results[0].content

    def test_delayed_real_result_replaces_dummy(self, base_agent):
        """Test that a delayed real result replaces a previously added dummy result."""
        self._seed_pending_tool_calls(base_agent, "Do something", ("delayed_tool", "slow_tool"))

        # Append a text message - history remains invalid
        base_agent.append_user_message([TextBlock(text="Status check")])

        # Verify history is invalid (no dummy was created)
        assert not base_agent._is_history_valid_for_anthropic()

        # Remove the status check message to prepare for the real tool result
        base_agent.history = base_agent.history[:-1]

        # Manually fix the history to simulate what would happen before sending to LLM
        fixed_history = base_agent.fix_incomplete_tool_calls(list(base_agent.history))

        # Verify dummy was created in the fixed history
        tool_results = self._tool_results(fixed_history)
        assert len(tool_results) == 1
        assert tool_results[0].tool_use_id == "delayed_tool"
        assert "Operation was interrupted" in tool_results[0].content
        assert tool_results[0].is_error is True

        # Now replace the history with the fixed version to simulate a real scenario
        base_agent.history = fixed_history

        # Add another assistant message (simulating continued conversation)
        base_agent.history.append(Message(role="assistant", content=[TextBlock(text="Let me continue...")]))

        # The real result arrives late
        real_result = ToolResult(
            tool_use_id="delayed_tool", name="slow_tool", content="Finally completed!", is_error=False
        )
        base_agent.append_user_message([real_result])

        # Exactly one result may remain for this tool_use_id across all
        # messages, and it must be the real one rather than the dummy.
        final_tool_results = self._tool_results(base_agent.history, "delayed_tool")
        assert len(final_tool_results) == 1
        assert final_tool_results[0].content == "Finally completed!"
        assert final_tool_results[0].is_error is False

    def test_no_duplicate_when_all_results_provided_immediately(self, base_agent):
        """Test that no duplicates are created when all results are provided immediately."""
        self._seed_pending_tool_calls(
            base_agent, "Do something", ("immediate_1", "tool1"), ("immediate_2", "tool2")
        )

        # Append all results immediately
        results = [
            ToolResult(tool_use_id="immediate_1", name="tool1", content="Result 1", is_error=False),
            ToolResult(tool_use_id="immediate_2", name="tool2", content="Result 2", is_error=False),
        ]
        base_agent.append_user_message(results)

        # Check that we have exactly the expected results with no duplicates
        tool_results = self._tool_results(base_agent.history)
        assert len(tool_results) == 2
        tool_ids = {r.tool_use_id for r in tool_results}
        assert tool_ids == {"immediate_1", "immediate_2"}

    def test_real_error_result_not_replaced(self, base_agent):
        """Test that real error results are replaced by success results.

        NOTE: despite the method name, the assertions below require the
        earlier error result to be replaced by the later success result for
        the same ``tool_use_id``.
        """
        self._seed_pending_tool_calls(base_agent, "Do something", ("error_tool", "failing_tool"))

        # Append a real error result (not a dummy)
        real_error = ToolResult(
            tool_use_id="error_tool",
            name="failing_tool",
            content="FileNotFoundError: The file does not exist",
            is_error=True,
        )
        base_agent.append_user_message([real_error])

        # Verify we have the error
        tool_results = self._tool_results(base_agent.history, "error_tool")
        assert len(tool_results) == 1
        assert tool_results[0].is_error is True

        # Add an assistant response
        base_agent.history.append(Message(role="assistant", content=[TextBlock(text="Let me try again")]))

        # Try to append a success result for the same tool
        success_result = ToolResult(
            tool_use_id="error_tool", name="failing_tool", content="Success after retry", is_error=False
        )
        base_agent.append_user_message([success_result])

        # Only one result may remain - the success replaced the error
        final_tool_results = self._tool_results(base_agent.history, "error_tool")
        assert len(final_tool_results) == 1
        assert final_tool_results[0].content == "Success after retry"
        assert final_tool_results[0].is_error is False

    def test_duplicate_success_results_prevented(self, base_agent):
        """Test that duplicate success results for the same tool ID are prevented."""
        self._seed_pending_tool_calls(base_agent, "Do something", ("success_tool", "test_tool"))

        # Append first success result
        first_success = ToolResult(
            tool_use_id="success_tool", name="test_tool", content="First successful execution", is_error=False
        )
        base_agent.append_user_message([first_success])

        # Add assistant response
        base_agent.history.append(Message(role="assistant", content=[TextBlock(text="Continuing...")]))

        # Try to append another success result for the same tool
        second_success = ToolResult(
            tool_use_id="success_tool", name="test_tool", content="Second successful execution", is_error=False
        )
        base_agent.append_user_message([second_success])

        # Only one result may remain - the first one is kept
        tool_results = self._tool_results(base_agent.history, "success_tool")
        assert len(tool_results) == 1
        assert tool_results[0].content == "First successful execution"
        assert tool_results[0].is_error is False

    def test_cross_message_tool_results_during_restoration(self, base_agent):
        """Test that tool results found in non-adjacent messages are handled correctly during restoration."""
        # Create a scenario where tool result is not in the immediately following message
        messages = [
            Message(
                role="assistant",
                content=[
                    TextBlock(text="I'll check that file."),
                    ToolCall(id="toolu_test123", name="read_file", input={"path": "test.py"}),
                ],
            ),
            # This message is between the tool call and its result
            Message(role="user", content=[TextBlock(text="Please hurry up!")]),
            # Tool result appears here instead of immediately after tool call
            Message(
                role="user",
                content=[
                    ToolResult(
                        tool_use_id="toolu_test123",
                        content="File contents: print('hello')",
                        name="read_file",
                        is_error=False,
                    )
                ],
            ),
        ]

        fixed_messages = base_agent.fix_incomplete_tool_calls(messages)

        # Should have 3 messages: assistant with tool call, user with tool result, user with text
        assert len(fixed_messages) == 3

        # First message should be the assistant message
        assert fixed_messages[0].role == "assistant"
        assert any(isinstance(block, ToolCall) for block in fixed_messages[0].content)

        # Second message should have the tool result (moved to correct position)
        assert fixed_messages[1].role == "user"
        tool_results = [block for block in fixed_messages[1].content if isinstance(block, ToolResult)]
        assert len(tool_results) == 1
        assert tool_results[0].tool_use_id == "toolu_test123"
        assert tool_results[0].content == "File contents: print('hello')"

        # Third message should be the user text message
        assert fixed_messages[2].role == "user"
        assert fixed_messages[2].content[0].text == "Please hurry up!"

        # Verify no duplicate tool results: only one in total
        all_tool_results = self._tool_results(fixed_messages)
        assert len(all_tool_results) == 1

    def test_multiple_tool_calls_with_scattered_results(self, base_agent):
        """Test handling multiple tool calls where results are scattered across messages."""
        messages = [
            Message(
                role="assistant",
                content=[
                    TextBlock(text="I'll check both files."),
                    ToolCall(id="toolu_001", name="read_file", input={"path": "file1.py"}),
                    ToolCall(id="toolu_002", name="read_file", input={"path": "file2.py"}),
                ],
            ),
            # Only one result in the next message
            Message(
                role="user",
                content=[
                    ToolResult(tool_use_id="toolu_001", content="File 1 contents", name="read_file", is_error=False)
                ],
            ),
            # Other user activity
            Message(role="user", content=[TextBlock(text="Also check file2")]),
            # Second result appears later
            Message(
                role="user",
                content=[
                    ToolResult(tool_use_id="toolu_002", content="File 2 contents", name="read_file", is_error=False)
                ],
            ),
        ]

        fixed_messages = base_agent.fix_incomplete_tool_calls(messages)

        # First message: assistant with tool calls
        assert fixed_messages[0].role == "assistant"

        # Second message: should have BOTH tool results
        assert fixed_messages[1].role == "user"
        tool_results = [block for block in fixed_messages[1].content if isinstance(block, ToolResult)]
        assert len(tool_results) == 2

        result_ids = {r.tool_use_id for r in tool_results}
        assert result_ids == {"toolu_001", "toolu_002"}

        # No tool results should remain in any of the later messages
        assert len(self._tool_results(fixed_messages[2:])) == 0

    def test_no_dummy_creation_when_providing_all_results(self, base_agent, monkeypatch):
        """Test that dummy results are not created when all tool results are provided immediately."""
        # Track calls to fix_incomplete_tool_calls while still delegating to the real method
        fix_calls = []
        original_fix = base_agent.fix_incomplete_tool_calls

        def mock_fix(messages):
            fix_calls.append(True)
            return original_fix(messages)

        monkeypatch.setattr(base_agent, "fix_incomplete_tool_calls", mock_fix)

        self._seed_pending_tool_calls(base_agent, "Do something", ("tool_1", "tool1"), ("tool_2", "tool2"))

        # Append all results immediately - this should NOT trigger fix_incomplete_tool_calls
        results = [
            ToolResult(tool_use_id="tool_1", name="tool1", content="Result 1", is_error=False),
            ToolResult(tool_use_id="tool_2", name="tool2", content="Result 2", is_error=False),
        ]
        base_agent.append_user_message(results)

        # Verify fix_incomplete_tool_calls was NOT called
        assert len(fix_calls) == 0, "fix_incomplete_tool_calls should not be called when all results are provided"

        # Verify the results were added correctly
        tool_results = self._tool_results(base_agent.history)
        assert len(tool_results) == 2
        tool_ids = {r.tool_use_id for r in tool_results}
        assert tool_ids == {"tool_1", "tool_2"}

    # NOTE(review): a note that used to close this class said that, when only
    # some tool results are provided, the implementation first creates dummy
    # results for ALL missing tool calls and then immediately replaces the ones
    # it has results for (logging "Adding placeholder result for missing tool
    # call: X" and "Replaced tool result for tool_use_id: X"), calling that an
    # acceptable trade-off for an edge case. That description disagrees with
    # test_multiple_tool_calls_with_partial_real_results, which asserts that
    # append_user_message leaves the history invalid and that placeholders only
    # appear after fix_incomplete_tool_calls. Confirm against BaseAgent which
    # of the two is current.
553
+
554
+
555
if __name__ == "__main__":
    # Running the file directly executes only this module's tests, verbosely.
    # pytest.main() returns the exit code rather than exiting, so pass it to
    # SystemExit; otherwise a failing run would still end with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,48 @@
1
+ """Tests for Conversation empty message handling."""
2
+
3
+ import logging
4
+
5
+ from kolega_code.agent.conversation import Conversation
6
+ from kolega_code.llm.models import Message, TextBlock
7
+
8
+
9
def test_append_assistant_message_with_empty_content(caplog):
    """An assistant message without content blocks is stored with placeholder text.

    The warning text must show up in the captured log output, and the single
    stored message must carry exactly one block holding the placeholder.
    """
    convo = Conversation()
    blank_reply = Message(role="assistant", content=[])

    # Capture at WARNING level for the conversation module's logger only
    # while the message is appended.
    with caplog.at_level(logging.WARNING, logger="kolega_code.agent.conversation"):
        convo.append_assistant(blank_reply)

    assert "Assistant message has empty content" in caplog.text

    assert len(convo.history) == 1
    stored = convo.history[0]
    assert stored.role == "assistant"
    assert len(stored.content) == 1
    assert stored.content[0].text == "[Assistant returned no message content]"
25
+
26
+
27
def test_append_user_message_with_empty_content(caplog):
    """Appending an empty user content list stores a placeholder text block.

    The warning text must show up in the captured log output, and the single
    stored user message must start with the placeholder block.
    """
    convo = Conversation()

    # Capture at WARNING level for the conversation module's logger only
    # while the empty content list is appended.
    with caplog.at_level(logging.WARNING, logger="kolega_code.agent.conversation"):
        convo.append_user([])

    assert "User message has empty content" in caplog.text

    assert len(convo.history) == 1
    stored = convo.history[0]
    assert stored.role == "user"
    assert stored.content[0].text == "[User provided no message content]"
40
+
41
+
42
def test_append_assistant_message_with_content_is_untouched():
    """An assistant message that already has content is stored as the same object."""
    convo = Conversation()
    reply = Message(role="assistant", content=[TextBlock(text="hello")])

    convo.append_assistant(reply)

    # Identity, not equality: the last history entry must be the very
    # object that was passed in.
    assert convo.history[-1] is reply