kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. kolega_code/__init__.py +151 -0
  2. kolega_code/agent/__init__.py +42 -0
  3. kolega_code/agent/baseagent.py +998 -0
  4. kolega_code/agent/browseragent.py +123 -0
  5. kolega_code/agent/coder.py +157 -0
  6. kolega_code/agent/common.py +41 -0
  7. kolega_code/agent/compression.py +81 -0
  8. kolega_code/agent/context.py +112 -0
  9. kolega_code/agent/conversation.py +408 -0
  10. kolega_code/agent/generalagent.py +146 -0
  11. kolega_code/agent/investigationagent.py +123 -0
  12. kolega_code/agent/planningagent.py +187 -0
  13. kolega_code/agent/prompt_provider.py +196 -0
  14. kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
  15. kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
  16. kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
  17. kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
  18. kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
  19. kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
  20. kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
  21. kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
  22. kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
  23. kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
  24. kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
  25. kolega_code/agent/prompts.py +192 -0
  26. kolega_code/agent/tests/__init__.py +0 -0
  27. kolega_code/agent/tests/llm/__init__.py +0 -0
  28. kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
  29. kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
  30. kolega_code/agent/tests/llm/test_client.py +773 -0
  31. kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
  32. kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
  33. kolega_code/agent/tests/llm/test_exceptions.py +249 -0
  34. kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
  35. kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
  36. kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
  37. kolega_code/agent/tests/llm/test_model_specs.py +17 -0
  38. kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
  39. kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
  40. kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
  41. kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
  42. kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
  43. kolega_code/agent/tests/services/__init__.py +1 -0
  44. kolega_code/agent/tests/services/test_browser.py +447 -0
  45. kolega_code/agent/tests/services/test_browser_parity.py +353 -0
  46. kolega_code/agent/tests/services/test_file_system.py +699 -0
  47. kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
  48. kolega_code/agent/tests/services/test_terminal.py +154 -0
  49. kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
  50. kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
  51. kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
  52. kolega_code/agent/tests/test_base_agent.py +1942 -0
  53. kolega_code/agent/tests/test_coder_attachments.py +330 -0
  54. kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
  55. kolega_code/agent/tests/test_commands.py +179 -0
  56. kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
  57. kolega_code/agent/tests/test_empty_message_handling.py +48 -0
  58. kolega_code/agent/tests/test_general_agent.py +242 -0
  59. kolega_code/agent/tests/test_html.py +320 -0
  60. kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
  61. kolega_code/agent/tests/test_planning_agent.py +227 -0
  62. kolega_code/agent/tests/test_prompt_provider.py +271 -0
  63. kolega_code/agent/tests/test_tool_registry.py +102 -0
  64. kolega_code/agent/tests/test_tools.py +549 -0
  65. kolega_code/agent/tests/tool_backend/__init__.py +0 -0
  66. kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
  67. kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
  68. kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
  69. kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
  70. kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
  71. kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
  72. kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
  73. kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
  74. kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
  75. kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
  76. kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
  77. kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
  78. kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
  79. kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
  80. kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
  81. kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
  82. kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
  83. kolega_code/agent/tool_backend/agent_tool.py +414 -0
  84. kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
  85. kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
  86. kolega_code/agent/tool_backend/base_tool.py +217 -0
  87. kolega_code/agent/tool_backend/browser_tool.py +271 -0
  88. kolega_code/agent/tool_backend/build_tool.py +93 -0
  89. kolega_code/agent/tool_backend/create_file_tool.py +52 -0
  90. kolega_code/agent/tool_backend/glob_tool.py +323 -0
  91. kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
  92. kolega_code/agent/tool_backend/memory_tool.py +79 -0
  93. kolega_code/agent/tool_backend/read_file_tool.py +119 -0
  94. kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
  95. kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
  96. kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
  97. kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
  98. kolega_code/agent/tool_backend/streaming_tool.py +47 -0
  99. kolega_code/agent/tool_backend/terminal_tool.py +643 -0
  100. kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
  101. kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
  102. kolega_code/agent/tools.py +1704 -0
  103. kolega_code/agent/utils/commands.py +94 -0
  104. kolega_code/cli/__init__.py +1 -0
  105. kolega_code/cli/app.py +2756 -0
  106. kolega_code/cli/config.py +280 -0
  107. kolega_code/cli/connection.py +49 -0
  108. kolega_code/cli/file_index.py +147 -0
  109. kolega_code/cli/main.py +564 -0
  110. kolega_code/cli/mentions.py +155 -0
  111. kolega_code/cli/messages.py +89 -0
  112. kolega_code/cli/provider_registry.py +96 -0
  113. kolega_code/cli/session_store.py +207 -0
  114. kolega_code/cli/settings.py +87 -0
  115. kolega_code/cli/skills.py +409 -0
  116. kolega_code/cli/slash_commands.py +108 -0
  117. kolega_code/cli/tests/__init__.py +1 -0
  118. kolega_code/cli/tests/test_app.py +4251 -0
  119. kolega_code/cli/tests/test_cli_config.py +171 -0
  120. kolega_code/cli/tests/test_connection.py +26 -0
  121. kolega_code/cli/tests/test_file_index.py +103 -0
  122. kolega_code/cli/tests/test_main.py +455 -0
  123. kolega_code/cli/tests/test_mentions.py +108 -0
  124. kolega_code/cli/tests/test_session_store.py +67 -0
  125. kolega_code/cli/tests/test_settings.py +62 -0
  126. kolega_code/cli/tests/test_skills.py +157 -0
  127. kolega_code/cli/tests/test_slash_commands.py +88 -0
  128. kolega_code/cli/theme.py +180 -0
  129. kolega_code/config.py +154 -0
  130. kolega_code/events.py +202 -0
  131. kolega_code/llm/client.py +300 -0
  132. kolega_code/llm/exceptions.py +285 -0
  133. kolega_code/llm/instrumented_client.py +520 -0
  134. kolega_code/llm/models.py +1368 -0
  135. kolega_code/llm/providers/__init__.py +0 -0
  136. kolega_code/llm/providers/anthropic.py +387 -0
  137. kolega_code/llm/providers/base.py +71 -0
  138. kolega_code/llm/providers/google.py +157 -0
  139. kolega_code/llm/providers/models.py +37 -0
  140. kolega_code/llm/providers/openai.py +363 -0
  141. kolega_code/llm/ratelimit.py +40 -0
  142. kolega_code/llm/specs.py +67 -0
  143. kolega_code/llm/tool_execution_ids.py +18 -0
  144. kolega_code/models/__init__.py +9 -0
  145. kolega_code/models/sandbox_terminal_state.py +47 -0
  146. kolega_code/runtime.py +50 -0
  147. kolega_code/sandbox/README.md +200 -0
  148. kolega_code/sandbox/__init__.py +21 -0
  149. kolega_code/sandbox/async_filesystem.py +475 -0
  150. kolega_code/sandbox/base.py +297 -0
  151. kolega_code/sandbox/browser.py +25 -0
  152. kolega_code/sandbox/event_loop.py +43 -0
  153. kolega_code/sandbox/filesystem.py +341 -0
  154. kolega_code/sandbox/local.py +118 -0
  155. kolega_code/sandbox/serializer.py +175 -0
  156. kolega_code/sandbox/terminal.py +868 -0
  157. kolega_code/sandbox/utils.py +216 -0
  158. kolega_code/services/base.py +255 -0
  159. kolega_code/services/browser.py +444 -0
  160. kolega_code/services/file_system.py +749 -0
  161. kolega_code/services/html.py +221 -0
  162. kolega_code/services/terminal.py +903 -0
  163. kolega_code/tools/__init__.py +22 -0
  164. kolega_code/tools/core.py +33 -0
  165. kolega_code/tools/definitions.py +81 -0
  166. kolega_code/tools/registry.py +73 -0
  167. kolega_code-0.1.0.dist-info/METADATA +157 -0
  168. kolega_code-0.1.0.dist-info/RECORD +171 -0
  169. kolega_code-0.1.0.dist-info/WHEEL +4 -0
  170. kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
  171. kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1942 @@
1
+ import os
2
+ import uuid
3
+ from types import SimpleNamespace
4
+ from unittest.mock import AsyncMock, patch
5
+
6
+ import pytest
7
+ from dotenv import load_dotenv
8
+
9
+ from kolega_code.agent.baseagent import BaseAgent
10
+ from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
11
+ from kolega_code.events import AgentConnectionManager
12
+ from kolega_code.llm.models import (
13
+ Message,
14
+ MessageHistory,
15
+ RedactedThinkingBlock,
16
+ TextBlock,
17
+ ThinkingBlock,
18
+ ToolCall,
19
+ ToolResult,
20
+ )
21
+
22
+ # Load environment variables
23
+ load_dotenv()
24
+
25
+
26
@pytest.fixture
def agent_config():
    """Build an AgentConfig whose three model slots all point at the same Anthropic model.

    The Anthropic key comes from the ANTHROPIC_API_KEY environment variable and
    falls back to the placeholder "test_key"; the OpenAI key is a fixed dummy.
    Only the thinking slot sets ``thinking_tokens``.
    """
    anthropic_key = os.getenv("ANTHROPIC_API_KEY", "test_key")  # Use actual API key from environment
    model_name = "claude-haiku-4-5-20251001"  # Using a valid model name

    long_context = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model=model_name,
        rate_limits=RateLimitConfig(),
    )
    fast = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model=model_name,
        rate_limits=RateLimitConfig(),
    )
    thinking = ModelConfig(
        provider=ModelProvider.ANTHROPIC,
        model=model_name,
        rate_limits=RateLimitConfig(),
        thinking_tokens=1024,
    )

    return AgentConfig(
        anthropic_api_key=anthropic_key,
        openai_api_key="test-key",
        long_context_config=long_context,
        fast_config=fast,
        thinking_config=thinking,
    )
48
+
49
+
50
@pytest.fixture
def mock_connection_manager():
    """Provide an AsyncMock restricted to the AgentConnectionManager interface."""
    manager = AsyncMock(spec=AgentConnectionManager)
    return manager
53
+
54
+
55
@pytest.fixture
def base_agent(tmp_path, mock_connection_manager, agent_config):
    """Create a BaseAgent rooted at pytest's temporary directory.

    Each invocation gets a fresh random thread id; the connection manager and
    configuration are supplied by the sibling fixtures.
    """
    thread_id = str(uuid.uuid4())  # Add thread_id
    agent = BaseAgent(
        project_path=tmp_path,
        workspace_id="test_workspace",
        thread_id=thread_id,
        connection_manager=mock_connection_manager,
        config=agent_config,
    )
    return agent
64
+
65
+
66
+ class TestBaseAgent:
67
@pytest.mark.asyncio
async def test_execute_single_tool_uses_execution_id_for_app_events_and_provider_id_for_result(self, base_agent):
    """Check which identifier execute_single_tool uses in which place.

    The returned result must carry the tool call's ``id`` as ``tool_use_id`` and its
    ``execution_id`` unchanged; the first two chat messages must be tagged with the
    execution id; and the agent's three "current tool" trackers must be None afterwards.
    """

    class TestTools:
        """Minimal stand-in tool collection exposing a single dispatch tool."""

        async def dispatch_investigation_agent(self, **_inputs):
            return "investigation complete"

        def get_tool_list(self):
            return [SimpleNamespace(name="dispatch_investigation_agent")]

        def registry(self):
            from kolega_code.agent.tools import ToolCollection
            from kolega_code.llm.models import ToolDefinition
            from kolega_code.tools import Tool, ToolRegistry

            parallel_names = set(ToolCollection.read_only_tools).union(ToolCollection.agent_dispatch_tools)
            tool_registry = ToolRegistry()
            for entry in self.get_tool_list():
                tool_name = entry.name
                tool = Tool(
                    name=tool_name,
                    definition=ToolDefinition(name=tool_name, description="", parameters=[]),
                    handler=getattr(self, tool_name),
                    parallel_safe=tool_name in parallel_names,
                )
                tool_registry.add(tool)
            return tool_registry

    provider_id = "dispatch_investigation_agent_0"
    execution_id = "tool_exec_unique_123"
    tool_call = ToolCall(
        id=provider_id,
        name="dispatch_investigation_agent",
        input={"task": "check this"},
        execution_id=execution_id,
    )

    # Replace the collaborators the agent talks to while executing the tool.
    base_agent.tool_collection = TestTools()
    base_agent.send_chat_message = AsyncMock()
    base_agent.log_info = AsyncMock()

    result = await base_agent.execute_single_tool(tool_call)

    assert result.tool_use_id == provider_id
    assert result.execution_id == execution_id

    # Index explicitly so that a missing chat message raises instead of being skipped.
    chat_calls = base_agent.send_chat_message.call_args_list
    for position in (0, 1):
        assert chat_calls[position].kwargs["tool_call_id"] == execution_id

    assert base_agent.current_tool_call_id is None
    assert base_agent.current_tool_execution_id is None
    assert base_agent.current_provider_tool_call_id is None
113
+
114
@pytest.mark.asyncio
async def testcompress_history(self, base_agent):
    """Compressing a ten-message history appends a summary without dropping messages.

    The LLM call is mocked; the test checks the stored history length, the
    compression marker, the effective history handed to the LLM, and the
    keyword arguments used for the single generate call.
    """
    # Five user/assistant exchanges: "Message 1"/"Response 1" ... "Message 5"/"Response 5".
    conversation = []
    for turn in range(1, 6):
        conversation.append(("user", f"Message {turn}"))
        conversation.append(("assistant", f"Response {turn}"))

    messages = [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    base_agent.history = MessageHistory(messages)

    # Canned reply returned by the mocked LLM.
    summary_reply = Message(
        role="assistant",
        content=[TextBlock(text="This is a compressed summary of the conversation")],
    )

    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        mock_generate.return_value = summary_reply

        # Compression is expected to be non-destructive.
        await base_agent.compress_history()

        original_count = len(conversation)

        # Every original message is kept and one summary is appended.
        assert len(base_agent.history) == original_count + 1
        # The marker points at the last pre-summary message; the LLM only sees the summary.
        assert base_agent.last_compression_index == original_count - 1
        effective = base_agent.get_effective_history_for_llm()
        assert len(effective) == 1  # only the summary is used for LLM

        # Exactly one LLM call, made with the long-context model and the model's token limit.
        mock_generate.assert_called_once()
        generate_kwargs = mock_generate.call_args.kwargs
        assert generate_kwargs["model"] == base_agent.config.long_context_config.model
        assert generate_kwargs["max_completion_tokens"] == base_agent.model_completion_tokens
160
+
161
@pytest.mark.asyncio
async def testcompress_history_insufficient_history(self, base_agent):
    """A four-message history must be left untouched and must not trigger an LLM call."""
    # Setup test data with less than 5 messages
    conversation = [
        ("user", "Message 1"),
        ("assistant", "Response 1"),
        ("user", "Message 2"),
        ("assistant", "Response 2"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Snapshot the serialized history up front. Comparing the history with itself
    # after the call would always pass and could never detect a modification.
    history_before = [msg.to_dict() for msg in base_agent.history]

    # Mock the LLM client's generate method
    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        # Call the method
        await base_agent.compress_history()

        # Verify the history was not compressed
        assert len(base_agent.history) == 4
        assert all(isinstance(msg, Message) for msg in base_agent.history)
        assert [msg.to_dict() for msg in base_agent.history] == history_before  # History unchanged
        mock_generate.assert_not_called()
185
+
186
@pytest.mark.asyncio
async def testcompress_history_error_handling(self, base_agent):
    """If the LLM call raises, compress_history must not propagate it nor alter the history."""
    # Setup test data
    conversation = [
        ("user", "Message 1"),
        ("assistant", "Response 1"),
        ("user", "Message 2"),
        ("assistant", "Response 2"),
        ("user", "Message 3"),
        ("assistant", "Response 3"),
        ("user", "Message 4"),
        ("assistant", "Response 4"),
        ("user", "Message 5"),
        ("assistant", "Response 5"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Snapshot the serialized history up front. Comparing the history with itself
    # after the call would always pass and could never detect a modification.
    history_before = [msg.to_dict() for msg in base_agent.history]

    # Mock the LLM client's generate method to raise an exception
    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        mock_generate.side_effect = Exception("Test error")

        # Call the method; the error is expected to be handled internally.
        await base_agent.compress_history()

        # Verify the history was not modified
        assert len(base_agent.history) == 10
        assert all(isinstance(msg, Message) for msg in base_agent.history)
        assert [msg.to_dict() for msg in base_agent.history] == history_before  # History unchanged
217
+
218
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def testcompress_history_with_real_llm(self, base_agent):
    """Integration test using the real LLM client to test message compression.

    Note: This test requires a valid API key to be set in the environment.
    It will be skipped if the API key is not available.

    Failures are left to propagate so that pytest reports the original traceback
    and assertion details instead of a flattened error string.
    """
    # Skip if no API key is available ("test_key" is the fixture's placeholder).
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Setup test data with a realistic conversation using Message objects
    conversation = [
        ("user", "What is Python?"),
        (
            "assistant",
            "Python is a high-level, interpreted programming language known for its simplicity and readability.",
        ),
        ("user", "What are its main features?"),
        (
            "assistant",
            "Python features include dynamic typing, automatic memory management, and a comprehensive standard library.",
        ),
        ("user", "How do I write a function in Python?"),
        (
            "assistant",
            "You can define a function using the def keyword, followed by the function name and parameters in parentheses.",
        ),
        ("user", "What is a decorator?"),
        (
            "assistant",
            "A decorator is a design pattern that allows you to modify the behavior of functions or classes.",
        ),
        ("user", "Show me an example of a decorator."),
        ("assistant", "Here is a simple decorator example: @property def name(self): return self._name"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Store the last two messages for comparison
    last_two_messages = base_agent.history[-2:]

    # Call the method with real LLM
    await base_agent.compress_history()

    # Verify the summary was appended (allowing for environments where real LLM may be skipped)
    assert len(base_agent.history) >= len(conversation)

    # Verify the summary message was appended at the end
    summary_message = base_agent.history[-1]
    assert isinstance(summary_message, Message)
    assert summary_message.role == "user"
    summary_text = summary_message.content[0].text
    assert ("CONVERSATION HISTORY SUMMARY" in summary_text) or ("## Analysis Section" in summary_text)

    # Verify the last two messages are still present just before the summary
    assert base_agent.history[-3:-1] == last_two_messages
283
+
284
+ # Tests for dump/restore message history
285
def test_dump_message_history_empty(self, base_agent):
    """An empty MessageHistory must serialize to an empty list."""
    base_agent.history = MessageHistory()
    assert base_agent.dump_message_history() == []
290
+
291
def test_dump_message_history_populated(self, base_agent):
    """Dump a four-message history (text, text, tool call, tool result) and inspect each entry.

    Individual fields are checked first; the whole dump is then compared with
    what ``Message.to_dict`` produces for each original message.
    """
    user_text = Message(role="user", content=[TextBlock(text="Hello")])
    assistant_text = Message(role="assistant", content=[TextBlock(text="Hi there!")])
    assistant_call = Message(
        role="assistant",
        content=[ToolCall(id="tool1", name="read_file", input={"path": "a.txt"})],
    )
    tool_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="read_file", content="File content", is_error=False)],
    )
    original_history = MessageHistory([user_text, assistant_text, assistant_call, tool_reply])

    base_agent.history = original_history
    dumped_history = base_agent.dump_message_history()

    assert len(dumped_history) == 4

    # Entry 0: plain user text.
    dumped_user = dumped_history[0]
    assert isinstance(dumped_user, dict)
    assert dumped_user["role"] == "user"
    assert isinstance(dumped_user["content"], list)
    user_block = dumped_user["content"][0]
    assert user_block["type"] == "text"
    assert user_block["text"] == "Hello"
    assert user_block["cache_checkpoint"] is False  # Verify default

    # Entry 1: plain assistant text.
    dumped_reply = dumped_history[1]
    assert isinstance(dumped_reply, dict)
    assert dumped_reply["role"] == "assistant"
    reply_block = dumped_reply["content"][0]
    assert reply_block["type"] == "text"
    assert reply_block["text"] == "Hi there!"

    # Entry 2: assistant tool call.
    dumped_call = dumped_history[2]
    assert isinstance(dumped_call, dict)
    assert dumped_call["role"] == "assistant"
    call_block = dumped_call["content"][0]
    assert call_block["type"] == "tool_call"
    assert call_block["id"] == "tool1"
    assert call_block["name"] == "read_file"
    assert call_block["input"] == {"path": "a.txt"}

    # Entry 3: tool result, carried by a user-role message.
    dumped_result = dumped_history[3]
    assert isinstance(dumped_result, dict)
    assert dumped_result["role"] == "user"  # Role for ToolResult message
    result_block = dumped_result["content"][0]
    assert result_block["type"] == "tool_result"
    assert result_block["tool_use_id"] == "tool1"
    assert result_block["content"] == "File content"
    assert result_block["name"] == "read_file"
    assert result_block["is_error"] is False

    # Exact structural check against to_dict for every message.
    assert dumped_history == [msg.to_dict() for msg in original_history]
338
+
339
def test_restore_message_history_empty(self, base_agent):
    """Restoring from an empty list must yield an empty MessageHistory."""
    base_agent.restore_message_history([])

    restored = base_agent.history
    assert isinstance(restored, MessageHistory)
    assert len(restored) == 0
345
+
346
def test_restore_message_history_populated(self, base_agent):
    """Restore a serialized text / tool-call / tool-result sequence and check each message.

    The input dictionaries follow the layout produced by ``to_dict``.
    """
    text_entry = {
        "role": "user",
        "content": [{"type": "text", "text": "Another query", "cache_checkpoint": False}],
        "stop_reason": None,
    }
    call_entry = {
        "role": "assistant",
        "content": [
            {
                "type": "tool_call",
                "id": "tool2",
                "name": "list_dir",
                "input": {"path": "/tmp"},
                "cache_checkpoint": False,
            }
        ],
        "stop_reason": "tool_use",
    }
    result_entry = {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "tool2",
                "content": "[file1, file2]",
                "name": "list_dir",
                "is_error": False,
                "cache_checkpoint": False,
            }
        ],
        "stop_reason": None,
    }
    serialized_history = [text_entry, call_entry, result_entry]

    base_agent.restore_message_history(serialized_history)

    assert isinstance(base_agent.history, MessageHistory)
    assert len(base_agent.history) == 3

    # First message: user text.
    msg1 = base_agent.history[0]
    assert isinstance(msg1, Message)
    assert msg1.role == "user"
    text_block = msg1.content[0]
    assert isinstance(text_block, TextBlock)
    assert text_block.text == "Another query"
    assert msg1.stop_reason is None

    # Second message: assistant tool call.
    msg2 = base_agent.history[1]
    assert isinstance(msg2, Message)
    assert msg2.role == "assistant"
    call_block = msg2.content[0]
    assert isinstance(call_block, ToolCall)
    assert call_block.id == "tool2"
    assert call_block.name == "list_dir"
    assert call_block.input == {"path": "/tmp"}
    assert msg2.stop_reason == "tool_use"
    # The tool_calls attribute must hold the single call from the content.
    assert len(msg2.tool_calls) == 1
    assert msg2.tool_calls[0] == msg2.content[0]

    # Third message: tool result.
    msg3 = base_agent.history[2]
    assert isinstance(msg3, Message)
    assert msg3.role == "user"
    result_block = msg3.content[0]
    assert isinstance(result_block, ToolResult)
    assert result_block.tool_use_id == "tool2"
    assert result_block.content == "[file1, file2]"
    assert result_block.name == "list_dir"
    assert result_block.is_error is False
    assert msg3.stop_reason is None
420
+
421
def test_restore_message_history_sanitizes_oversized_tool_results(self, base_agent):
    """A 100,001-character tool result must be replaced by a short notice on restore.

    The identifying fields of the result (tool_use_id, name, is_error) must survive.
    """
    oversized_content = "x" * 100_001
    tool_result_block = {
        "type": "tool_result",
        "tool_use_id": "read_entire_file_24",
        "content": oversized_content,
        "name": "read_entire_file",
        "is_error": False,
        "cache_checkpoint": False,
    }
    serialized_history = [{"role": "user", "content": [tool_result_block], "stop_reason": None}]

    base_agent.restore_message_history(serialized_history)

    restored_message = base_agent.history[0]
    result = restored_message.content[0]
    assert isinstance(result, ToolResult)
    assert result.tool_use_id == "read_entire_file_24"
    assert result.name == "read_entire_file"
    assert result.is_error is False
    # The payload is swapped for a brief placeholder text.
    assert len(result.content) < 500
    assert "Tool result omitted from history" in result.content
449
+
450
def test_dump_restore_cycle(self, base_agent):
    """Dumping a history and restoring it must reproduce the same serialized structure.

    The history mixes text, thinking, redacted-thinking, tool-call and tool-result blocks.
    """
    assistant_reasoning = Message(
        role="assistant",
        content=[
            ThinkingBlock(thinking="reasoning", signature="provider-signature"),
            RedactedThinkingBlock(data="encrypted-redacted-reasoning"),
            TextBlock(text="Acknowledged."),
        ],
    )
    original_history = MessageHistory(
        [
            Message(role="user", content=[TextBlock(text="Cycle Test")]),
            assistant_reasoning,
            Message(role="assistant", content=[ToolCall(id="tool3", name="dummy_tool", input={})]),
            Message(
                role="user",
                content=[ToolResult(tool_use_id="tool3", name="dummy_tool", content="Success", is_error=False)],
            ),
        ]
    )
    base_agent.history = original_history

    # Round trip: serialize, then rebuild from the serialized form.
    dumped_data = base_agent.dump_message_history()
    base_agent.restore_message_history(dumped_data)

    # Restored messages are compared via to_dict rather than directly, since they
    # may be new instances that are structurally identical to the originals.
    assert len(base_agent.history) == len(original_history)
    restored_dicts = [msg.to_dict() for msg in base_agent.history]
    original_dicts = [msg.to_dict() for msg in original_history]
    assert restored_dicts == original_dicts
485
+
486
+ # Tests for history validation methods
487
def test_is_history_valid_for_anthropic_valid_history(self, base_agent):
    """A history whose only tool call is answered by a matching result must validate."""
    tool_call_message = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    tool_result_message = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool", content="Success", is_error=False)],
    )
    valid_history = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        Message(role="assistant", content=[TextBlock(text="Response")]),
        tool_call_message,
        tool_result_message,
    ]

    verdict = base_agent._is_history_valid_for_anthropic(valid_history)
    assert verdict is True
500
+
501
def test_is_history_valid_for_anthropic_valid_history_no_tools(self, base_agent):
    """A text-only exchange with no tool calls must validate."""
    exchanges = [
        ("user", "Test message"),
        ("assistant", "Response"),
        ("user", "Another message"),
        ("assistant", "Another response"),
    ]
    valid_history = [Message(role=role, content=[TextBlock(text=text)]) for role, text in exchanges]

    verdict = base_agent._is_history_valid_for_anthropic(valid_history)
    assert verdict is True
511
+
512
def test_is_history_valid_for_anthropic_missing_tool_result(self, base_agent):
    """A history that ends on an unanswered tool call must be rejected."""
    opening = Message(role="user", content=[TextBlock(text="Test message")])
    unanswered_call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    # No tool result message follows the call.
    invalid_history = [opening, unanswered_call]

    verdict = base_agent._is_history_valid_for_anthropic(invalid_history)
    assert verdict is False
521
+
522
def test_is_history_valid_for_anthropic_incomplete_tool_results(self, base_agent):
    """Two tool calls answered by only one result must be rejected."""
    two_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="test_tool1", input={}),
            ToolCall(id="tool2", name="test_tool2", input={}),
        ],
    )
    # Only tool1 gets a result; tool2 is left unanswered.
    single_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool1", content="Success", is_error=False)],
    )
    invalid_history = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        two_calls,
        single_result,
    ]

    verdict = base_agent._is_history_valid_for_anthropic(invalid_history)
    assert verdict is False
543
+
544
def test_is_history_valid_for_anthropic_wrong_role_sequence(self, base_agent):
    """A tool call followed by another assistant message (instead of a user one) must be rejected."""
    opening = Message(role="user", content=[TextBlock(text="Test message")])
    tool_call_message = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    # Should be user
    misplaced_assistant = Message(role="assistant", content=[TextBlock(text="Another assistant message")])
    invalid_history = [opening, tool_call_message, misplaced_assistant]

    verdict = base_agent._is_history_valid_for_anthropic(invalid_history)
    assert verdict is False
553
+
554
def test_is_history_valid_for_anthropic_empty_history(self, base_agent):
    """An empty message list must validate."""
    no_messages = []
    verdict = base_agent._is_history_valid_for_anthropic(no_messages)
    assert verdict is True
557
+
558
def test_is_history_valid_for_anthropic_uses_self_history(self, base_agent):
    """Called without arguments, validation is expected to inspect the agent's own history."""
    # A lone assistant tool call with no result after it.
    orphaned_call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    base_agent.history = MessageHistory([orphaned_call])

    verdict = base_agent._is_history_valid_for_anthropic()

    assert verdict is False
568
+
569
def testfix_incomplete_tool_calls_no_changes_needed(self, base_agent):
    """A history whose tool call is already answered comes back with equal content."""
    prompt = Message(role="user", content=[TextBlock(text="Test message")])
    call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    answer = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool", content="Success", is_error=False)],
    )
    valid_history = [prompt, call, answer]

    fixed = base_agent.fix_incomplete_tool_calls(valid_history)

    assert len(fixed) == 3
    # Compare serialized forms message by message.
    for position in range(3):
        assert fixed[position].to_dict() == valid_history[position].to_dict()
586
+
587
def testfix_incomplete_tool_calls_adds_placeholder_result(self, base_agent):
    """An unanswered tool call gets an error placeholder in a new trailing user message."""
    history_before = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        # No tool result follows this call.
        Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})]),
    ]

    history_after = base_agent.fix_incomplete_tool_calls(history_before)

    # Two original messages plus one placeholder message.
    assert len(history_after) == 3

    # The original messages serialize to the same dictionaries as before.
    for index in (0, 1):
        assert history_after[index].to_dict() == history_before[index].to_dict()

    appended = history_after[2]
    assert appended.role == "user"
    assert len(appended.content) == 1

    placeholder = appended.content[0]
    assert isinstance(placeholder, ToolResult)
    assert placeholder.tool_use_id == "tool1"
    assert placeholder.name == "test_tool"
    assert placeholder.is_error is True
    assert "interrupted" in placeholder.content.lower()
612
+
613
def testfix_incomplete_tool_calls_multiple_tools(self, base_agent):
    """Two unanswered tool calls in one message yield one user message with two placeholders."""
    unanswered_calls = [
        ToolCall(id="tool1", name="test_tool1", input={}),
        ToolCall(id="tool2", name="test_tool2", input={}),
    ]
    # The history consists solely of the assistant message; no results exist.
    history_before = [Message(role="assistant", content=unanswered_calls)]

    history_after = base_agent.fix_incomplete_tool_calls(history_before)

    # One original message plus one placeholder message.
    assert len(history_after) == 2

    appended = history_after[1]
    assert appended.role == "user"
    assert len(appended.content) == 2

    answered_ids = {entry.tool_use_id for entry in appended.content}
    assert answered_ids == {"tool1", "tool2"}

    for entry in appended.content:
        assert isinstance(entry, ToolResult)
        assert entry.is_error is True
641
+
642
def testfix_incomplete_tool_calls_partial_results(self, base_agent):
    """A missing result is merged as a placeholder into the existing user message."""
    assistant_turn = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="test_tool1", input={}),
            ToolCall(id="tool2", name="test_tool2", input={}),
        ],
    )
    # Only tool1 has a result; tool2 is left unanswered.
    partial_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool1", content="Success", is_error=False)],
    )

    repaired = base_agent.fix_incomplete_tool_calls([assistant_turn, partial_reply])

    # The message count is unchanged because no new message is appended.
    assert len(repaired) == 2

    reply = repaired[1]
    assert reply.role == "user"
    assert len(reply.content) == 2

    answered_ids = {entry.tool_use_id for entry in reply.content if isinstance(entry, ToolResult)}
    assert answered_ids == {"tool1", "tool2"}

    # The entry for tool2 is the generated placeholder.
    filler = next(entry for entry in reply.content if entry.tool_use_id == "tool2")
    assert filler.is_error is True
    assert "interrupted" in filler.content.lower()

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
682
+
683
def testfix_incomplete_tool_calls_empty_history(self, base_agent):
    """Fixing an empty history returns something equal to an empty list."""
    nothing_to_fix = []
    outcome = base_agent.fix_incomplete_tool_calls(nothing_to_fix)
    assert outcome == []
687
+
688
def test_restore_message_history_with_incomplete_tool_calls(self, base_agent):
    """Restoring a serialized history leaves an incomplete tool call unrepaired.

    The test also checks that an explicit ``fix_incomplete_tool_calls``
    call on the restored messages produces a valid history.
    """
    serialized_user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": "Test message", "cache_checkpoint": False}],
        "stop_reason": None,
    }
    serialized_tool_call = {
        "type": "tool_call",
        "id": "tool1",
        "name": "test_tool",
        "input": {"param": "value"},
        "cache_checkpoint": False,
    }
    serialized_assistant_turn = {
        "role": "assistant",
        "content": [serialized_tool_call],
        "stop_reason": "tool_use",
    }

    # The tool result message is omitted, simulating an interrupted session.
    base_agent.restore_message_history([serialized_user_turn, serialized_assistant_turn])

    # Restore keeps exactly the two serialized messages.
    assert len(base_agent.history) == 2

    first, second = base_agent.history[0], base_agent.history[1]
    assert first.role == "user"
    assert second.role == "assistant"
    assert len(second.tool_calls) == 1

    # The restored history is still rejected by the validator.
    assert base_agent._is_history_valid_for_anthropic() is False

    # An explicit repair appends the placeholder and makes it valid.
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert len(repaired) == 3
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
730
+
731
+ # Tests for robustness - incomplete tool calls at various positions
732
def testfix_incomplete_tool_calls_at_beginning_of_history(self, base_agent):
    """Repair a half-answered tool batch that opens the history.

    The first assistant message issues two tool calls but only
    ``early_tool1`` is answered. The placeholder for ``early_tool2`` is
    expected inside the existing user message, so the message count stays
    at six and the later messages keep their roles.
    """
    opening_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="early_tool1", name="early_tool", input={}),
            ToolCall(id="early_tool2", name="another_early_tool", input={}),
        ],
    )
    # Only early_tool1 is answered here.
    opening_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="early_tool1", name="early_tool", content="Success", is_error=False)],
    )
    small_talk = [
        Message(role="user", content=[TextBlock(text="How are things?")]),
        Message(role="assistant", content=[TextBlock(text="Things are going well.")]),
    ]
    # A later tool call that does have its result.
    finished_exchange = [
        Message(role="assistant", content=[ToolCall(id="later_tool", name="later_tool", input={})]),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="later_tool", name="later_tool", content="Later success", is_error=False)
            ],
        ),
    ]
    corrupted = [opening_calls, opening_results, *small_talk, *finished_exchange]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted)

    # Merging into the existing user message keeps the count at six.
    assert len(repaired) == 6

    assert repaired[0].role == "assistant"
    merged_reply = repaired[1]
    assert merged_reply.role == "user"
    assert len(merged_reply.content) == 2

    answered_ids = {entry.tool_use_id for entry in merged_reply.content if isinstance(entry, ToolResult)}
    assert answered_ids == {"early_tool1", "early_tool2"}

    # The generated entry is flagged as an error mentioning the interruption.
    filler = next(entry for entry in merged_reply.content if entry.tool_use_id == "early_tool2")
    assert filler.is_error is True
    assert "interrupted" in filler.content.lower()

    # Roles of the remaining messages, by position.
    expected_tail_roles = {2: "user", 3: "assistant", 4: "assistant", 5: "user"}
    for position, role in expected_tail_roles.items():
        assert repaired[position].role == role

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
790
+
791
def testfix_incomplete_tool_calls_in_middle_of_history(self, base_agent):
    """Repair a tool batch in mid-conversation that is missing one of three results."""
    greeting = [
        Message(role="user", content=[TextBlock(text="Hello")]),
        Message(role="assistant", content=[TextBlock(text="Hi there!")]),
    ]
    three_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="middle_tool1", name="middle_tool", input={}),
            ToolCall(id="middle_tool2", name="another_middle_tool", input={}),
            ToolCall(id="middle_tool3", name="third_middle_tool", input={}),
        ],
    )
    # Results exist for the first and third call; middle_tool2 has none.
    two_results = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="middle_tool1", name="middle_tool", content="Success", is_error=False),
            ToolResult(tool_use_id="middle_tool3", name="third_middle_tool", content="Success", is_error=False),
        ],
    )
    follow_up = [
        Message(role="assistant", content=[TextBlock(text="Let me continue...")]),
        Message(role="user", content=[TextBlock(text="Sounds good")]),
    ]
    corrupted = [*greeting, three_calls, two_results, *follow_up]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted)

    # The placeholder lands in the existing user message, so the count is unchanged.
    assert len(repaired) == 6

    assert repaired[2].role == "assistant"
    merged_reply = repaired[3]
    assert merged_reply.role == "user"
    assert len(merged_reply.content) == 3

    answered_ids = {entry.tool_use_id for entry in merged_reply.content if isinstance(entry, ToolResult)}
    assert answered_ids == {"middle_tool1", "middle_tool2", "middle_tool3"}

    filler = next(entry for entry in merged_reply.content if entry.tool_use_id == "middle_tool2")
    assert filler.is_error is True

    # The two trailing messages keep their roles.
    assert repaired[4].role == "assistant"
    assert repaired[5].role == "user"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
843
+
844
def test_fix_multiple_incomplete_tool_call_sequences(self, base_agent):
    """Repair two separate partially answered tool batches in one history."""
    first_batch = Message(
        role="assistant",
        content=[
            ToolCall(id="seq1_tool1", name="tool1", input={}),
            ToolCall(id="seq1_tool2", name="tool2", input={}),
        ],
    )
    # seq1_tool2 is unanswered.
    first_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="seq1_tool1", name="tool1", content="Success", is_error=False)],
    )
    interlude = [
        Message(role="user", content=[TextBlock(text="Continue")]),
        Message(role="assistant", content=[TextBlock(text="Continuing...")]),
    ]
    second_batch = Message(
        role="assistant",
        content=[
            ToolCall(id="seq2_tool1", name="tool3", input={}),
            ToolCall(id="seq2_tool2", name="tool4", input={}),
            ToolCall(id="seq2_tool3", name="tool5", input={}),
        ],
    )
    # seq2_tool1 and seq2_tool3 are unanswered.
    second_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="seq2_tool2", name="tool4", content="Success", is_error=False)],
    )
    closing = Message(role="assistant", content=[TextBlock(text="Done")])
    corrupted = [first_batch, first_reply, *interlude, second_batch, second_reply, closing]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted)

    # Both repairs merge into existing user messages, so the count stays at seven.
    assert len(repaired) == 7

    first_fixed = repaired[1]
    assert first_fixed.role == "user"
    assert len(first_fixed.content) == 2
    first_ids = {entry.tool_use_id for entry in first_fixed.content if isinstance(entry, ToolResult)}
    assert first_ids == {"seq1_tool1", "seq1_tool2"}

    second_fixed = repaired[5]
    assert second_fixed.role == "user"
    assert len(second_fixed.content) == 3
    second_ids = {entry.tool_use_id for entry in second_fixed.content if isinstance(entry, ToolResult)}
    assert second_ids == {"seq2_tool1", "seq2_tool2", "seq2_tool3"}

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
908
+
909
def testfix_incomplete_tool_calls_at_end_with_no_user_message(self, base_agent):
    """Trailing tool calls with no reply at all get a new user message of placeholders."""
    request = Message(role="user", content=[TextBlock(text="Do something")])
    acknowledgement = Message(role="assistant", content=[TextBlock(text="Sure, let me help.")])
    # The history ends on this message; nothing answers either call.
    dangling_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="end_tool1", name="end_tool", input={}),
            ToolCall(id="end_tool2", name="another_end_tool", input={}),
        ],
    )

    repaired = base_agent.fix_incomplete_tool_calls([request, acknowledgement, dangling_calls])

    # Three original messages plus one appended user message.
    assert len(repaired) == 4

    appended = repaired[3]
    assert appended.role == "user"
    assert len(appended.content) == 2
    filled_ids = {entry.tool_use_id for entry in appended.content}
    assert filled_ids == {"end_tool1", "end_tool2"}

    for entry in appended.content:
        assert entry.is_error is True
        assert "interrupted" in entry.content.lower()

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
942
+
943
def test_fix_consecutive_incomplete_tool_sequences(self, base_agent):
    """Repair back-to-back tool exchanges: one complete, one partial, one unanswered."""
    # Exchange 1: a single call with its result (already complete).
    complete_call = Message(role="assistant", content=[ToolCall(id="consec1_tool", name="tool1", input={})])
    complete_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="consec1_tool", name="tool1", content="Success", is_error=False)],
    )
    # Exchange 2: two calls, only the first one answered.
    partial_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="consec2_tool1", name="tool2", input={}),
            ToolCall(id="consec2_tool2", name="tool3", input={}),
        ],
    )
    partial_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="consec2_tool1", name="tool2", content="Success", is_error=False)],
    )
    # Exchange 3: a call with no user message after it.
    unanswered_call = Message(role="assistant", content=[ToolCall(id="consec3_tool", name="tool4", input={})])

    corrupted = [complete_call, complete_reply, partial_calls, partial_reply, unanswered_call]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted)

    # Five input messages; one placeholder is merged and one user message is appended.
    assert len(repaired) == 6

    # Exchange 1 keeps its assistant/user pairing.
    assert repaired[0].role == "assistant"
    assert repaired[1].role == "user"

    # Exchange 2: the existing user message now answers both calls.
    assert repaired[2].role == "assistant"
    merged_reply = repaired[3]
    assert merged_reply.role == "user"
    assert len(merged_reply.content) == 2
    merged_ids = {entry.tool_use_id for entry in merged_reply.content if isinstance(entry, ToolResult)}
    assert merged_ids == {"consec2_tool1", "consec2_tool2"}

    # Exchange 3: a user message answering consec3_tool follows the call.
    assert repaired[4].role == "assistant"
    appended = repaired[5]
    assert appended.role == "user"
    assert appended.content[0].tool_use_id == "consec3_tool"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
998
+
999
def test_fix_mixed_complete_and_incomplete_sequences(self, base_agent):
    """Repair one partial tool batch that sits between two complete ones."""
    first_complete = [
        Message(role="assistant", content=[ToolCall(id="complete1", name="complete_tool", input={})]),
        Message(
            role="user",
            content=[ToolResult(tool_use_id="complete1", name="complete_tool", content="Success", is_error=False)],
        ),
    ]
    # Two calls, but only incomplete1 has a result.
    partial = [
        Message(
            role="assistant",
            content=[
                ToolCall(id="incomplete1", name="incomplete_tool", input={}),
                ToolCall(id="incomplete2", name="another_incomplete", input={}),
            ],
        ),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="incomplete1", name="incomplete_tool", content="Success", is_error=False)
            ],
        ),
    ]
    second_complete = [
        Message(role="assistant", content=[ToolCall(id="complete2", name="another_complete", input={})]),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="complete2", name="another_complete", content="Success", is_error=False)
            ],
        ),
    ]
    plain_text = [
        Message(role="user", content=[TextBlock(text="All done")]),
        Message(role="assistant", content=[TextBlock(text="Great work!")]),
    ]
    mixed_history = [*first_complete, *partial, *second_complete, *plain_text]

    repaired = base_agent.fix_incomplete_tool_calls(mixed_history)

    # The placeholder is merged, so all eight positions remain.
    assert len(repaired) == 8

    # First complete exchange.
    assert repaired[0].role == "assistant"
    assert repaired[1].role == "user"

    # Partial exchange: the user message now covers both calls.
    assert repaired[2].role == "assistant"
    merged_reply = repaired[3]
    assert merged_reply.role == "user"
    assert len(merged_reply.content) == 2
    merged_ids = {entry.tool_use_id for entry in merged_reply.content if isinstance(entry, ToolResult)}
    assert merged_ids == {"incomplete1", "incomplete2"}

    # Second complete exchange and the trailing text messages, by position.
    expected_tail_roles = {4: "assistant", 5: "user", 6: "user", 7: "assistant"}
    for position, role in expected_tail_roles.items():
        assert repaired[position].role == role

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
1062
+
1063
def test_complex_corrupted_history_recovery(self, base_agent):
    """Repair a history with two partial tool batches and one unanswered trailing batch.

    The nine input messages contain a complete exchange as well, which is
    only checked indirectly through the final validity assertion.
    """
    # Batch 1: three calls, only start_tool2 answered.
    opening_batch = Message(
        role="assistant",
        content=[
            ToolCall(id="start_tool1", name="start1", input={}),
            ToolCall(id="start_tool2", name="start2", input={}),
            ToolCall(id="start_tool3", name="start3", input={}),
        ],
    )
    opening_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="start_tool2", name="start2", content="Success", is_error=False)],
    )
    chatter = [
        Message(role="user", content=[TextBlock(text="What about the other tasks?")]),
        Message(role="assistant", content=[TextBlock(text="Let me check on those.")]),
    ]
    # Batch 2: four calls, mid_tool2 and mid_tool3 unanswered.
    middle_batch = Message(
        role="assistant",
        content=[
            ToolCall(id="mid_tool1", name="mid1", input={}),
            ToolCall(id="mid_tool2", name="mid2", input={}),
            ToolCall(id="mid_tool3", name="mid3", input={}),
            ToolCall(id="mid_tool4", name="mid4", input={}),
        ],
    )
    middle_reply = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="mid_tool1", name="mid1", content="Success", is_error=False),
            ToolResult(tool_use_id="mid_tool4", name="mid4", content="Success", is_error=False),
        ],
    )
    # A fully answered exchange.
    healthy_exchange = [
        Message(role="assistant", content=[ToolCall(id="good_tool", name="good", input={})]),
        Message(
            role="user",
            content=[ToolResult(tool_use_id="good_tool", name="good", content="Success", is_error=False)],
        ),
    ]
    # Batch 3: two calls at the very end with no user message after them.
    trailing_batch = Message(
        role="assistant",
        content=[
            ToolCall(id="end_tool1", name="end1", input={}),
            ToolCall(id="end_tool2", name="end2", input={}),
        ],
    )
    heavily_corrupted_history = [
        opening_batch,
        opening_reply,
        *chatter,
        middle_batch,
        middle_reply,
        *healthy_exchange,
        trailing_batch,
    ]

    # Sanity check: the input is rejected before any repair.
    assert base_agent._is_history_valid_for_anthropic(heavily_corrupted_history) is False

    repaired = base_agent.fix_incomplete_tool_calls(heavily_corrupted_history)

    # Nine input messages; two repairs merge in place and one appends a message.
    assert len(repaired) == 10

    # Batch 1: the existing reply now answers all three calls.
    opening_fixed = repaired[1]
    assert len(opening_fixed.content) == 3
    opening_ids = {entry.tool_use_id for entry in opening_fixed.content if isinstance(entry, ToolResult)}
    assert opening_ids == {"start_tool1", "start_tool2", "start_tool3"}

    # Batch 2: the existing reply now answers all four calls.
    middle_fixed = repaired[5]
    assert len(middle_fixed.content) == 4
    middle_ids = {entry.tool_use_id for entry in middle_fixed.content if isinstance(entry, ToolResult)}
    assert middle_ids == {"mid_tool1", "mid_tool2", "mid_tool3", "mid_tool4"}

    # Batch 3: a user message is appended at the end.
    trailing_fixed = repaired[9]
    assert trailing_fixed.role == "user"
    assert len(trailing_fixed.content) == 2
    trailing_ids = {entry.tool_use_id for entry in trailing_fixed.content}
    assert trailing_ids == {"end_tool1", "end_tool2"}

    # Spot-check one generated placeholder.
    sample_filler = next(entry for entry in opening_fixed.content if entry.tool_use_id == "start_tool1")
    assert sample_filler.is_error is True
    assert "interrupted" in sample_filler.content.lower()

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
1153
+
1154
+ # Integration tests with real Anthropic API for message history corruption recovery
1155
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def testfix_incomplete_tool_calls_with_real_api_simple_case(self, base_agent):
    """Integration test: a repaired single-call history is accepted by the real LLM API.

    Skipped when the configured provider has no API key or only the
    ``"test_key"`` placeholder. Only the API call itself sits inside the
    ``try`` block, so a failing assertion on the response is reported as
    an assertion failure rather than as an API failure.
    """
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # The history ends on a tool call with no result, simulating an interruption.
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Can you help me with a simple task?")]),
        Message(role="assistant", content=[TextBlock(text="Of course! I'd be happy to help you.")]),
        Message(
            role="assistant",
            content=[ToolCall(id="interrupted_tool", name="read_file", input={"path": "example.txt"})],
        ),
    ]

    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True
    assert len(fixed_history) == 4  # Original 3 + 1 placeholder user message

    # Install the repaired history and continue the conversation with a new user turn.
    base_agent.history = MessageHistory(fixed_history)
    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])
    base_agent.history.append(Message(role="user", content=[TextBlock(text="What should I do next?")]))

    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=100,  # Keep it small for testing
        )
    except Exception as e:
        # Broad on purpose: the provider's exception types are not visible here.
        pytest.fail(f"Real API call failed with fixed history: {e}")

    # Checked outside the try block so failures are attributed correctly.
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
1213
+
1214
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_multiple_incomplete_tool_calls_with_real_api(self, base_agent):
    """Integration test: a history with several repaired tool batches is accepted by the real LLM API.

    Skipped when the configured provider has no API key or only the
    ``"test_key"`` placeholder. Only the API call itself sits inside the
    ``try`` block, so a failing assertion on the response is reported as
    an assertion failure rather than as an API failure.
    """
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    corrupted_history = [
        Message(role="user", content=[TextBlock(text="I need help with several file operations.")]),
        Message(
            role="assistant",
            content=[TextBlock(text="I can help you with file operations. Let me start working on those.")],
        ),
        # First incomplete batch: three calls, only tool1 answered.
        Message(
            role="assistant",
            content=[
                ToolCall(id="tool1", name="read_file", input={"path": "file1.txt"}),
                ToolCall(id="tool2", name="read_file", input={"path": "file2.txt"}),
                ToolCall(id="tool3", name="list_dir", input={"path": "."}),
            ],
        ),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="tool1", name="read_file", content="Content of file1", is_error=False)
            ],
        ),
        # Plain conversation between the two batches.
        Message(role="user", content=[TextBlock(text="What about the other operations?")]),
        Message(role="assistant", content=[TextBlock(text="Let me continue with the remaining operations.")]),
        # Second incomplete batch: the history ends here with no user message.
        Message(
            role="assistant",
            content=[
                ToolCall(id="tool4", name="write_file", input={"path": "output.txt", "content": "test"}),
                ToolCall(id="tool5", name="read_file", input={"path": "config.json"}),
            ],
        ),
    ]

    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Install the repaired history and continue the conversation with a new user turn.
    base_agent.history = MessageHistory(fixed_history)
    system_message = Message(
        role="system", content=[TextBlock(text="You are a helpful assistant for file operations.")]
    )
    base_agent.history.append(
        Message(role="user", content=[TextBlock(text="Can you summarize what operations were attempted?")])
    )

    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=150,
        )
    except Exception as e:
        # Broad on purpose: the provider's exception types are not visible here.
        pytest.fail(f"Real API call failed with fixed history containing multiple corruptions: {e}")

    # Checked outside the try block so failures are attributed correctly.
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0

    # Keyword heuristic on model output: the reply should mention the interrupted operations.
    response_lower = response_text.lower()
    assert any(word in response_lower for word in ["interrupt", "error", "operation", "attempt"])
1304
+
1305
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_corrupted_serialized_history_with_real_api(self, base_agent):
    """Integration test: Fix corrupted serialized history before API call works.

    The serialized payload holds an assistant turn with three tool calls but a
    tool-result turn that answers only the first one. The test checks that
    ``restore_message_history`` leaves the history invalid, that
    ``fix_incomplete_tool_calls`` produces a tool-result message covering all
    three ids, and that a real ``llm.generate`` call succeeds on the result.

    Skipped when no usable API key is configured.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a serialized corrupted history (simulating what would be saved to database)
    serialized_corrupted_history = [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Please analyze this data for me.", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": "I'll analyze the data for you. Let me start by reading the files.",
                    "cache_checkpoint": False,
                }
            ],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_call",
                    "id": "analysis_tool1",
                    "name": "read_file",
                    "input": {"path": "data.csv"},
                    "cache_checkpoint": False,
                },
                {
                    "type": "tool_call",
                    "id": "analysis_tool2",
                    "name": "read_file",
                    "input": {"path": "metadata.json"},
                    "cache_checkpoint": False,
                },
                {
                    "type": "tool_call",
                    "id": "analysis_tool3",
                    "name": "list_dir",
                    "input": {"path": "analysis_results"},
                    "cache_checkpoint": False,
                },
            ],
            "stop_reason": "tool_use",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "analysis_tool1",
                    "content": "CSV data with 1000 rows, 5 columns",
                    "name": "read_file",
                    "is_error": False,
                    "cache_checkpoint": False,
                }
                # Missing analysis_tool2 and analysis_tool3 results (interrupted)
            ],
            "stop_reason": None,
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": "What did you find in the analysis?", "cache_checkpoint": False}],
            "stop_reason": None,
        },
    ]

    # The deterministic checks below deliberately live outside the try block:
    # if one of them fails it must surface as an assertion failure, not be
    # re-reported as a failed API call.

    # Restore the corrupted history (this should NOT fix it)
    base_agent.restore_message_history(serialized_corrupted_history)

    # Verify the restored history is still invalid
    assert base_agent._is_history_valid_for_anthropic() is False

    # Fix the history manually
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    # Locate the first user message that carries tool results; placeholders
    # for the missing results should have been merged into it.
    tool_result_message = next(
        (
            msg
            for msg in fixed_history
            if msg.role == "user" and any(isinstance(block, ToolResult) for block in msg.content)
        ),
        None,
    )

    assert tool_result_message is not None
    tool_results = [block for block in tool_result_message.content if isinstance(block, ToolResult)]
    assert len(tool_results) == 3  # Should now have all 3 tool results

    # Check that placeholders were added for missing results
    tool_result_ids = {result.tool_use_id for result in tool_results}
    assert tool_result_ids == {"analysis_tool1", "analysis_tool2", "analysis_tool3"}

    system_message = Message(role="system", content=[TextBlock(text="You are a data analysis assistant.")])

    # Only the network call is guarded, so the failure message is accurate.
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=200,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with restored corrupted history: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
1427
+
1428
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_consecutive_tool_interruptions_with_real_api(self, base_agent):
    """Integration test: Fix consecutive tool call interruptions and verify with real API.

    The history contains one complete tool sequence, one sequence with a
    missing result, and a final sequence with no results at all. After
    ``fix_incomplete_tool_calls`` the history must validate, and a real
    ``llm.generate`` call on it must return non-empty text.

    Skipped when no usable API key is configured.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a corrupted history with consecutive interruptions
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Help me manage multiple files.")]),
        Message(role="assistant", content=[TextBlock(text="I'll help you manage your files systematically.")]),
        # First tool call sequence
        Message(role="assistant", content=[ToolCall(id="seq1_tool", name="list_dir", input={"path": "."})]),
        Message(
            role="user",
            content=[
                ToolResult(
                    tool_use_id="seq1_tool", name="list_dir", content="file1.txt, file2.txt, dir1/", is_error=False
                )
            ],
        ),
        # Second tool call sequence - partially interrupted
        Message(
            role="assistant",
            content=[
                ToolCall(id="seq2_tool1", name="read_file", input={"path": "file1.txt"}),
                ToolCall(id="seq2_tool2", name="read_file", input={"path": "file2.txt"}),
            ],
        ),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="seq2_tool1", name="read_file", content="Content of file1", is_error=False)
                # Missing seq2_tool2 result
            ],
        ),
        # Third tool call sequence - completely interrupted
        Message(
            role="assistant",
            content=[
                ToolCall(id="seq3_tool1", name="write_file", input={"path": "summary.txt", "content": "Summary"}),
                ToolCall(id="seq3_tool2", name="list_dir", input={"path": "dir1"}),
            ],
        ),
        # No user message for third sequence (interrupted)
    ]

    # Verify the corrupted history is invalid
    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    # Fix the corrupted history
    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Set up the fixed history in the agent
    base_agent.history = MessageHistory(fixed_history)

    system_message = Message(
        role="system",
        content=[
            TextBlock(
                text="You are a file management assistant. When operations are interrupted, acknowledge this and offer to retry."
            )
        ],
    )

    # Add a new user message
    base_agent.history.append(
        Message(
            role="user",
            content=[
                TextBlock(
                    text="Some operations seem to have been interrupted. Can you tell me what happened and what we should do next?"
                )
            ],
        )
    )

    # Guard only the network call; assertion failures below must not be
    # re-reported as a failed API call.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=250,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with consecutive tool interruptions: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0

    # The response should acknowledge the interruptions. This depends on the
    # model's wording, so include the text in the failure output for triage.
    response_lower = response_text.lower()
    assert any(
        word in response_lower for word in ["interrupt", "error", "retry", "again", "issue"]
    ), f"Response did not acknowledge the interruptions: {response_text!r}"
1533
+
1534
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_edge_case_tool_corruption_with_real_api(self, base_agent):
    """Integration test: Test edge case corruptions that might break the API.

    The edge case is an assistant message with tool calls that is followed
    directly by another assistant message instead of a user message carrying
    the tool results. After ``fix_incomplete_tool_calls`` the history must
    validate, and a real ``llm.generate`` call on it must return non-empty text.

    Skipped when no usable API key is configured.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create an edge case: assistant message with tools followed by another assistant message
    edge_case_history = [
        Message(role="user", content=[TextBlock(text="Process this complex workflow.")]),
        Message(role="assistant", content=[TextBlock(text="I'll process the workflow step by step.")]),
        # Assistant with tools
        Message(
            role="assistant",
            content=[
                ToolCall(id="workflow_step1", name="read_file", input={"path": "config.yaml"}),
                ToolCall(id="workflow_step2", name="validate_data", input={"data": "test"}),
                ToolCall(id="workflow_step3", name="process_workflow", input={"step": 1}),
            ],
        ),
        # Another assistant message (invalid - should have user message with tool results first)
        Message(role="assistant", content=[TextBlock(text="Let me continue with the next steps.")]),
        # User asking about status
        Message(role="user", content=[TextBlock(text="How is the workflow going?")]),
    ]

    # Verify this edge case is invalid
    assert base_agent._is_history_valid_for_anthropic(edge_case_history) is False

    # Fix the edge case
    fixed_history = base_agent.fix_incomplete_tool_calls(edge_case_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Set up the fixed history
    base_agent.history = MessageHistory(fixed_history)

    system_message = Message(
        role="system", content=[TextBlock(text="You are a workflow processing assistant.")]
    )

    # Guard only the network call so that assertion failures on the response
    # are reported as such rather than as an API failure.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=150,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with edge case corruption: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
1597
+
1598
+ # Tests for new safe append methods
1599
def test_append_user_message_with_incomplete_tool_calls(self, base_agent):
    """append_user_message must leave an unanswered tool call in the history as-is.

    The repair is expected to happen only when fix_incomplete_tool_calls is
    invoked explicitly, which the last part of the test confirms.
    """
    # Seed the history with an assistant turn whose tool call has no result
    dangling_call = ToolCall(id="tool1", name="test_tool", input={})
    pending_turn = Message(role="assistant", content=[dangling_call])
    base_agent.history.append(pending_turn)

    # Appending a user message should not repair anything
    base_agent.append_user_message("New user message")

    # Exactly two entries (assistant, new user message) and still invalid
    assert len(base_agent.history) == 2
    assert not base_agent._is_history_valid_for_anthropic()

    # The appended entry is the plain user text
    appended = base_agent.history[1]
    assert appended.role == "user"
    assert appended.content[0].text == "New user message"

    # An explicit repair yields: assistant, user (tool result), user (new message)
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert len(repaired) == 3
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
1620
+
1621
def test_append_user_message_no_fix_needed(self, base_agent):
    """append_user_message simply adds a message when the history has no tool calls."""
    # Start from a single ordinary user message
    opening = Message(role="user", content=[TextBlock(text="Hello")])
    base_agent.history.append(opening)

    # Add a second user message through the helper
    base_agent.append_user_message("Another message")

    # The helper appended exactly one entry holding the given text
    assert len(base_agent.history) == 2
    latest = base_agent.history[1]
    assert latest.content[0].text == "Another message"
1632
+
1633
def test_append_user_message_with_list_content(self, base_agent):
    """append_user_message accepts a list of ContentBlocks and keeps each block in order."""
    first_part = TextBlock(text="Message part 1")
    second_part = TextBlock(text="Message part 2")

    base_agent.append_user_message([first_part, second_part])

    # One message was stored, carrying both blocks
    assert len(base_agent.history) == 1
    stored_blocks = base_agent.history[0].content
    assert len(stored_blocks) == 2
    assert stored_blocks[0].text == "Message part 1"
    assert stored_blocks[1].text == "Message part 2"
1643
+
1644
def test_append_user_message_with_single_block(self, base_agent):
    """append_user_message accepts one bare ContentBlock and stores it as the only block."""
    lone_block = TextBlock(text="Single block message")

    base_agent.append_user_message(lone_block)

    # One message was stored, carrying exactly that block's text
    assert len(base_agent.history) == 1
    stored_blocks = base_agent.history[0].content
    assert len(stored_blocks) == 1
    assert stored_blocks[0].text == "Single block message"
1653
+
1654
def test_append_assistant_message(self, base_agent):
    """append_assistant_message stores the given assistant Message after the user turn."""
    # A user turn comes first
    base_agent.append_user_message("User question")

    # Then the assistant reply is appended through the helper
    reply = Message(role="assistant", content=[TextBlock(text="Assistant response")])
    base_agent.append_assistant_message(reply)

    assert len(base_agent.history) == 2
    stored_reply = base_agent.history[1]
    assert stored_reply.role == "assistant"
    assert stored_reply.content[0].text == "Assistant response"
1666
+
1667
def test_get_effective_history_preserves_thinking_blocks(self, base_agent):
    """get_effective_history_for_llm keeps thinking and redacted-thinking blocks intact.

    Both signed and unsigned ThinkingBlocks, a RedactedThinkingBlock and the
    trailing TextBlock must come back in their original order with their
    payload fields unchanged.
    """
    assistant_blocks = [
        ThinkingBlock(thinking="unsigned thinking"),
        ThinkingBlock(thinking="signed thinking", signature="sig"),
        RedactedThinkingBlock(data="encrypted-redacted-thinking"),
        TextBlock(text="final answer"),
    ]
    base_agent.history = MessageHistory([Message(role="assistant", content=assistant_blocks)])

    effective = base_agent.get_effective_history_for_llm()

    # A single message comes back with the same block types in the same order
    assert len(effective) == 1
    expected_types = ["thinking", "thinking", "redacted_thinking", "text"]
    assert [block.type for block in effective[0].content] == expected_types

    # Payload fields of the thinking blocks are untouched
    kept = effective[0].content
    assert kept[0].thinking == "unsigned thinking"
    assert kept[1].thinking == "signed thinking"
    assert kept[1].signature == "sig"
    assert kept[2].data == "encrypted-redacted-thinking"
1695
+
1696
def test_extend_history_no_fix_needed(self, base_agent):
    """extend_history appends the given messages in order to an already valid history."""
    # Existing, valid exchange
    for role, text in (("user", "Hello"), ("assistant", "Hi there")):
        base_agent.history.append(Message(role=role, content=[TextBlock(text=text)]))

    # Follow-up exchange added in one call
    follow_up = [
        Message(role="user", content=[TextBlock(text="How are you?")]),
        Message(role="assistant", content=[TextBlock(text="I'm doing well")]),
    ]
    base_agent.extend_history(follow_up)

    # Both new messages landed after the existing two
    assert len(base_agent.history) == 4
    third, fourth = base_agent.history[2], base_agent.history[3]
    assert third.content[0].text == "How are you?"
    assert fourth.content[0].text == "I'm doing well"
1713
+
1714
def test_needs_tool_call_fix(self, base_agent):
    """Check _needs_tool_call_fix after each step of a growing history.

    It is expected to be falsy for an empty history, a trailing user message
    and a trailing text-only assistant message, and truthy once the last
    message is an assistant message containing a tool call.
    """
    # Nothing recorded yet
    assert not base_agent._needs_tool_call_fix()

    # History ends on a user message
    user_turn = Message(role="user", content=[TextBlock(text="Hello")])
    base_agent.history.append(user_turn)
    assert not base_agent._needs_tool_call_fix()

    # History ends on an assistant message without tools
    plain_reply = Message(role="assistant", content=[TextBlock(text="Hi")])
    base_agent.history.append(plain_reply)
    assert not base_agent._needs_tool_call_fix()

    # History ends on an assistant message with a tool call
    tool_reply = Message(role="assistant", content=[ToolCall(id="tool1", name="test", input={})])
    base_agent.history.append(tool_reply)
    assert base_agent._needs_tool_call_fix()
1730
+
1731
def test_needs_tool_call_fix_with_mixed_content(self, base_agent):
    """_needs_tool_call_fix is truthy when the last assistant message mixes text and a tool call."""
    # Assistant message holding a text block followed by a tool call
    mixed_blocks = [
        TextBlock(text="Let me help you with that."),
        ToolCall(id="tool1", name="read_file", input={"path": "test.txt"}),
    ]
    base_agent.history.append(Message(role="assistant", content=mixed_blocks))

    assert base_agent._needs_tool_call_fix()
1745
+
1746
def test_needs_tool_call_fix_string_content(self, base_agent):
    """_needs_tool_call_fix is falsy when the last assistant message has plain string content."""
    # Not expected in practice, but the edge case is covered anyway
    string_only = Message(role="assistant", content="Just a string")
    base_agent.history.append(string_only)

    assert not base_agent._needs_tool_call_fix()
1752
+
1753
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_safe_append_with_real_api(self, base_agent):
    """Test that append methods work with real Anthropic API when history is fixed.

    A user message is appended after an assistant turn whose tool call has no
    result, which leaves the history invalid. The history is then repaired
    with ``fix_incomplete_tool_calls`` and sent through a real
    ``llm.generate`` call, which must return non-empty text.

    Skipped when no usable API key is configured.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Set up history with tool calls
    base_agent.history.append(Message(role="user", content=[TextBlock(text="Read the README file")]))
    base_agent.history.append(
        Message(
            role="assistant",
            content=[
                TextBlock(text="I'll read the README file for you."),
                ToolCall(id="tool1", name="read_file", input={"path": "README.md"}),
            ],
        )
    )

    # Append user message - history is now invalid
    base_agent.append_user_message("What does it say?")

    # Verify history is invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Fix history before API call
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])

    # Guard only the network call; the response assertions stay outside so
    # their failures are not mislabelled as an API failure.
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=200,
        )
    except Exception as e:
        pytest.fail(f"API call failed after fixing history: {e}")

    assert response is not None
    assert response.get_text_content()
1799
+
1800
def test_append_user_message_multiple_incomplete_sequences(self, base_agent):
    """append_user_message leaves several incomplete tool sequences unrepaired.

    The history has one partially answered sequence (tool2 missing) and one
    unanswered sequence (tool3 missing). Only an explicit call to
    fix_incomplete_tool_calls is expected to supply results for both.
    """
    # First sequence: two calls, only tool1 answered
    first_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="first_tool", input={}),
            ToolCall(id="tool2", name="second_tool", input={}),
        ],
    )
    partial_results = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="tool1", name="first_tool", content="Result 1", is_error=False)
            # Missing tool2 result
        ],
    )
    # Second sequence: one call, never answered
    unanswered_call = Message(role="assistant", content=[ToolCall(id="tool3", name="third_tool", input={})])

    base_agent.history = MessageHistory(
        [
            Message(role="user", content=[TextBlock(text="Initial request")]),
            first_calls,
            partial_results,
            unanswered_call,
            # Missing tool3 result
        ]
    )

    # Append new user message
    base_agent.append_user_message("Continue with the task")

    # Appending does not repair anything, so the history is still invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # An explicit repair makes it valid
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert base_agent._is_history_valid_for_anthropic(repaired)

    # Collect the ids of every tool result found in user messages
    answered_ids = {
        block.tool_use_id
        for msg in repaired
        if msg.role == "user"
        for block in msg.content
        if isinstance(block, ToolResult)
    }
    assert "tool2" in answered_ids  # Should have placeholder for tool2
    assert "tool3" in answered_ids  # Should have placeholder for tool3
1844
+
1845
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_restore_and_append_scenario(self, base_agent):
    """Test that restore and append leave the history invalid, and an explicit fix makes it usable.

    A serialized history ending in an unanswered tool call is restored and a
    user message is appended; the history must stay invalid after both steps.
    It is then repaired with ``fix_incomplete_tool_calls`` before a real
    ``llm.generate`` call, which must return non-empty text.

    Skipped when no usable API key is configured.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a serialized history that ends with tool calls (simulating what's in the DB)
    serialized_history = [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Help me with a task", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [{"type": "text", "text": "I'll help you with that task.", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_call",
                    "id": "tool_1",
                    "name": "read_file",
                    "input": {"path": "task.txt"},
                    "cache_checkpoint": False,
                }
            ],
            "stop_reason": "tool_use",
        },
        # Missing tool result - simulating an interrupted session
    ]

    # Restore the history (should NOT auto-fix)
    base_agent.restore_message_history(serialized_history)

    # Verify history is still invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Add a new user message (history remains invalid)
    base_agent.append_user_message("What's the status of my task?")

    # Verify the history is still invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Fix history before API call
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])

    # Guard only the network call. If it fails with the tool_use_id error,
    # the fix did not work; response assertions are checked separately below.
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=100,
        )
    except Exception as e:
        pytest.fail(f"API call failed with fixed history: {e}")

    assert response is not None
    assert response.get_text_content()
1914
+
1915
def test_get_effective_history_falls_back_when_no_compression(self, base_agent):
    """Without compression, get_effective_history_for_llm returns as many messages as the full history."""
    exchange = [
        Message(role="user", content=[TextBlock(text="hi")]),
        Message(role="assistant", content=[TextBlock(text="yo")]),
    ]
    base_agent.history = MessageHistory(exchange)

    effective = base_agent.get_effective_history_for_llm()

    # Both messages of the uncompressed history are present
    assert len(effective) == 2
1925
+
1926
def test_get_effective_history_after_markers(self, base_agent):
    """With last_compression_index set, only the appended summary message is effective."""
    earlier_turns = [
        Message(role=role, content=[TextBlock(text=text)])
        for role, text in (("user", "a"), ("assistant", "b"), ("user", "c"))
    ]
    base_agent.history = MessageHistory(earlier_turns)
    base_agent.last_compression_index = 2

    # Append a summary message as it would be after compression
    summary = Message(role="user", content=[TextBlock(text="CONVERSATION HISTORY SUMMARY (compressed at ...)")])
    base_agent.history.append(summary)

    effective = base_agent.get_effective_history_for_llm()

    # boundary is 2, so the tail after index 2 is empty apart from the summary
    assert len(effective) == 1
    assert "CONVERSATION HISTORY SUMMARY" in effective[0].content[0].text