kolega-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolega_code/__init__.py +151 -0
- kolega_code/agent/__init__.py +42 -0
- kolega_code/agent/baseagent.py +998 -0
- kolega_code/agent/browseragent.py +123 -0
- kolega_code/agent/coder.py +157 -0
- kolega_code/agent/common.py +41 -0
- kolega_code/agent/compression.py +81 -0
- kolega_code/agent/context.py +112 -0
- kolega_code/agent/conversation.py +408 -0
- kolega_code/agent/generalagent.py +146 -0
- kolega_code/agent/investigationagent.py +123 -0
- kolega_code/agent/planningagent.py +187 -0
- kolega_code/agent/prompt_provider.py +196 -0
- kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
- kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
- kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
- kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
- kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
- kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
- kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
- kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
- kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
- kolega_code/agent/prompts.py +192 -0
- kolega_code/agent/tests/__init__.py +0 -0
- kolega_code/agent/tests/llm/__init__.py +0 -0
- kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
- kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
- kolega_code/agent/tests/llm/test_client.py +773 -0
- kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
- kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
- kolega_code/agent/tests/llm/test_exceptions.py +249 -0
- kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
- kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
- kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
- kolega_code/agent/tests/llm/test_model_specs.py +17 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
- kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
- kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
- kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
- kolega_code/agent/tests/services/__init__.py +1 -0
- kolega_code/agent/tests/services/test_browser.py +447 -0
- kolega_code/agent/tests/services/test_browser_parity.py +353 -0
- kolega_code/agent/tests/services/test_file_system.py +699 -0
- kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
- kolega_code/agent/tests/services/test_terminal.py +154 -0
- kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
- kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
- kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
- kolega_code/agent/tests/test_base_agent.py +1942 -0
- kolega_code/agent/tests/test_coder_attachments.py +330 -0
- kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
- kolega_code/agent/tests/test_commands.py +179 -0
- kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
- kolega_code/agent/tests/test_empty_message_handling.py +48 -0
- kolega_code/agent/tests/test_general_agent.py +242 -0
- kolega_code/agent/tests/test_html.py +320 -0
- kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
- kolega_code/agent/tests/test_planning_agent.py +227 -0
- kolega_code/agent/tests/test_prompt_provider.py +271 -0
- kolega_code/agent/tests/test_tool_registry.py +102 -0
- kolega_code/agent/tests/test_tools.py +549 -0
- kolega_code/agent/tests/tool_backend/__init__.py +0 -0
- kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
- kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
- kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
- kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
- kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
- kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
- kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
- kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
- kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
- kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
- kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
- kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
- kolega_code/agent/tool_backend/agent_tool.py +414 -0
- kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
- kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
- kolega_code/agent/tool_backend/base_tool.py +217 -0
- kolega_code/agent/tool_backend/browser_tool.py +271 -0
- kolega_code/agent/tool_backend/build_tool.py +93 -0
- kolega_code/agent/tool_backend/create_file_tool.py +52 -0
- kolega_code/agent/tool_backend/glob_tool.py +323 -0
- kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
- kolega_code/agent/tool_backend/memory_tool.py +79 -0
- kolega_code/agent/tool_backend/read_file_tool.py +119 -0
- kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
- kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
- kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
- kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
- kolega_code/agent/tool_backend/streaming_tool.py +47 -0
- kolega_code/agent/tool_backend/terminal_tool.py +643 -0
- kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
- kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
- kolega_code/agent/tools.py +1704 -0
- kolega_code/agent/utils/commands.py +94 -0
- kolega_code/cli/__init__.py +1 -0
- kolega_code/cli/app.py +2756 -0
- kolega_code/cli/config.py +280 -0
- kolega_code/cli/connection.py +49 -0
- kolega_code/cli/file_index.py +147 -0
- kolega_code/cli/main.py +564 -0
- kolega_code/cli/mentions.py +155 -0
- kolega_code/cli/messages.py +89 -0
- kolega_code/cli/provider_registry.py +96 -0
- kolega_code/cli/session_store.py +207 -0
- kolega_code/cli/settings.py +87 -0
- kolega_code/cli/skills.py +409 -0
- kolega_code/cli/slash_commands.py +108 -0
- kolega_code/cli/tests/__init__.py +1 -0
- kolega_code/cli/tests/test_app.py +4251 -0
- kolega_code/cli/tests/test_cli_config.py +171 -0
- kolega_code/cli/tests/test_connection.py +26 -0
- kolega_code/cli/tests/test_file_index.py +103 -0
- kolega_code/cli/tests/test_main.py +455 -0
- kolega_code/cli/tests/test_mentions.py +108 -0
- kolega_code/cli/tests/test_session_store.py +67 -0
- kolega_code/cli/tests/test_settings.py +62 -0
- kolega_code/cli/tests/test_skills.py +157 -0
- kolega_code/cli/tests/test_slash_commands.py +88 -0
- kolega_code/cli/theme.py +180 -0
- kolega_code/config.py +154 -0
- kolega_code/events.py +202 -0
- kolega_code/llm/client.py +300 -0
- kolega_code/llm/exceptions.py +285 -0
- kolega_code/llm/instrumented_client.py +520 -0
- kolega_code/llm/models.py +1368 -0
- kolega_code/llm/providers/__init__.py +0 -0
- kolega_code/llm/providers/anthropic.py +387 -0
- kolega_code/llm/providers/base.py +71 -0
- kolega_code/llm/providers/google.py +157 -0
- kolega_code/llm/providers/models.py +37 -0
- kolega_code/llm/providers/openai.py +363 -0
- kolega_code/llm/ratelimit.py +40 -0
- kolega_code/llm/specs.py +67 -0
- kolega_code/llm/tool_execution_ids.py +18 -0
- kolega_code/models/__init__.py +9 -0
- kolega_code/models/sandbox_terminal_state.py +47 -0
- kolega_code/runtime.py +50 -0
- kolega_code/sandbox/README.md +200 -0
- kolega_code/sandbox/__init__.py +21 -0
- kolega_code/sandbox/async_filesystem.py +475 -0
- kolega_code/sandbox/base.py +297 -0
- kolega_code/sandbox/browser.py +25 -0
- kolega_code/sandbox/event_loop.py +43 -0
- kolega_code/sandbox/filesystem.py +341 -0
- kolega_code/sandbox/local.py +118 -0
- kolega_code/sandbox/serializer.py +175 -0
- kolega_code/sandbox/terminal.py +868 -0
- kolega_code/sandbox/utils.py +216 -0
- kolega_code/services/base.py +255 -0
- kolega_code/services/browser.py +444 -0
- kolega_code/services/file_system.py +749 -0
- kolega_code/services/html.py +221 -0
- kolega_code/services/terminal.py +903 -0
- kolega_code/tools/__init__.py +22 -0
- kolega_code/tools/core.py +33 -0
- kolega_code/tools/definitions.py +81 -0
- kolega_code/tools/registry.py +73 -0
- kolega_code-0.1.0.dist-info/METADATA +157 -0
- kolega_code-0.1.0.dist-info/RECORD +171 -0
- kolega_code-0.1.0.dist-info/WHEEL +4 -0
- kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
- kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1942 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import uuid
|
|
3
|
+
from types import SimpleNamespace
|
|
4
|
+
from unittest.mock import AsyncMock, patch
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
|
|
9
|
+
from kolega_code.agent.baseagent import BaseAgent
|
|
10
|
+
from kolega_code.config import AgentConfig, ModelConfig, ModelProvider, RateLimitConfig
|
|
11
|
+
from kolega_code.events import AgentConnectionManager
|
|
12
|
+
from kolega_code.llm.models import (
|
|
13
|
+
Message,
|
|
14
|
+
MessageHistory,
|
|
15
|
+
RedactedThinkingBlock,
|
|
16
|
+
TextBlock,
|
|
17
|
+
ThinkingBlock,
|
|
18
|
+
ToolCall,
|
|
19
|
+
ToolResult,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Load environment variables
|
|
23
|
+
load_dotenv()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.fixture
def agent_config():
    """Build an AgentConfig whose three model roles all use the same Anthropic model.

    The Anthropic key is read from ANTHROPIC_API_KEY and falls back to the
    placeholder "test_key"; the OpenAI key is always the placeholder "test-key".
    Only the thinking role sets ``thinking_tokens``.
    """
    model_name = "claude-haiku-4-5-20251001"

    def anthropic_model(**extra):
        # A fresh RateLimitConfig per role, so no two roles share an instance.
        return ModelConfig(
            provider=ModelProvider.ANTHROPIC,
            model=model_name,
            rate_limits=RateLimitConfig(),
            **extra,
        )

    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "test_key")
    return AgentConfig(
        anthropic_api_key=anthropic_key,
        openai_api_key="test-key",
        long_context_config=anthropic_model(),
        fast_config=anthropic_model(),
        thinking_config=anthropic_model(thinking_tokens=1024),
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture
def mock_connection_manager():
    """Provide an AsyncMock constrained to the AgentConnectionManager interface."""
    manager = AsyncMock(spec=AgentConnectionManager)
    return manager
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@pytest.fixture
def base_agent(tmp_path, mock_connection_manager, agent_config):
    """Create a BaseAgent rooted at pytest's tmp_path with a mocked connection manager.

    Each invocation gets a freshly generated UUID string as its thread id.
    """
    agent_kwargs = {
        "project_path": tmp_path,
        "workspace_id": "test_workspace",
        "thread_id": str(uuid.uuid4()),
        "connection_manager": mock_connection_manager,
        "config": agent_config,
    }
    return BaseAgent(**agent_kwargs)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestBaseAgent:
|
|
67
|
+
@pytest.mark.asyncio
async def test_execute_single_tool_uses_execution_id_for_app_events_and_provider_id_for_result(self, base_agent):
    """Chat events carry the execution id while the returned result keeps the provider id.

    Also checks that the agent's three "current tool" id attributes are None
    once execute_single_tool has returned.
    """
    provider_id = "dispatch_investigation_agent_0"
    execution_id = "tool_exec_unique_123"

    class StubTools:
        """Minimal stand-in tool collection exposing one dispatch tool."""

        def get_tool_list(self):
            return [SimpleNamespace(name="dispatch_investigation_agent")]

        def registry(self):
            from kolega_code.agent.tools import ToolCollection
            from kolega_code.llm.models import ToolDefinition
            from kolega_code.tools import Tool, ToolRegistry

            parallel_names = {*ToolCollection.read_only_tools, *ToolCollection.agent_dispatch_tools}
            tool_registry = ToolRegistry()
            for tool_name in (entry.name for entry in self.get_tool_list()):
                definition = ToolDefinition(name=tool_name, description="", parameters=[])
                tool_registry.add(
                    Tool(
                        name=tool_name,
                        definition=definition,
                        handler=getattr(self, tool_name),
                        parallel_safe=tool_name in parallel_names,
                    )
                )
            return tool_registry

        async def dispatch_investigation_agent(self, **_inputs):
            return "investigation complete"

    # Replace the collaborators so only execute_single_tool's own logic runs.
    base_agent.tool_collection = StubTools()
    base_agent.send_chat_message = AsyncMock()
    base_agent.log_info = AsyncMock()

    request = ToolCall(
        id=provider_id,
        name="dispatch_investigation_agent",
        input={"task": "check this"},
        execution_id=execution_id,
    )
    result = await base_agent.execute_single_tool(request)

    assert result.tool_use_id == provider_id
    assert result.execution_id == execution_id

    # The first two chat messages must both be tagged with the execution id.
    chat_calls = base_agent.send_chat_message.call_args_list
    for position in (0, 1):
        assert chat_calls[position].kwargs["tool_call_id"] == execution_id

    for attribute in (
        "current_tool_call_id",
        "current_tool_execution_id",
        "current_provider_tool_call_id",
    ):
        assert getattr(base_agent, attribute) is None
|
|
113
|
+
|
|
114
|
+
@pytest.mark.asyncio
async def testcompress_history(self, base_agent):
    """compress_history appends a summary and keeps the full ten-message history.

    The LLM's generate method is mocked. The test checks the history length,
    the compression marker, the effective history length, and the keyword
    arguments passed to generate.
    """
    # Five user/assistant turns: "Message 1" / "Response 1" ... "Message 5" / "Response 5".
    conversation = [
        (role, f"{label} {turn}")
        for turn in range(1, 6)
        for role, label in (("user", "Message"), ("assistant", "Response"))
    ]
    messages = [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    base_agent.history = MessageHistory(messages)

    canned_summary = Message(
        role="assistant",
        content=[TextBlock(text="This is a compressed summary of the conversation")],
    )

    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        mock_generate.return_value = canned_summary

        await base_agent.compress_history()

        turn_count = len(conversation)
        # Original messages are all retained, with one extra message appended.
        assert len(base_agent.history) == turn_count + 1
        # The marker points at the last pre-summary message.
        assert base_agent.last_compression_index == turn_count - 1
        # Only a single message is handed to the LLM after compression.
        effective = base_agent.get_effective_history_for_llm()
        assert len(effective) == 1

        mock_generate.assert_called_once()
        generate_kwargs = mock_generate.call_args.kwargs
        expected_model = base_agent.config.long_context_config.model
        assert generate_kwargs["model"] == expected_model
        assert generate_kwargs["max_completion_tokens"] == base_agent.model_completion_tokens
|
|
160
|
+
|
|
161
|
+
@pytest.mark.asyncio
async def testcompress_history_insufficient_history(self, base_agent):
    """compress_history leaves a short four-message history untouched and never calls the LLM."""
    # Setup test data with less than 5 messages
    conversation = [
        ("user", "Message 1"),
        ("assistant", "Response 1"),
        ("user", "Message 2"),
        ("assistant", "Response 2"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Mock the LLM client's generate method
    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        # Call the method
        await base_agent.compress_history()

        # Verify the history was not compressed
        assert len(base_agent.history) == len(conversation)
        assert all(isinstance(msg, Message) for msg in base_agent.history)
        # Compare against the source data: comparing the history with itself
        # is always true and would not detect a modified or reordered history.
        assert [(msg.role, msg.content[0].text) for msg in base_agent.history] == conversation
        mock_generate.assert_not_called()
|
|
185
|
+
|
|
186
|
+
@pytest.mark.asyncio
async def testcompress_history_error_handling(self, base_agent):
    """compress_history leaves the history unmodified when the LLM call raises.

    The mocked generate raises ``Exception("Test error")``; the test expects
    compress_history to return normally with all ten original messages intact.
    """
    # Setup test data
    conversation = [
        ("user", "Message 1"),
        ("assistant", "Response 1"),
        ("user", "Message 2"),
        ("assistant", "Response 2"),
        ("user", "Message 3"),
        ("assistant", "Response 3"),
        ("user", "Message 4"),
        ("assistant", "Response 4"),
        ("user", "Message 5"),
        ("assistant", "Response 5"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Mock the LLM client's generate method to raise an exception
    with patch.object(base_agent.llm, "generate", new_callable=AsyncMock) as mock_generate:
        mock_generate.side_effect = Exception("Test error")

        # Call the method
        await base_agent.compress_history()

        # Verify the history was not modified
        assert len(base_agent.history) == len(conversation)
        assert all(isinstance(msg, Message) for msg in base_agent.history)
        # Compare against the source data: comparing the history with itself
        # is always true and would not detect a modified or reordered history.
        assert [(msg.role, msg.content[0].text) for msg in base_agent.history] == conversation
|
|
217
|
+
|
|
218
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def testcompress_history_with_real_llm(self, base_agent):
    """Integration test using the real LLM client to test message compression.

    Note: This test requires a valid API key to be set in the environment.
    It is skipped when the configured key is missing or is the "test_key"
    placeholder.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Setup test data with a realistic conversation using Message objects
    conversation = [
        ("user", "What is Python?"),
        (
            "assistant",
            "Python is a high-level, interpreted programming language known for its simplicity and readability.",
        ),
        ("user", "What are its main features?"),
        (
            "assistant",
            "Python features include dynamic typing, automatic memory management, and a comprehensive standard library.",
        ),
        ("user", "How do I write a function in Python?"),
        (
            "assistant",
            "You can define a function using the def keyword, followed by the function name and parameters in parentheses.",
        ),
        ("user", "What is a decorator?"),
        (
            "assistant",
            "A decorator is a design pattern that allows you to modify the behavior of functions or classes.",
        ),
        ("user", "Show me an example of a decorator."),
        ("assistant", "Here is a simple decorator example: @property def name(self): return self._name"),
    ]

    base_agent.history = MessageHistory(
        [Message(role=role, content=[TextBlock(text=text)]) for role, text in conversation]
    )

    # Store the last two messages for comparison
    last_two_messages = base_agent.history[-2:]

    # No try/except wrapper here: catching Exception would also catch
    # AssertionError and reduce it to a plain string, discarding pytest's
    # assertion introspection and the original traceback. Any error raised
    # below fails the test on its own.
    await base_agent.compress_history()

    # Verify the summary was appended (allowing for environments where real LLM may be skipped)
    assert len(base_agent.history) >= len(conversation)

    # Verify the summary message was appended at the end
    summary_message = base_agent.history[-1]
    assert isinstance(summary_message, Message)
    assert summary_message.role == "user"
    summary_text = summary_message.content[0].text
    assert ("CONVERSATION HISTORY SUMMARY" in summary_text) or ("## Analysis Section" in summary_text)

    # Verify the last two messages are still present just before the summary
    assert base_agent.history[-3:-1] == last_two_messages
|
|
283
|
+
|
|
284
|
+
# Tests for dump/restore message history
|
|
285
|
+
def test_dump_message_history_empty(self, base_agent):
    """An empty MessageHistory dumps to an empty list."""
    base_agent.history = MessageHistory()
    assert base_agent.dump_message_history() == []
|
|
290
|
+
|
|
291
|
+
def test_dump_message_history_populated(self, base_agent):
    """dump_message_history serializes text, tool-call and tool-result messages to dicts."""
    text_request = Message(role="user", content=[TextBlock(text="Hello")])
    text_reply = Message(role="assistant", content=[TextBlock(text="Hi there!")])
    tool_request = Message(
        role="assistant",
        content=[ToolCall(id="tool1", name="read_file", input={"path": "a.txt"})],
    )
    tool_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="read_file", content="File content", is_error=False)],
    )
    original_history = MessageHistory([text_request, text_reply, tool_request, tool_reply])

    base_agent.history = original_history
    dumped_history = base_agent.dump_message_history()

    assert len(dumped_history) == 4

    # Message 0: plain user text, including the default cache_checkpoint flag.
    entry = dumped_history[0]
    assert isinstance(entry, dict)
    assert entry["role"] == "user"
    assert isinstance(entry["content"], list)
    block = entry["content"][0]
    assert block["type"] == "text"
    assert block["text"] == "Hello"
    assert block["cache_checkpoint"] is False

    # Message 1: plain assistant text.
    entry = dumped_history[1]
    assert isinstance(entry, dict)
    assert entry["role"] == "assistant"
    block = entry["content"][0]
    assert block["type"] == "text"
    assert block["text"] == "Hi there!"

    # Message 2: assistant tool call.
    entry = dumped_history[2]
    assert isinstance(entry, dict)
    assert entry["role"] == "assistant"
    block = entry["content"][0]
    assert block["type"] == "tool_call"
    assert block["id"] == "tool1"
    assert block["name"] == "read_file"
    assert block["input"] == {"path": "a.txt"}

    # Message 3: the tool result, carried on a user-role message.
    entry = dumped_history[3]
    assert isinstance(entry, dict)
    assert entry["role"] == "user"
    block = entry["content"][0]
    assert block["type"] == "tool_result"
    assert block["tool_use_id"] == "tool1"
    assert block["content"] == "File content"
    assert block["name"] == "read_file"
    assert block["is_error"] is False

    # Finally, the dump must match each message's own to_dict output exactly.
    assert dumped_history == [message.to_dict() for message in original_history]
|
|
338
|
+
|
|
339
|
+
def test_restore_message_history_empty(self, base_agent):
    """Restoring from an empty list yields an empty MessageHistory."""
    base_agent.restore_message_history([])

    restored = base_agent.history
    assert isinstance(restored, MessageHistory)
    assert len(restored) == 0
|
|
345
|
+
|
|
346
|
+
def test_restore_message_history_populated(self, base_agent):
    """restore_message_history rebuilds text, tool-call and tool-result messages from dicts."""

    def block(block_type, **fields):
        # Serialized content block in the same shape to_dict produces.
        return {"type": block_type, **fields, "cache_checkpoint": False}

    def message(role, content_block, stop_reason=None):
        return {"role": role, "content": [content_block], "stop_reason": stop_reason}

    serialized_history = [
        message("user", block("text", text="Another query")),
        message(
            "assistant",
            block("tool_call", id="tool2", name="list_dir", input={"path": "/tmp"}),
            stop_reason="tool_use",
        ),
        message(
            "user",
            block(
                "tool_result",
                tool_use_id="tool2",
                content="[file1, file2]",
                name="list_dir",
                is_error=False,
            ),
        ),
    ]

    base_agent.restore_message_history(serialized_history)

    restored = base_agent.history
    assert isinstance(restored, MessageHistory)
    assert len(restored) == 3

    # First message: user text.
    user_text = restored[0]
    assert isinstance(user_text, Message)
    assert user_text.role == "user"
    text_block = user_text.content[0]
    assert isinstance(text_block, TextBlock)
    assert text_block.text == "Another query"
    assert user_text.stop_reason is None

    # Second message: assistant tool call.
    assistant_call = restored[1]
    assert isinstance(assistant_call, Message)
    assert assistant_call.role == "assistant"
    call_block = assistant_call.content[0]
    assert isinstance(call_block, ToolCall)
    assert call_block.id == "tool2"
    assert call_block.name == "list_dir"
    assert call_block.input == {"path": "/tmp"}
    assert assistant_call.stop_reason == "tool_use"
    # The tool_calls attribute must hold the same single call.
    assert len(assistant_call.tool_calls) == 1
    assert assistant_call.tool_calls[0] == assistant_call.content[0]

    # Third message: tool result on a user-role message.
    user_result = restored[2]
    assert isinstance(user_result, Message)
    assert user_result.role == "user"
    result_block = user_result.content[0]
    assert isinstance(result_block, ToolResult)
    assert result_block.tool_use_id == "tool2"
    assert result_block.content == "[file1, file2]"
    assert result_block.name == "list_dir"
    assert result_block.is_error is False
    assert user_result.stop_reason is None
|
|
420
|
+
|
|
421
|
+
def test_restore_message_history_sanitizes_oversized_tool_results(self, base_agent):
    """A 100,001-character tool result is replaced by a short placeholder on restore.

    The tool id, tool name and error flag must be preserved on the restored block.
    """
    oversized_block = {
        "type": "tool_result",
        "tool_use_id": "read_entire_file_24",
        "content": "x" * 100_001,
        "name": "read_entire_file",
        "is_error": False,
        "cache_checkpoint": False,
    }
    payload = [{"role": "user", "content": [oversized_block], "stop_reason": None}]

    base_agent.restore_message_history(payload)

    restored_block = base_agent.history[0].content[0]
    assert isinstance(restored_block, ToolResult)
    assert restored_block.tool_use_id == "read_entire_file_24"
    assert restored_block.name == "read_entire_file"
    assert restored_block.is_error is False
    # The content is now a short notice rather than the original payload.
    assert len(restored_block.content) < 500
    assert "Tool result omitted from history" in restored_block.content
|
|
449
|
+
|
|
450
|
+
def test_dump_restore_cycle(self, base_agent):
    """Dumping and then restoring a history reproduces the same serialized structure.

    The history covers text, thinking, redacted-thinking, tool-call and
    tool-result blocks.
    """
    reasoning_reply = Message(
        role="assistant",
        content=[
            ThinkingBlock(thinking="reasoning", signature="provider-signature"),
            RedactedThinkingBlock(data="encrypted-redacted-reasoning"),
            TextBlock(text="Acknowledged."),
        ],
    )
    original_history = MessageHistory(
        [
            Message(role="user", content=[TextBlock(text="Cycle Test")]),
            reasoning_reply,
            Message(role="assistant", content=[ToolCall(id="tool3", name="dummy_tool", input={})]),
            Message(
                role="user",
                content=[ToolResult(tool_use_id="tool3", name="dummy_tool", content="Success", is_error=False)],
            ),
        ]
    )
    base_agent.history = original_history

    # Round trip: serialize, then load the serialized form back in.
    base_agent.restore_message_history(base_agent.dump_message_history())

    # Restored messages are new instances, so compare their to_dict output
    # instead of the objects themselves.
    assert len(base_agent.history) == len(original_history)
    restored_dicts = [message.to_dict() for message in base_agent.history]
    original_dicts = [message.to_dict() for message in original_history]
    assert restored_dicts == original_dicts
|
|
485
|
+
|
|
486
|
+
# Tests for history validation methods
|
|
487
|
+
def test_is_history_valid_for_anthropic_valid_history(self, base_agent):
    """A tool call followed by its matching tool result passes validation."""
    tool_request = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    tool_reply = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool", content="Success", is_error=False)],
    )
    history = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        Message(role="assistant", content=[TextBlock(text="Response")]),
        tool_request,
        tool_reply,
    ]

    verdict = base_agent._is_history_valid_for_anthropic(history)
    assert verdict is True
|
|
500
|
+
|
|
501
|
+
def test_is_history_valid_for_anthropic_valid_history_no_tools(self, base_agent):
    """A text-only conversation without any tool calls should validate."""
    turns = [
        ("user", "Test message"),
        ("assistant", "Response"),
        ("user", "Another message"),
        ("assistant", "Another response"),
    ]
    messages = [Message(role=speaker, content=[TextBlock(text=words)]) for speaker, words in turns]

    outcome = base_agent._is_history_valid_for_anthropic(messages)

    assert outcome is True
|
|
511
|
+
|
|
512
|
+
def test_is_history_valid_for_anthropic_missing_tool_result(self, base_agent):
    """A tool call that is never followed by a result should make the history invalid."""
    opening_msg = Message(role="user", content=[TextBlock(text="Test message")])
    unanswered_call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    # The history deliberately stops here: no tool result message is supplied.
    messages = [opening_msg, unanswered_call]

    outcome = base_agent._is_history_valid_for_anthropic(messages)

    assert outcome is False
|
|
521
|
+
|
|
522
|
+
def test_is_history_valid_for_anthropic_incomplete_tool_results(self, base_agent):
    """A history where only some tool calls received results should be rejected."""
    two_calls_msg = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="test_tool1", input={}),
            ToolCall(id="tool2", name="test_tool2", input={}),
        ],
    )
    # Only tool1 is answered; the result for tool2 is deliberately left out.
    one_result_msg = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool1", content="Success", is_error=False)],
    )
    messages = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        two_calls_msg,
        one_result_msg,
    ]

    outcome = base_agent._is_history_valid_for_anthropic(messages)

    assert outcome is False
|
|
543
|
+
|
|
544
|
+
def test_is_history_valid_for_anthropic_wrong_role_sequence(self, base_agent):
    """A tool call followed by another assistant message (not a user message) should be rejected."""
    opening_msg = Message(role="user", content=[TextBlock(text="Test message")])
    tool_call_msg = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    # The message after a tool call is expected to come from the user; this one does not.
    misplaced_msg = Message(role="assistant", content=[TextBlock(text="Another assistant message")])
    messages = [opening_msg, tool_call_msg, misplaced_msg]

    outcome = base_agent._is_history_valid_for_anthropic(messages)

    assert outcome is False
|
|
553
|
+
|
|
554
|
+
def test_is_history_valid_for_anthropic_empty_history(self, base_agent):
    """An empty message list should be reported as valid."""
    no_messages = []

    outcome = base_agent._is_history_valid_for_anthropic(no_messages)

    assert outcome is True
|
|
557
|
+
|
|
558
|
+
def test_is_history_valid_for_anthropic_uses_self_history(self, base_agent):
    """Called without arguments, validation should inspect the agent's own history."""
    # A lone tool call with no result, stored on the agent rather than passed in.
    unanswered_call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    base_agent.history = MessageHistory([unanswered_call])

    outcome = base_agent._is_history_valid_for_anthropic()

    assert outcome is False
|
|
568
|
+
|
|
569
|
+
def testfix_incomplete_tool_calls_no_changes_needed(self, base_agent):
    """The fix should return an already-complete history with every message unchanged."""
    tool_call_msg = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    tool_result_msg = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool", content="Success", is_error=False)],
    )
    valid_history = [
        Message(role="user", content=[TextBlock(text="Test message")]),
        tool_call_msg,
        tool_result_msg,
    ]

    repaired = base_agent.fix_incomplete_tool_calls(valid_history)

    assert len(repaired) == 3
    # Compare serialized forms position by position.
    for position in range(3):
        assert repaired[position].to_dict() == valid_history[position].to_dict()
|
|
586
|
+
|
|
587
|
+
def testfix_incomplete_tool_calls_adds_placeholder_result(self, base_agent):
    """The fix should append a placeholder error result for a tool call that has none."""
    opening_msg = Message(role="user", content=[TextBlock(text="Test message")])
    orphaned_call = Message(role="assistant", content=[ToolCall(id="tool1", name="test_tool", input={})])
    # No tool result follows the call.
    incomplete_history = [opening_msg, orphaned_call]

    repaired = base_agent.fix_incomplete_tool_calls(incomplete_history)

    # Two original messages plus one placeholder message.
    assert len(repaired) == 3

    # The two original messages must come through unchanged.
    for position in range(2):
        assert repaired[position].to_dict() == incomplete_history[position].to_dict()

    # The appended message carries exactly one error-flagged result for tool1.
    appended_msg = repaired[2]
    assert appended_msg.role == "user"
    assert len(appended_msg.content) == 1
    placeholder = appended_msg.content[0]
    assert isinstance(placeholder, ToolResult)
    assert placeholder.tool_use_id == "tool1"
    assert placeholder.name == "test_tool"
    assert placeholder.is_error is True
    assert "interrupted" in placeholder.content.lower()
|
|
612
|
+
|
|
613
|
+
def testfix_incomplete_tool_calls_multiple_tools(self, base_agent):
    """The fix should add one placeholder result per unanswered call in a multi-call message."""
    unanswered_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="test_tool1", input={}),
            ToolCall(id="tool2", name="test_tool2", input={}),
        ],
    )
    # Neither call has a result.
    incomplete_history = [unanswered_calls]

    repaired = base_agent.fix_incomplete_tool_calls(incomplete_history)

    # One original message plus one placeholder message.
    assert len(repaired) == 2

    # A single user message should hold results for both calls.
    appended_msg = repaired[1]
    assert appended_msg.role == "user"
    assert len(appended_msg.content) == 2

    answered_ids = {block.tool_use_id for block in appended_msg.content}
    assert answered_ids == {"tool1", "tool2"}

    for block in appended_msg.content:
        assert isinstance(block, ToolResult)
        assert block.is_error is True
|
|
641
|
+
|
|
642
|
+
def testfix_incomplete_tool_calls_partial_results(self, base_agent):
    """The fix should complete a user message that answers only some of the tool calls."""
    calls_msg = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="test_tool1", input={}),
            ToolCall(id="tool2", name="test_tool2", input={}),
        ],
    )
    # Only tool1 is answered; tool2 has no result.
    partial_results_msg = Message(
        role="user",
        content=[ToolResult(tool_use_id="tool1", name="test_tool1", content="Success", is_error=False)],
    )
    incomplete_history = [calls_msg, partial_results_msg]

    repaired = base_agent.fix_incomplete_tool_calls(incomplete_history)

    # The placeholder is expected inside the existing user message, so the count stays at 2.
    assert len(repaired) == 2

    # The user message should now carry a result for each call.
    results_msg = repaired[1]
    assert results_msg.role == "user"
    assert len(results_msg.content) == 2

    answered_ids = {block.tool_use_id for block in results_msg.content if isinstance(block, ToolResult)}
    assert answered_ids == {"tool1", "tool2"}

    # The result supplied for tool2 must be an error-flagged placeholder.
    tool2_placeholder = next(block for block in results_msg.content if block.tool_use_id == "tool2")
    assert tool2_placeholder.is_error is True
    assert "interrupted" in tool2_placeholder.content.lower()

    # The repaired history should pass validation.
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
682
|
+
|
|
683
|
+
def testfix_incomplete_tool_calls_empty_history(self, base_agent):
    """The fix should return an empty list when given an empty history."""
    repaired = base_agent.fix_incomplete_tool_calls([])

    assert repaired == []
|
|
687
|
+
|
|
688
|
+
def test_restore_message_history_with_incomplete_tool_calls(self, base_agent):
    """Restoring a serialized history should leave an unanswered tool call as it is."""
    user_entry = {
        "role": "user",
        "content": [{"type": "text", "text": "Test message", "cache_checkpoint": False}],
        "stop_reason": None,
    }
    pending_call = {
        "type": "tool_call",
        "id": "tool1",
        "name": "test_tool",
        "input": {"param": "value"},
        "cache_checkpoint": False,
    }
    assistant_entry = {
        "role": "assistant",
        "content": [pending_call],
        "stop_reason": "tool_use",
    }
    # No tool result entry follows, imitating a run that was interrupted mid-tool.
    serialized_incomplete_history = [user_entry, assistant_entry]

    base_agent.restore_message_history(serialized_incomplete_history)

    # Restore must not repair anything: still exactly the two original messages.
    assert len(base_agent.history) == 2

    first_msg = base_agent.history[0]
    second_msg = base_agent.history[1]
    assert first_msg.role == "user"
    assert second_msg.role == "assistant"
    assert len(second_msg.tool_calls) == 1

    # The restored history is therefore still invalid.
    assert base_agent._is_history_valid_for_anthropic() is False

    # An explicit repair pass appends the placeholder and makes it valid.
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert len(repaired) == 3
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
730
|
+
|
|
731
|
+
# Tests for robustness - incomplete tool calls at various positions
|
|
732
|
+
def testfix_incomplete_tool_calls_at_beginning_of_history(self, base_agent):
    """The fix should repair a partially answered tool call sequence at the start of the history."""
    # Two calls at the very start, of which only early_tool1 is answered.
    early_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="early_tool1", name="early_tool", input={}),
            ToolCall(id="early_tool2", name="another_early_tool", input={}),
        ],
    )
    early_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="early_tool1", name="early_tool", content="Success", is_error=False)],
    )
    # A fully answered call later in the conversation.
    later_call = Message(role="assistant", content=[ToolCall(id="later_tool", name="later_tool", input={})])
    later_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="later_tool", name="later_tool", content="Later success", is_error=False)],
    )
    corrupted_history = [
        early_calls,
        early_results,
        Message(role="user", content=[TextBlock(text="How are things?")]),
        Message(role="assistant", content=[TextBlock(text="Things are going well.")]),
        later_call,
        later_result,
    ]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # The placeholder is expected inside the existing user message, so the count stays at 6.
    assert len(repaired) == 6

    # The early pair keeps its roles and the user message now holds two results.
    assert repaired[0].role == "assistant"
    assert repaired[1].role == "user"
    assert len(repaired[1].content) == 2

    answered_ids = {block.tool_use_id for block in repaired[1].content if isinstance(block, ToolResult)}
    assert answered_ids == {"early_tool1", "early_tool2"}

    # The result supplied for early_tool2 must be an error-flagged placeholder.
    early_placeholder = next(block for block in repaired[1].content if block.tool_use_id == "early_tool2")
    assert early_placeholder.is_error is True
    assert "interrupted" in early_placeholder.content.lower()

    # Remaining messages keep their roles: text exchange, then the later call and result.
    assert repaired[2].role == "user"
    assert repaired[3].role == "assistant"
    assert repaired[4].role == "assistant"
    assert repaired[5].role == "user"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
790
|
+
|
|
791
|
+
def testfix_incomplete_tool_calls_in_middle_of_history(self, base_agent):
    """The fix should repair a partially answered tool call sequence in the middle of the history."""
    # Three calls in the middle; middle_tool2 never receives a result.
    middle_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="middle_tool1", name="middle_tool", input={}),
            ToolCall(id="middle_tool2", name="another_middle_tool", input={}),
            ToolCall(id="middle_tool3", name="third_middle_tool", input={}),
        ],
    )
    middle_results = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="middle_tool1", name="middle_tool", content="Success", is_error=False),
            ToolResult(tool_use_id="middle_tool3", name="third_middle_tool", content="Success", is_error=False),
        ],
    )
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Hello")]),
        Message(role="assistant", content=[TextBlock(text="Hi there!")]),
        middle_calls,
        middle_results,
        Message(role="assistant", content=[TextBlock(text="Let me continue...")]),
        Message(role="user", content=[TextBlock(text="Sounds good")]),
    ]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # The placeholder is expected inside the existing user message, so the count stays at 6.
    assert len(repaired) == 6

    # The middle pair keeps its roles and the user message now holds three results.
    assert repaired[2].role == "assistant"
    assert repaired[3].role == "user"
    assert len(repaired[3].content) == 3

    answered_ids = {block.tool_use_id for block in repaired[3].content if isinstance(block, ToolResult)}
    assert answered_ids == {"middle_tool1", "middle_tool2", "middle_tool3"}

    # The result supplied for middle_tool2 must be flagged as an error.
    middle_placeholder = next(block for block in repaired[3].content if block.tool_use_id == "middle_tool2")
    assert middle_placeholder.is_error is True

    # The trailing text exchange keeps its roles.
    assert repaired[4].role == "assistant"
    assert repaired[5].role == "user"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
843
|
+
|
|
844
|
+
def test_fix_multiple_incomplete_tool_call_sequences(self, base_agent):
    """The fix should repair two separate partially answered sequences in one history."""
    # Sequence 1: two calls, seq1_tool2 unanswered.
    first_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="seq1_tool1", name="tool1", input={}),
            ToolCall(id="seq1_tool2", name="tool2", input={}),
        ],
    )
    first_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="seq1_tool1", name="tool1", content="Success", is_error=False)],
    )
    # Sequence 2: three calls, only seq2_tool2 answered.
    second_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="seq2_tool1", name="tool3", input={}),
            ToolCall(id="seq2_tool2", name="tool4", input={}),
            ToolCall(id="seq2_tool3", name="tool5", input={}),
        ],
    )
    second_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="seq2_tool2", name="tool4", content="Success", is_error=False)],
    )
    corrupted_history = [
        first_calls,
        first_results,
        Message(role="user", content=[TextBlock(text="Continue")]),
        Message(role="assistant", content=[TextBlock(text="Continuing...")]),
        second_calls,
        second_results,
        Message(role="assistant", content=[TextBlock(text="Done")]),
    ]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Placeholders are expected inside the existing user messages, so the count stays at 7.
    assert len(repaired) == 7

    # Sequence 1: the user message now answers both calls.
    assert repaired[1].role == "user"
    assert len(repaired[1].content) == 2
    first_answered_ids = {block.tool_use_id for block in repaired[1].content if isinstance(block, ToolResult)}
    assert first_answered_ids == {"seq1_tool1", "seq1_tool2"}

    # Sequence 2: the user message now answers all three calls.
    assert repaired[5].role == "user"
    assert len(repaired[5].content) == 3
    second_answered_ids = {block.tool_use_id for block in repaired[5].content if isinstance(block, ToolResult)}
    assert second_answered_ids == {"seq2_tool1", "seq2_tool2", "seq2_tool3"}

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
908
|
+
|
|
909
|
+
def testfix_incomplete_tool_calls_at_end_with_no_user_message(self, base_agent):
    """The fix should append a user message of placeholders when the history ends on tool calls."""
    # The final message issues two calls and nothing follows it.
    trailing_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="end_tool1", name="end_tool", input={}),
            ToolCall(id="end_tool2", name="another_end_tool", input={}),
        ],
    )
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Do something")]),
        Message(role="assistant", content=[TextBlock(text="Sure, let me help.")]),
        trailing_calls,
    ]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Three original messages plus one new user message.
    assert len(repaired) == 4

    # The new last message answers both trailing calls.
    appended_msg = repaired[3]
    assert appended_msg.role == "user"
    assert len(appended_msg.content) == 2
    answered_ids = {block.tool_use_id for block in appended_msg.content}
    assert answered_ids == {"end_tool1", "end_tool2"}

    for block in appended_msg.content:
        assert block.is_error is True
        assert "interrupted" in block.content.lower()

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
942
|
+
|
|
943
|
+
def test_fix_consecutive_incomplete_tool_sequences(self, base_agent):
    """The fix should repair back-to-back tool call sequences with differing gaps."""
    # Sequence 1: a single call that is fully answered.
    first_call = Message(role="assistant", content=[ToolCall(id="consec1_tool", name="tool1", input={})])
    first_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="consec1_tool", name="tool1", content="Success", is_error=False)],
    )
    # Sequence 2: two calls, consec2_tool2 unanswered.
    second_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="consec2_tool1", name="tool2", input={}),
            ToolCall(id="consec2_tool2", name="tool3", input={}),
        ],
    )
    second_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="consec2_tool1", name="tool2", content="Success", is_error=False)],
    )
    # Sequence 3: a call at the very end with no user message after it.
    third_call = Message(role="assistant", content=[ToolCall(id="consec3_tool", name="tool4", input={})])
    corrupted_history = [first_call, first_result, second_calls, second_results, third_call]

    repaired = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Five input messages; one placeholder is merged in place and one user message is appended.
    assert len(repaired) == 6

    # Sequence 1 keeps its roles.
    assert repaired[0].role == "assistant"
    assert repaired[1].role == "user"

    # Sequence 2: the existing user message now answers both calls.
    assert repaired[2].role == "assistant"
    assert repaired[3].role == "user"
    assert len(repaired[3].content) == 2
    second_answered_ids = {block.tool_use_id for block in repaired[3].content if isinstance(block, ToolResult)}
    assert second_answered_ids == {"consec2_tool1", "consec2_tool2"}

    # Sequence 3: a new user message answers consec3_tool.
    assert repaired[4].role == "assistant"
    assert repaired[5].role == "user"
    assert repaired[5].content[0].tool_use_id == "consec3_tool"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
998
|
+
|
|
999
|
+
def test_fix_mixed_complete_and_incomplete_sequences(self, base_agent):
    """The fix should repair an incomplete sequence sitting between two complete ones."""
    # Complete sequence 1.
    complete1_call = Message(role="assistant", content=[ToolCall(id="complete1", name="complete_tool", input={})])
    complete1_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="complete1", name="complete_tool", content="Success", is_error=False)],
    )
    # Incomplete sequence: two calls, incomplete2 unanswered.
    incomplete_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="incomplete1", name="incomplete_tool", input={}),
            ToolCall(id="incomplete2", name="another_incomplete", input={}),
        ],
    )
    incomplete_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="incomplete1", name="incomplete_tool", content="Success", is_error=False)],
    )
    # Complete sequence 2.
    complete2_call = Message(role="assistant", content=[ToolCall(id="complete2", name="another_complete", input={})])
    complete2_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="complete2", name="another_complete", content="Success", is_error=False)],
    )
    mixed_history = [
        complete1_call,
        complete1_result,
        incomplete_calls,
        incomplete_results,
        complete2_call,
        complete2_result,
        Message(role="user", content=[TextBlock(text="All done")]),
        Message(role="assistant", content=[TextBlock(text="Great work!")]),
    ]

    repaired = base_agent.fix_incomplete_tool_calls(mixed_history)

    # The placeholder is expected inside the existing user message, so the count stays at 8.
    assert len(repaired) == 8

    # Complete sequence 1 keeps its roles.
    assert repaired[0].role == "assistant"
    assert repaired[1].role == "user"

    # Incomplete sequence: the user message now answers both calls.
    assert repaired[2].role == "assistant"
    assert repaired[3].role == "user"
    assert len(repaired[3].content) == 2
    answered_ids = {block.tool_use_id for block in repaired[3].content if isinstance(block, ToolResult)}
    assert answered_ids == {"incomplete1", "incomplete2"}

    # Complete sequence 2 and the closing text exchange keep their roles.
    assert repaired[4].role == "assistant"
    assert repaired[5].role == "user"
    assert repaired[6].role == "user"
    assert repaired[7].role == "assistant"

    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
1062
|
+
|
|
1063
|
+
def test_complex_corrupted_history_recovery(self, base_agent):
    """The fix should recover a history with three incomplete sequences and one complete one."""
    # Opening sequence: three calls, only start_tool2 answered.
    start_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="start_tool1", name="start1", input={}),
            ToolCall(id="start_tool2", name="start2", input={}),
            ToolCall(id="start_tool3", name="start3", input={}),
        ],
    )
    start_results = Message(
        role="user",
        content=[ToolResult(tool_use_id="start_tool2", name="start2", content="Success", is_error=False)],
    )
    # Middle sequence: four calls, mid_tool2 and mid_tool3 unanswered.
    mid_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="mid_tool1", name="mid1", input={}),
            ToolCall(id="mid_tool2", name="mid2", input={}),
            ToolCall(id="mid_tool3", name="mid3", input={}),
            ToolCall(id="mid_tool4", name="mid4", input={}),
        ],
    )
    mid_results = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="mid_tool1", name="mid1", content="Success", is_error=False),
            ToolResult(tool_use_id="mid_tool4", name="mid4", content="Success", is_error=False),
        ],
    )
    # A fully answered sequence.
    good_call = Message(role="assistant", content=[ToolCall(id="good_tool", name="good", input={})])
    good_result = Message(
        role="user",
        content=[ToolResult(tool_use_id="good_tool", name="good", content="Success", is_error=False)],
    )
    # Closing sequence: two calls and no user message after them.
    end_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="end_tool1", name="end1", input={}),
            ToolCall(id="end_tool2", name="end2", input={}),
        ],
    )
    heavily_corrupted_history = [
        start_calls,
        start_results,
        Message(role="user", content=[TextBlock(text="What about the other tasks?")]),
        Message(role="assistant", content=[TextBlock(text="Let me check on those.")]),
        mid_calls,
        mid_results,
        good_call,
        good_result,
        end_calls,
    ]

    # The input must be invalid before the repair.
    assert base_agent._is_history_valid_for_anthropic(heavily_corrupted_history) is False

    repaired = base_agent.fix_incomplete_tool_calls(heavily_corrupted_history)

    # Nine input messages; two sequences are completed in place and one user message is appended.
    assert len(repaired) == 10

    # Opening sequence: the existing user message now answers all three calls.
    assert len(repaired[1].content) == 3
    start_answered_ids = {block.tool_use_id for block in repaired[1].content if isinstance(block, ToolResult)}
    assert start_answered_ids == {"start_tool1", "start_tool2", "start_tool3"}

    # Middle sequence: the existing user message now answers all four calls.
    assert len(repaired[5].content) == 4
    mid_answered_ids = {block.tool_use_id for block in repaired[5].content if isinstance(block, ToolResult)}
    assert mid_answered_ids == {"mid_tool1", "mid_tool2", "mid_tool3", "mid_tool4"}

    # Closing sequence: a new user message answers both trailing calls.
    appended_msg = repaired[9]
    assert appended_msg.role == "user"
    assert len(appended_msg.content) == 2
    end_answered_ids = {block.tool_use_id for block in appended_msg.content}
    assert end_answered_ids == {"end_tool1", "end_tool2"}

    # Spot-check one placeholder for the error flag and message text.
    start_placeholder = next(block for block in repaired[1].content if block.tool_use_id == "start_tool1")
    assert start_placeholder.is_error is True
    assert "interrupted" in start_placeholder.content.lower()

    # The repaired history must pass validation.
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
1153
|
+
|
|
1154
|
+
# Integration tests with real Anthropic API for message history corruption recovery
|
|
1155
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def testfix_incomplete_tool_calls_with_real_api_simple_case(self, base_agent):
    """Integration test: Fix simple incomplete tool call and verify it works with real Anthropic API.

    Builds a history whose final assistant message holds a tool call with no
    tool result, repairs it with ``fix_incomplete_tool_calls``, then sends the
    repaired history plus one follow-up user message through
    ``base_agent.llm.generate``.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a corrupted history with incomplete tool call (simulating interruption)
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Can you help me with a simple task?")]),
        Message(role="assistant", content=[TextBlock(text="Of course! I'd be happy to help you.")]),
        Message(
            role="assistant",
            content=[ToolCall(id="interrupted_tool", name="read_file", input={"path": "example.txt"})],
        ),
        # Missing tool result - simulates interruption during tool execution
    ]

    # Verify the corrupted history is invalid
    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    # Fix the corrupted history
    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True
    assert len(fixed_history) == 4  # Original 3 + 1 placeholder user message

    # Set up the fixed history in the agent
    base_agent.history = MessageHistory(fixed_history)

    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])

    # Add a new user message to continue the conversation
    base_agent.history.append(Message(role="user", content=[TextBlock(text="What should I do next?")]))

    # Guard only the API call: wrapping the assertions below as well would turn
    # an AssertionError into a misleading "Real API call failed" report.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=100,  # Keep it small for testing
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with fixed history: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
|
|
1213
|
+
|
|
1214
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_multiple_incomplete_tool_calls_with_real_api(self, base_agent):
    """Integration test: Fix multiple incomplete tool calls and verify with real Anthropic API.

    The history contains one tool-call sequence with only one of three results
    and a trailing sequence with no results at all. After repair the history is
    sent through ``base_agent.llm.generate`` together with a follow-up question.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a heavily corrupted history with multiple incomplete sequences
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="I need help with several file operations.")]),
        Message(
            role="assistant",
            content=[TextBlock(text="I can help you with file operations. Let me start working on those.")],
        ),
        # First incomplete sequence
        Message(
            role="assistant",
            content=[
                ToolCall(id="tool1", name="read_file", input={"path": "file1.txt"}),
                ToolCall(id="tool2", name="read_file", input={"path": "file2.txt"}),
                ToolCall(id="tool3", name="list_dir", input={"path": "."}),
            ],
        ),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="tool1", name="read_file", content="Content of file1", is_error=False)
                # Missing tool2 and tool3 results
            ],
        ),
        # Normal conversation
        Message(role="user", content=[TextBlock(text="What about the other operations?")]),
        Message(role="assistant", content=[TextBlock(text="Let me continue with the remaining operations.")]),
        # Second incomplete sequence
        Message(
            role="assistant",
            content=[
                ToolCall(id="tool4", name="write_file", input={"path": "output.txt", "content": "test"}),
                ToolCall(id="tool5", name="read_file", input={"path": "config.json"}),
            ],
        ),
        # No user message - interrupted at the end
    ]

    # Verify the corrupted history is invalid
    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    # Fix the corrupted history
    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Set up the fixed history in the agent
    base_agent.history = MessageHistory(fixed_history)

    system_message = Message(
        role="system", content=[TextBlock(text="You are a helpful assistant for file operations.")]
    )

    # Add a new user message to continue the conversation
    base_agent.history.append(
        Message(role="user", content=[TextBlock(text="Can you summarize what operations were attempted?")])
    )

    # Guard only the API call so that assertion failures further down are not
    # relabelled as API failures.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=150,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with fixed history containing multiple corruptions: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0

    # The response should acknowledge the interrupted operations
    response_lower = response_text.lower()
    assert any(word in response_lower for word in ["interrupt", "error", "operation", "attempt"])
|
|
1304
|
+
|
|
1305
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_corrupted_serialized_history_with_real_api(self, base_agent):
    """Integration test: Fix corrupted serialized history before API call works.

    Restores a serialized (dict-based) history in which two of three tool calls
    have no result, checks that restoring alone leaves it invalid, repairs it
    explicitly, and then sends the repaired history through
    ``base_agent.llm.generate``.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a serialized corrupted history (simulating what would be saved to database)
    serialized_corrupted_history = [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Please analyze this data for me.", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": "I'll analyze the data for you. Let me start by reading the files.",
                    "cache_checkpoint": False,
                }
            ],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_call",
                    "id": "analysis_tool1",
                    "name": "read_file",
                    "input": {"path": "data.csv"},
                    "cache_checkpoint": False,
                },
                {
                    "type": "tool_call",
                    "id": "analysis_tool2",
                    "name": "read_file",
                    "input": {"path": "metadata.json"},
                    "cache_checkpoint": False,
                },
                {
                    "type": "tool_call",
                    "id": "analysis_tool3",
                    "name": "list_dir",
                    "input": {"path": "analysis_results"},
                    "cache_checkpoint": False,
                },
            ],
            "stop_reason": "tool_use",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "analysis_tool1",
                    "content": "CSV data with 1000 rows, 5 columns",
                    "name": "read_file",
                    "is_error": False,
                    "cache_checkpoint": False,
                }
                # Missing analysis_tool2 and analysis_tool3 results (interrupted)
            ],
            "stop_reason": None,
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": "What did you find in the analysis?", "cache_checkpoint": False}],
            "stop_reason": None,
        },
    ]

    # Restore the corrupted history (this should NOT fix it)
    base_agent.restore_message_history(serialized_corrupted_history)

    # Verify the restored history is still invalid
    assert base_agent._is_history_valid_for_anthropic() is False

    # Fix the history manually
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    # Verify the fix was applied correctly:
    # the first user message carrying tool results should now hold all of them.
    tool_result_message = next(
        (
            msg
            for msg in fixed_history
            if msg.role == "user" and any(isinstance(block, ToolResult) for block in msg.content)
        ),
        None,
    )

    assert tool_result_message is not None
    tool_results = [block for block in tool_result_message.content if isinstance(block, ToolResult)]
    assert len(tool_results) == 3  # Should now have all 3 tool results

    # Check that placeholders were added for missing results
    tool_result_ids = {result.tool_use_id for result in tool_results}
    assert tool_result_ids == {"analysis_tool1", "analysis_tool2", "analysis_tool3"}

    # Test that the fixed history works with real Anthropic API
    system_message = Message(role="system", content=[TextBlock(text="You are a data analysis assistant.")])

    # Guard only the API call; the structural assertions above and the response
    # assertions below must surface as ordinary assertion failures.
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=200,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with restored corrupted history: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
|
|
1427
|
+
|
|
1428
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_fix_consecutive_tool_interruptions_with_real_api(self, base_agent):
    """Integration test: Fix consecutive tool call interruptions and verify with real API.

    The history holds three back-to-back tool-call sequences: one complete, one
    missing a single result, and one with no results. After repair it is sent
    through ``base_agent.llm.generate`` with a follow-up user message.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a corrupted history with consecutive interruptions
    corrupted_history = [
        Message(role="user", content=[TextBlock(text="Help me manage multiple files.")]),
        Message(role="assistant", content=[TextBlock(text="I'll help you manage your files systematically.")]),
        # First tool call sequence
        Message(role="assistant", content=[ToolCall(id="seq1_tool", name="list_dir", input={"path": "."})]),
        Message(
            role="user",
            content=[
                ToolResult(
                    tool_use_id="seq1_tool", name="list_dir", content="file1.txt, file2.txt, dir1/", is_error=False
                )
            ],
        ),
        # Second tool call sequence - partially interrupted
        Message(
            role="assistant",
            content=[
                ToolCall(id="seq2_tool1", name="read_file", input={"path": "file1.txt"}),
                ToolCall(id="seq2_tool2", name="read_file", input={"path": "file2.txt"}),
            ],
        ),
        Message(
            role="user",
            content=[
                ToolResult(tool_use_id="seq2_tool1", name="read_file", content="Content of file1", is_error=False)
                # Missing seq2_tool2 result
            ],
        ),
        # Third tool call sequence - completely interrupted
        Message(
            role="assistant",
            content=[
                ToolCall(id="seq3_tool1", name="write_file", input={"path": "summary.txt", "content": "Summary"}),
                ToolCall(id="seq3_tool2", name="list_dir", input={"path": "dir1"}),
            ],
        ),
        # No user message for third sequence (interrupted)
    ]

    # Verify the corrupted history is invalid
    assert base_agent._is_history_valid_for_anthropic(corrupted_history) is False

    # Fix the corrupted history
    fixed_history = base_agent.fix_incomplete_tool_calls(corrupted_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Set up the fixed history in the agent
    base_agent.history = MessageHistory(fixed_history)

    # Test with a follow-up conversation
    system_message = Message(
        role="system",
        content=[
            TextBlock(
                text="You are a file management assistant. When operations are interrupted, acknowledge this and offer to retry."
            )
        ],
    )

    # Add a new user message
    base_agent.history.append(
        Message(
            role="user",
            content=[
                TextBlock(
                    text="Some operations seem to have been interrupted. Can you tell me what happened and what we should do next?"
                )
            ],
        )
    )

    # Guard only the API call so that assertion failures below keep their own
    # traceback instead of being reported as an API error.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=250,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with consecutive tool interruptions: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0

    # The response should acknowledge the interruptions
    response_lower = response_text.lower()
    assert any(word in response_lower for word in ["interrupt", "error", "retry", "again", "issue"])
|
|
1533
|
+
|
|
1534
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_edge_case_tool_corruption_with_real_api(self, base_agent):
    """Integration test: Test edge case corruptions that might break the API.

    The edge case is an assistant message with tool calls that is followed
    directly by another assistant message, with no tool results in between.
    After repair the history is sent through ``base_agent.llm.generate``.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create an edge case: assistant message with tools followed by another assistant message
    edge_case_history = [
        Message(role="user", content=[TextBlock(text="Process this complex workflow.")]),
        Message(role="assistant", content=[TextBlock(text="I'll process the workflow step by step.")]),
        # Assistant with tools
        Message(
            role="assistant",
            content=[
                ToolCall(id="workflow_step1", name="read_file", input={"path": "config.yaml"}),
                ToolCall(id="workflow_step2", name="validate_data", input={"data": "test"}),
                ToolCall(id="workflow_step3", name="process_workflow", input={"step": 1}),
            ],
        ),
        # Another assistant message (invalid - should have user message with tool results first)
        Message(role="assistant", content=[TextBlock(text="Let me continue with the next steps.")]),
        # User asking about status
        Message(role="user", content=[TextBlock(text="How is the workflow going?")]),
    ]

    # Verify this edge case is invalid
    assert base_agent._is_history_valid_for_anthropic(edge_case_history) is False

    # Fix the edge case
    fixed_history = base_agent.fix_incomplete_tool_calls(edge_case_history)

    # Verify the fix worked
    assert base_agent._is_history_valid_for_anthropic(fixed_history) is True

    # Set up the fixed history
    base_agent.history = MessageHistory(fixed_history)

    system_message = Message(
        role="system", content=[TextBlock(text="You are a workflow processing assistant.")]
    )

    # Guard only the API call so that a failing assertion below is not
    # misreported as an API failure.
    try:
        response = await base_agent.llm.generate(
            messages=base_agent.history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=150,
        )
    except Exception as e:
        pytest.fail(f"Real API call failed with edge case corruption: {e}")

    # Verify we got a valid response
    assert response is not None
    response_text = response.get_text_content()
    assert isinstance(response_text, str)
    assert len(response_text.strip()) > 0
|
|
1597
|
+
|
|
1598
|
+
# Tests for new safe append methods
|
|
1599
|
+
def test_append_user_message_with_incomplete_tool_calls(self, base_agent):
    """Check that append_user_message leaves a dangling tool call unrepaired."""
    # Seed the history with an assistant turn whose tool call has no result.
    dangling_call = ToolCall(id="tool1", name="test_tool", input={})
    base_agent.history.append(Message(role="assistant", content=[dangling_call]))

    # Appending must not insert a placeholder tool result.
    base_agent.append_user_message("New user message")

    # Only the two authentic messages are present and the history is still invalid.
    assert len(base_agent.history) == 2  # assistant, user (new message)
    assert not base_agent._is_history_valid_for_anthropic()  # Still invalid

    # The appended message sits at the end with the expected text.
    appended = base_agent.history[1]
    assert appended.role == "user"
    assert appended.content[0].text == "New user message"

    # An explicit repair pass, on a copy, produces a valid history.
    repaired = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert len(repaired) == 3  # assistant, user (tool result), user (new message)
    assert base_agent._is_history_valid_for_anthropic(repaired) is True
|
|
1620
|
+
|
|
1621
|
+
def test_append_user_message_no_fix_needed(self, base_agent):
    """Check that append_user_message simply appends when the history needs no repair."""
    # Start from a single ordinary user message.
    greeting = Message(role="user", content=[TextBlock(text="Hello")])
    base_agent.history.append(greeting)

    # Add a second user message through the helper under test.
    base_agent.append_user_message("Another message")

    # Exactly one message was added, carrying the given text.
    assert len(base_agent.history) == 2
    assert base_agent.history[1].content[0].text == "Another message"
|
|
1632
|
+
|
|
1633
|
+
def test_append_user_message_with_list_content(self, base_agent):
    """Check append_user_message when given a list of ContentBlocks."""
    parts = [TextBlock(text="Message part 1"), TextBlock(text="Message part 2")]

    base_agent.append_user_message(parts)

    # Both blocks end up, in order, inside a single history message.
    assert len(base_agent.history) == 1
    assert len(base_agent.history[0].content) == 2
    first_block = base_agent.history[0].content[0]
    assert first_block.text == "Message part 1"
    second_block = base_agent.history[0].content[1]
    assert second_block.text == "Message part 2"
|
|
1643
|
+
|
|
1644
|
+
def test_append_user_message_with_single_block(self, base_agent):
    """Check append_user_message when given one bare ContentBlock."""
    base_agent.append_user_message(TextBlock(text="Single block message"))

    # The lone block is stored as the only content of the only message.
    assert len(base_agent.history) == 1
    assert len(base_agent.history[0].content) == 1
    only_block = base_agent.history[0].content[0]
    assert only_block.text == "Single block message"
|
|
1653
|
+
|
|
1654
|
+
def test_append_assistant_message(self, base_agent):
    """Check that append_assistant_message adds the assistant turn after a user turn."""
    # A user question comes first.
    base_agent.append_user_message("User question")

    # Then the assistant reply is appended through the helper under test.
    reply = Message(role="assistant", content=[TextBlock(text="Assistant response")])
    base_agent.append_assistant_message(reply)

    assert len(base_agent.history) == 2
    stored_reply = base_agent.history[1]
    assert stored_reply.role == "assistant"
    assert stored_reply.content[0].text == "Assistant response"
|
|
1666
|
+
|
|
1667
|
+
def test_get_effective_history_preserves_thinking_blocks(self, base_agent):
    """Check that get_effective_history_for_llm keeps thinking blocks and their fields."""
    # One assistant message mixing unsigned, signed and redacted thinking with text.
    assistant_blocks = [
        ThinkingBlock(thinking="unsigned thinking"),
        ThinkingBlock(thinking="signed thinking", signature="sig"),
        RedactedThinkingBlock(data="encrypted-redacted-thinking"),
        TextBlock(text="final answer"),
    ]
    base_agent.history = MessageHistory([Message(role="assistant", content=assistant_blocks)])

    effective = base_agent.get_effective_history_for_llm()

    # Block order and types are unchanged.
    assert len(effective) == 1
    expected_types = ["thinking", "thinking", "redacted_thinking", "text"]
    assert [block.type for block in effective[0].content] == expected_types

    # Payload fields survive as well.
    assert effective[0].content[0].thinking == "unsigned thinking"
    assert effective[0].content[1].thinking == "signed thinking"
    assert effective[0].content[1].signature == "sig"
    assert effective[0].content[2].data == "encrypted-redacted-thinking"
|
|
1695
|
+
|
|
1696
|
+
def test_extend_history_no_fix_needed(self, base_agent):
    """Check that extend_history appends in order when the history needs no repair."""
    # Seed a simple user/assistant exchange.
    for role, text in (("user", "Hello"), ("assistant", "Hi there")):
        base_agent.history.append(Message(role=role, content=[TextBlock(text=text)]))

    # Extend with a second exchange in one call.
    follow_up = [
        Message(role="user", content=[TextBlock(text="How are you?")]),
        Message(role="assistant", content=[TextBlock(text="I'm doing well")]),
    ]
    base_agent.extend_history(follow_up)

    # Both new messages follow the original two, in order.
    assert len(base_agent.history) == 4
    assert base_agent.history[2].content[0].text == "How are you?"
    assert base_agent.history[3].content[0].text == "I'm doing well"
|
|
1713
|
+
|
|
1714
|
+
def test_needs_tool_call_fix(self, base_agent):
    """Check _needs_tool_call_fix as the history grows one message at a time."""
    # Empty history
    assert not base_agent._needs_tool_call_fix()

    # Each step appends one message and states whether a fix is then needed.
    steps = [
        # User message last
        (Message(role="user", content=[TextBlock(text="Hello")]), False),
        # Assistant message without tools
        (Message(role="assistant", content=[TextBlock(text="Hi")]), False),
        # Assistant message with tools
        (Message(role="assistant", content=[ToolCall(id="tool1", name="test", input={})]), True),
    ]
    for message, fix_expected in steps:
        base_agent.history.append(message)
        assert bool(base_agent._needs_tool_call_fix()) is fix_expected
|
|
1730
|
+
|
|
1731
|
+
def test_needs_tool_call_fix_with_mixed_content(self, base_agent):
    """Check _needs_tool_call_fix when the last assistant message mixes text and a tool call."""
    mixed_blocks = [
        TextBlock(text="Let me help you with that."),
        ToolCall(id="tool1", name="read_file", input={"path": "test.txt"}),
    ]
    base_agent.history.append(Message(role="assistant", content=mixed_blocks))

    assert base_agent._needs_tool_call_fix()
|
|
1745
|
+
|
|
1746
|
+
def test_needs_tool_call_fix_string_content(self, base_agent):
    """Check _needs_tool_call_fix when message content is a plain string (edge case)."""
    # Not expected in practice; content is normally a list of blocks.
    plain_message = Message(role="assistant", content="Just a string")
    base_agent.history.append(plain_message)

    assert not base_agent._needs_tool_call_fix()
|
|
1752
|
+
|
|
1753
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_safe_append_with_real_api(self, base_agent):
    """Test that append methods work with real Anthropic API when history is fixed.

    Appends a user message after an unanswered tool call, confirms the history
    is invalid, repairs a copy with ``fix_incomplete_tool_calls`` and sends
    that copy through ``base_agent.llm.generate``.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Set up history with tool calls
    base_agent.history.append(Message(role="user", content=[TextBlock(text="Read the README file")]))
    base_agent.history.append(
        Message(
            role="assistant",
            content=[
                TextBlock(text="I'll read the README file for you."),
                ToolCall(id="tool1", name="read_file", input={"path": "README.md"}),
            ],
        )
    )

    # Append user message - history is now invalid
    base_agent.append_user_message("What does it say?")

    # Verify history is invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Fix history before API call
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    # Verify we can make an API call with fixed history
    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])

    # Guard only the API call so that a failing assertion below is not
    # reported as "API call failed".
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=200,
        )
    except Exception as e:
        pytest.fail(f"API call failed after fixing history: {e}")

    assert response is not None
    assert response.get_text_content()
|
|
1799
|
+
|
|
1800
|
+
def test_append_user_message_multiple_incomplete_sequences(self, base_agent):
    """Check that append_user_message leaves several incomplete tool sequences unrepaired."""
    # First sequence: two calls, only tool1 answered. Second: tool3 never answered.
    first_calls = Message(
        role="assistant",
        content=[
            ToolCall(id="tool1", name="first_tool", input={}),
            ToolCall(id="tool2", name="second_tool", input={}),
        ],
    )
    partial_results = Message(
        role="user",
        content=[
            ToolResult(tool_use_id="tool1", name="first_tool", content="Result 1", is_error=False)
            # Missing tool2 result
        ],
    )
    trailing_call = Message(role="assistant", content=[ToolCall(id="tool3", name="third_tool", input={})])
    base_agent.history = MessageHistory(
        [
            Message(role="user", content=[TextBlock(text="Initial request")]),
            first_calls,
            partial_results,
            trailing_call,
            # Missing tool3 result
        ]
    )

    # Append new user message
    base_agent.append_user_message("Continue with the task")

    # Appending alone must not repair anything.
    assert not base_agent._is_history_valid_for_anthropic()

    # An explicit repair pass on a copy yields a valid history.
    fixed_history = base_agent.fix_incomplete_tool_calls(list(base_agent.history))
    assert base_agent._is_history_valid_for_anthropic(fixed_history)

    # Gather the ids of every tool result found in user messages.
    tool_result_ids = {
        block.tool_use_id
        for msg in fixed_history
        if msg.role == "user"
        for block in msg.content
        if isinstance(block, ToolResult)
    }
    assert "tool2" in tool_result_ids  # Should have placeholder for tool2
    assert "tool3" in tool_result_ids  # Should have placeholder for tool3
|
|
1844
|
+
|
|
1845
|
+
@pytest.mark.slow
@pytest.mark.integration
@pytest.mark.asyncio
async def test_restore_and_append_scenario(self, base_agent):
    """Verify an interrupted history stays invalid until it is explicitly fixed.

    Restores a serialized history that ends with an unanswered tool call,
    appends a new user message, and checks after each step that the history
    is still reported as invalid. The history is then repaired with
    ``fix_incomplete_tool_calls`` and sent to the real LLM API, which must
    return a response with text content.

    Skipped when no real API key is configured for the long-context provider.
    """
    # Skip if no API key is available
    api_key = base_agent.config.get_api_key(base_agent.config.long_context_config.provider)
    if not api_key or api_key == "test_key":
        pytest.skip("No valid API key available for LLM provider")

    # Create a serialized history that ends with tool calls (simulating what's in the DB)
    serialized_history = [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Help me with a task", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [{"type": "text", "text": "I'll help you with that task.", "cache_checkpoint": False}],
            "stop_reason": None,
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_call",
                    "id": "tool_1",
                    "name": "read_file",
                    "input": {"path": "task.txt"},
                    "cache_checkpoint": False,
                }
            ],
            "stop_reason": "tool_use",
        },
        # Missing tool result - simulating an interrupted session
    ]

    # Restore the history (should NOT auto-fix)
    base_agent.restore_message_history(serialized_history)

    # Verify history is still invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Add a new user message (history remains invalid)
    base_agent.append_user_message("What's the status of my task?")

    # Verify the history is still invalid
    assert not base_agent._is_history_valid_for_anthropic()

    # Fix history before API call
    fixed_history = MessageHistory(base_agent.fix_incomplete_tool_calls(list(base_agent.history)))

    # Test with real API
    system_message = Message(role="system", content=[TextBlock(text="You are a helpful assistant.")])

    # Only the API call is guarded: an exception here (e.g. the tool_use_id
    # error) means the fixed history was rejected.
    try:
        response = await base_agent.llm.generate(
            messages=fixed_history,
            system=system_message,
            model=base_agent.config.long_context_config.model,
            max_completion_tokens=100,
        )
    except Exception as e:
        # If this fails with the tool_use_id error, our fix didn't work
        pytest.fail(f"API call failed with fixed history: {str(e)}")

    # Response checks live outside the try so that an AssertionError is
    # reported as an assertion failure rather than as an API failure.
    assert response is not None
    assert response.get_text_content()
|
|
1914
|
+
|
|
1915
|
+
def test_get_effective_history_falls_back_when_no_compression(self, base_agent):
    """Expect both messages back from get_effective_history_for_llm when no compression is set up."""
    # Two plain messages and no compression state touched by this test.
    plain_messages = [
        Message(role="user", content=[TextBlock(text="hi")]),
        Message(role="assistant", content=[TextBlock(text="yo")]),
    ]
    base_agent.history = MessageHistory(plain_messages)

    effective_history = base_agent.get_effective_history_for_llm()

    # With no compression, effective == full history
    assert len(effective_history) == 2
|
|
1925
|
+
|
|
1926
|
+
def test_get_effective_history_after_markers(self, base_agent):
    """Expect only the trailing summary message once last_compression_index is set to 2."""
    original_messages = [
        Message(role="user", content=[TextBlock(text="a")]),
        Message(role="assistant", content=[TextBlock(text="b")]),
        Message(role="user", content=[TextBlock(text="c")]),
    ]
    base_agent.history = MessageHistory(original_messages)
    base_agent.last_compression_index = 2

    # Append a summary message as it would be after compression
    summary_message = Message(
        role="user",
        content=[TextBlock(text="CONVERSATION HISTORY SUMMARY (compressed at ...)")],
    )
    base_agent.history.append(summary_message)

    effective_history = base_agent.get_effective_history_for_llm()

    # boundary is 2, so tail is after index 2 -> empty, but we still have summary
    assert len(effective_history) == 1
    first_block_text = effective_history[0].content[0].text
    assert "CONVERSATION HISTORY SUMMARY" in first_block_text
|