openhands 0.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of openhands has been flagged as possibly problematic by the registry.
- openhands-1.0.1.dist-info/METADATA +52 -0
- openhands-1.0.1.dist-info/RECORD +31 -0
- {openhands-0.0.0.dist-info → openhands-1.0.1.dist-info}/WHEEL +1 -2
- openhands-1.0.1.dist-info/entry_points.txt +2 -0
- openhands_cli/__init__.py +8 -0
- openhands_cli/agent_chat.py +186 -0
- openhands_cli/argparsers/main_parser.py +56 -0
- openhands_cli/argparsers/serve_parser.py +31 -0
- openhands_cli/gui_launcher.py +220 -0
- openhands_cli/listeners/__init__.py +4 -0
- openhands_cli/listeners/loading_listener.py +63 -0
- openhands_cli/listeners/pause_listener.py +83 -0
- openhands_cli/llm_utils.py +57 -0
- openhands_cli/locations.py +13 -0
- openhands_cli/pt_style.py +30 -0
- openhands_cli/runner.py +178 -0
- openhands_cli/setup.py +116 -0
- openhands_cli/simple_main.py +59 -0
- openhands_cli/tui/__init__.py +5 -0
- openhands_cli/tui/settings/mcp_screen.py +217 -0
- openhands_cli/tui/settings/settings_screen.py +202 -0
- openhands_cli/tui/settings/store.py +93 -0
- openhands_cli/tui/status.py +109 -0
- openhands_cli/tui/tui.py +100 -0
- openhands_cli/tui/utils.py +14 -0
- openhands_cli/user_actions/__init__.py +17 -0
- openhands_cli/user_actions/agent_action.py +95 -0
- openhands_cli/user_actions/exit_session.py +18 -0
- openhands_cli/user_actions/settings_action.py +171 -0
- openhands_cli/user_actions/types.py +18 -0
- openhands_cli/user_actions/utils.py +199 -0
- openhands/__init__.py +0 -1
- openhands/sdk/__init__.py +0 -45
- openhands/sdk/agent/__init__.py +0 -8
- openhands/sdk/agent/agent/__init__.py +0 -6
- openhands/sdk/agent/agent/agent.py +0 -349
- openhands/sdk/agent/base.py +0 -103
- openhands/sdk/context/__init__.py +0 -28
- openhands/sdk/context/agent_context.py +0 -153
- openhands/sdk/context/condenser/__init__.py +0 -5
- openhands/sdk/context/condenser/condenser.py +0 -73
- openhands/sdk/context/condenser/no_op_condenser.py +0 -13
- openhands/sdk/context/manager.py +0 -5
- openhands/sdk/context/microagents/__init__.py +0 -26
- openhands/sdk/context/microagents/exceptions.py +0 -11
- openhands/sdk/context/microagents/microagent.py +0 -345
- openhands/sdk/context/microagents/types.py +0 -70
- openhands/sdk/context/utils/__init__.py +0 -8
- openhands/sdk/context/utils/prompt.py +0 -52
- openhands/sdk/context/view.py +0 -116
- openhands/sdk/conversation/__init__.py +0 -12
- openhands/sdk/conversation/conversation.py +0 -207
- openhands/sdk/conversation/state.py +0 -50
- openhands/sdk/conversation/types.py +0 -6
- openhands/sdk/conversation/visualizer.py +0 -300
- openhands/sdk/event/__init__.py +0 -27
- openhands/sdk/event/base.py +0 -148
- openhands/sdk/event/condenser.py +0 -49
- openhands/sdk/event/llm_convertible.py +0 -265
- openhands/sdk/event/types.py +0 -5
- openhands/sdk/event/user_action.py +0 -12
- openhands/sdk/event/utils.py +0 -30
- openhands/sdk/llm/__init__.py +0 -19
- openhands/sdk/llm/exceptions.py +0 -108
- openhands/sdk/llm/llm.py +0 -867
- openhands/sdk/llm/llm_registry.py +0 -116
- openhands/sdk/llm/message.py +0 -216
- openhands/sdk/llm/metadata.py +0 -34
- openhands/sdk/llm/utils/fn_call_converter.py +0 -1049
- openhands/sdk/llm/utils/metrics.py +0 -311
- openhands/sdk/llm/utils/model_features.py +0 -153
- openhands/sdk/llm/utils/retry_mixin.py +0 -122
- openhands/sdk/llm/utils/telemetry.py +0 -252
- openhands/sdk/logger.py +0 -167
- openhands/sdk/mcp/__init__.py +0 -20
- openhands/sdk/mcp/client.py +0 -113
- openhands/sdk/mcp/definition.py +0 -69
- openhands/sdk/mcp/tool.py +0 -104
- openhands/sdk/mcp/utils.py +0 -59
- openhands/sdk/tests/llm/test_llm.py +0 -447
- openhands/sdk/tests/llm/test_llm_fncall_converter.py +0 -691
- openhands/sdk/tests/llm/test_model_features.py +0 -221
- openhands/sdk/tool/__init__.py +0 -30
- openhands/sdk/tool/builtins/__init__.py +0 -34
- openhands/sdk/tool/builtins/finish.py +0 -57
- openhands/sdk/tool/builtins/think.py +0 -60
- openhands/sdk/tool/schema.py +0 -236
- openhands/sdk/tool/security_prompt.py +0 -5
- openhands/sdk/tool/tool.py +0 -142
- openhands/sdk/utils/__init__.py +0 -14
- openhands/sdk/utils/discriminated_union.py +0 -210
- openhands/sdk/utils/json.py +0 -48
- openhands/sdk/utils/truncate.py +0 -44
- openhands/tools/__init__.py +0 -44
- openhands/tools/execute_bash/__init__.py +0 -30
- openhands/tools/execute_bash/constants.py +0 -31
- openhands/tools/execute_bash/definition.py +0 -166
- openhands/tools/execute_bash/impl.py +0 -38
- openhands/tools/execute_bash/metadata.py +0 -101
- openhands/tools/execute_bash/terminal/__init__.py +0 -22
- openhands/tools/execute_bash/terminal/factory.py +0 -113
- openhands/tools/execute_bash/terminal/interface.py +0 -189
- openhands/tools/execute_bash/terminal/subprocess_terminal.py +0 -412
- openhands/tools/execute_bash/terminal/terminal_session.py +0 -492
- openhands/tools/execute_bash/terminal/tmux_terminal.py +0 -160
- openhands/tools/execute_bash/utils/command.py +0 -150
- openhands/tools/str_replace_editor/__init__.py +0 -17
- openhands/tools/str_replace_editor/definition.py +0 -158
- openhands/tools/str_replace_editor/editor.py +0 -683
- openhands/tools/str_replace_editor/exceptions.py +0 -41
- openhands/tools/str_replace_editor/impl.py +0 -66
- openhands/tools/str_replace_editor/utils/__init__.py +0 -0
- openhands/tools/str_replace_editor/utils/config.py +0 -2
- openhands/tools/str_replace_editor/utils/constants.py +0 -9
- openhands/tools/str_replace_editor/utils/encoding.py +0 -135
- openhands/tools/str_replace_editor/utils/file_cache.py +0 -154
- openhands/tools/str_replace_editor/utils/history.py +0 -122
- openhands/tools/str_replace_editor/utils/shell.py +0 -72
- openhands/tools/task_tracker/__init__.py +0 -16
- openhands/tools/task_tracker/definition.py +0 -336
- openhands/tools/utils/__init__.py +0 -1
- openhands-0.0.0.dist-info/METADATA +0 -3
- openhands-0.0.0.dist-info/RECORD +0 -94
- openhands-0.0.0.dist-info/top_level.txt +0 -1
openhands/sdk/mcp/tool.py
DELETED
@@ -1,104 +0,0 @@
-"""Utility functions for MCP integration."""
-
-import re
-from typing import TYPE_CHECKING
-
-import mcp.types
-from pydantic import ValidationError
-
-from openhands.sdk.llm import TextContent
-from openhands.sdk.logger import get_logger
-from openhands.sdk.mcp import MCPToolObservation
-from openhands.sdk.tool import MCPActionBase, Tool, ToolAnnotations, ToolExecutor
-
-
-if TYPE_CHECKING:
-    from openhands.sdk.mcp.client import MCPClient
-
-logger = get_logger(__name__)
-
-
-# NOTE: We don't define MCPToolAction because it
-# will be a pydantic BaseModel dynamically created from the MCP tool schema.
-# It will be available as "tool.action_type".
-
-
-def to_camel_case(s: str) -> str:
-    parts = re.split(r"[_\-\s]+", s)
-    return "".join(word.capitalize() for word in parts if word)
-
-
-class MCPToolExecutor(ToolExecutor):
-    """Executor for MCP tools."""
-
-    def __init__(self, tool_name: str, client: "MCPClient"):
-        self.tool_name = tool_name
-        self.client = client
-
-    async def call_tool(self, action: MCPActionBase) -> MCPToolObservation:
-        async with self.client:
-            assert self.client.is_connected(), "MCP client is not connected."
-            try:
-                logger.debug(
-                    f"Calling MCP tool {self.tool_name} "
-                    f"with args: {action.model_dump()}"
-                )
-                result: mcp.types.CallToolResult = await self.client.call_tool_mcp(
-                    name=self.tool_name, arguments=action.to_mcp_arguments()
-                )
-                return MCPToolObservation.from_call_tool_result(
-                    tool_name=self.tool_name, result=result
-                )
-            except Exception as e:
-                error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}"
-                logger.error(error_msg, exc_info=True)
-                return MCPToolObservation(
-                    content=[TextContent(text=error_msg)],
-                    is_error=True,
-                    tool_name=self.tool_name,
-                )
-
-    def __call__(self, action: MCPActionBase) -> MCPToolObservation:
-        """Execute an MCP tool call."""
-        return self.client.call_async_from_sync(
-            self.call_tool, action=action, timeout=300
-        )
-
-
-class MCPTool(Tool[MCPActionBase, MCPToolObservation]):
-    """MCP Tool that wraps an MCP client and provides tool functionality."""
-
-    def __init__(
-        self,
-        mcp_tool: mcp.types.Tool,
-        mcp_client: "MCPClient",
-    ):
-        self.mcp_client = mcp_client
-        self.mcp_tool = mcp_tool
-
-        try:
-            if mcp_tool.annotations:
-                anno_dict = mcp_tool.annotations.model_dump(exclude_none=True)
-                annotations = ToolAnnotations.model_validate(anno_dict)
-            else:
-                annotations = None
-
-            MCPActionType = MCPActionBase.from_mcp_schema(
-                f"{to_camel_case(mcp_tool.name)}Action", mcp_tool.inputSchema
-            )
-            super().__init__(
-                name=mcp_tool.name,
-                description=mcp_tool.description or "No description provided",
-                input_schema=MCPActionType,
-                output_schema=MCPToolObservation,
-                annotations=annotations,
-                _meta=mcp_tool.meta,
-                executor=MCPToolExecutor(tool_name=mcp_tool.name, client=mcp_client),
-            )
-        except ValidationError as e:
-            logger.error(
-                f"Validation error creating MCPTool for {mcp_tool.name}: "
-                f"{e.json(indent=2)}",
-                exc_info=True,
-            )
-            raise e
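The deleted MCPTool above builds its action type dynamically: each MCP tool's inputSchema is turned into a pydantic model named "<CamelCaseName>Action", and calls are routed through MCPToolExecutor. The standalone sketch below illustrates that schema-to-model step under stated assumptions; build_action_type, TYPE_MAP, and the toy echo_text schema are illustrative stand-ins, not part of the openhands package.

# Standalone sketch of the dynamic-action pattern used by the deleted MCPTool.
# Only the camel-case helper is copied from the code above; build_action_type,
# TYPE_MAP, and the toy schema are hypothetical stand-ins for
# MCPActionBase.from_mcp_schema.
import re

from pydantic import create_model


def to_camel_case(s: str) -> str:
    parts = re.split(r"[_\-\s]+", s)
    return "".join(word.capitalize() for word in parts if word)


# Minimal JSON-Schema-type to Python-type mapping, enough for the sketch.
TYPE_MAP = {"string": str, "integer": int, "number": float, "boolean": bool}


def build_action_type(tool_name: str, input_schema: dict):
    """Create a pydantic model named '<CamelCase>Action' from a schema's properties."""
    fields = {
        name: (TYPE_MAP.get(prop.get("type", "string"), str), ...)
        for name, prop in input_schema.get("properties", {}).items()
    }
    return create_model(f"{to_camel_case(tool_name)}Action", **fields)


EchoAction = build_action_type("echo_text", {"properties": {"text": {"type": "string"}}})
print(EchoAction(text="hi").model_dump())  # -> {'text': 'hi'}

In the real implementation this conversion is handled by MCPActionBase.from_mcp_schema, whose internals are not shown in this diff.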
openhands/sdk/mcp/utils.py
DELETED
@@ -1,59 +0,0 @@
-"""Utility functions for MCP integration."""
-
-import logging
-
-import mcp.types
-from fastmcp.client.logging import LogMessage
-from fastmcp.mcp_config import MCPConfig
-
-from openhands.sdk.logger import get_logger
-from openhands.sdk.mcp import MCPClient, MCPTool
-from openhands.sdk.tool import Tool
-
-
-logger = get_logger(__name__)
-LOGGING_LEVEL_MAP = logging.getLevelNamesMapping()
-
-
-async def log_handler(message: LogMessage):
-    """
-    Handles incoming logs from the MCP server and forwards them
-    to the standard Python logging system.
-    """
-    msg = message.data.get("msg")
-    extra = message.data.get("extra")
-
-    # Convert the MCP log level to a Python log level
-    level = LOGGING_LEVEL_MAP.get(message.level.upper(), logging.INFO)
-
-    # Log the message using the standard logging library
-    logger.log(level, msg, extra=extra)
-
-
-async def _list_tools(client: MCPClient) -> list[Tool]:
-    """List tools from an MCP client."""
-    tools: list[Tool] = []
-
-    async with client:
-        assert client.is_connected(), "MCP client is not connected."
-        mcp_type_tools: list[mcp.types.Tool] = await client.list_tools()
-        tools = [MCPTool(mcp_tool=t, mcp_client=client) for t in mcp_type_tools]
-    assert not client.is_connected(), (
-        "MCP client should be disconnected after listing tools."
-    )
-    return tools
-
-
-def create_mcp_tools(
-    config: dict | MCPConfig,
-    timeout: float = 30.0,
-) -> list[Tool]:
-    """Create MCP tools from MCP configuration."""
-    tools: list[Tool] = []
-    if isinstance(config, dict):
-        config = MCPConfig.model_validate(config)
-    client = MCPClient(config, log_handler=log_handler)
-    tools = client.call_async_from_sync(_list_tools, timeout=timeout, client=client)
-
-    logger.info(f"Created {len(tools)} MCP tools: {[t.name for t in tools]}")
-    return tools
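The deleted create_mcp_tools above is the synchronous entry point: it validates a plain dict into a fastmcp MCPConfig, opens an MCPClient, and returns the wrapped tools. A minimal, hypothetical call site is sketched below; the "mcpServers" layout and the "fetch" server command are assumptions about a typical MCP configuration rather than anything this diff specifies, and the import path is only valid for the 0.0.0 layout being removed.

# Hypothetical call site for the deleted create_mcp_tools helper (0.0.0 layout).
# The server name, command, and args below are placeholders for a real MCP server.
from openhands.sdk.mcp.utils import create_mcp_tools

config = {
    "mcpServers": {
        "fetch": {
            "command": "uvx",
            "args": ["mcp-server-fetch"],
        }
    }
}

tools = create_mcp_tools(config, timeout=30.0)
for tool in tools:
    print(tool.name)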
openhands/sdk/tests/llm/test_llm.py
DELETED
@@ -1,447 +0,0 @@
-from typing import Any
-from unittest.mock import MagicMock, patch
-
-import pytest
-from litellm.exceptions import (
-    RateLimitError,
-)
-from pydantic import SecretStr
-
-from openhands.sdk.llm import LLM, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMNoResponseError
-from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage
-
-
-def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
-    """Helper function to create properly structured mock responses."""
-    mock_response = MagicMock()
-    mock_response.choices = [MagicMock()]
-    mock_response.choices[0].message.content = content
-
-    # Create usage mock
-    mock_usage = MagicMock()
-    mock_usage.get.side_effect = lambda key, default=None: {
-        "prompt_tokens": 10,
-        "completion_tokens": 5,
-        "model_extra": {},
-    }.get(key, default)
-    mock_usage.prompt_tokens_details = None
-
-    # Response data mapping
-    response_data = {
-        "choices": mock_response.choices,
-        "usage": mock_usage,
-        "id": response_id,
-    }
-
-    # Mock both .get() and dict-like access (LLM code uses both patterns inconsistently)
-    mock_response.get.side_effect = lambda key, default=None: response_data.get(
-        key, default
-    )
-    mock_response.__getitem__ = lambda self, key: response_data[key]
-
-    return mock_response
-
-
-@pytest.fixture(autouse=True)
-def mock_logger(monkeypatch):
-    # suppress logging of completion data to file
-    mock_logger = MagicMock()
-    monkeypatch.setattr("openhands.sdk.llm.llm.logger", mock_logger)
-    return mock_logger
-
-
-@pytest.fixture
-def default_config() -> dict[str, Any]:
-    return {
-        "model": "gpt-4o",
-        "api_key": SecretStr("test_key"),
-        "num_retries": 2,
-        "retry_min_wait": 1,
-        "retry_max_wait": 2,
-    }
-
-
-def test_llm_init_with_default_config(default_config):
-    llm = LLM(**default_config, service_id="test-service")
-    assert llm.model == "gpt-4o"
-    assert llm.api_key is not None and llm.api_key.get_secret_value() == "test_key"
-    assert isinstance(llm.metrics, Metrics)
-    assert llm.metrics.model_name == "gpt-4o"
-
-
-def test_token_usage_add():
-    """Test that TokenUsage instances can be added together."""
-    # Create two TokenUsage instances
-    usage1 = TokenUsage(
-        model="model1",
-        prompt_tokens=10,
-        completion_tokens=5,
-        cache_read_tokens=3,
-        cache_write_tokens=2,
-        response_id="response-1",
-    )
-
-    usage2 = TokenUsage(
-        model="model2",
-        prompt_tokens=8,
-        completion_tokens=6,
-        cache_read_tokens=2,
-        cache_write_tokens=4,
-        response_id="response-2",
-    )
-
-    # Add them together
-    combined = usage1 + usage2
-
-    # Verify the result
-    assert combined.model == "model1"  # Should keep the model from the first instance
-    assert combined.prompt_tokens == 18  # 10 + 8
-    assert combined.completion_tokens == 11  # 5 + 6
-    assert combined.cache_read_tokens == 5  # 3 + 2
-    assert combined.cache_write_tokens == 6  # 2 + 4
-    assert (
-        combined.response_id == "response-1"
-    )  # Should keep the response_id from the first instance
-
-
-def test_metrics_merge_accumulated_token_usage():
-    """Test that accumulated token usage is properly merged between two Metrics
-    instances."""
-    # Create two Metrics instances
-    metrics1 = Metrics(model_name="model1")
-    metrics2 = Metrics(model_name="model2")
-
-    # Add token usage to each
-    metrics1.add_token_usage(10, 5, 3, 2, 1000, "response-1")
-    metrics2.add_token_usage(8, 6, 2, 4, 1000, "response-2")
-
-    # Verify initial accumulated token usage
-    metrics1_data = metrics1.get()
-    accumulated1 = metrics1_data["accumulated_token_usage"]
-    assert accumulated1["prompt_tokens"] == 10
-    assert accumulated1["completion_tokens"] == 5
-    assert accumulated1["cache_read_tokens"] == 3
-    assert accumulated1["cache_write_tokens"] == 2
-
-    metrics2_data = metrics2.get()
-    accumulated2 = metrics2_data["accumulated_token_usage"]
-    assert accumulated2["prompt_tokens"] == 8
-    assert accumulated2["completion_tokens"] == 6
-    assert accumulated2["cache_read_tokens"] == 2
-    assert accumulated2["cache_write_tokens"] == 4
-
-    # Merge metrics2 into metrics1
-    metrics1.merge(metrics2)
-
-    # Verify merged accumulated token usage
-    merged_data = metrics1.get()
-    merged_accumulated = merged_data["accumulated_token_usage"]
-    assert merged_accumulated["prompt_tokens"] == 18  # 10 + 8
-    assert merged_accumulated["completion_tokens"] == 11  # 5 + 6
-    assert merged_accumulated["cache_read_tokens"] == 5  # 3 + 2
-    assert merged_accumulated["cache_write_tokens"] == 6  # 2 + 4
-
-
-def test_metrics_diff():
-    """Test that metrics diff correctly calculates the difference between two
-    metrics."""
-    # Create baseline metrics
-    baseline = Metrics(model_name="test-model")
-    baseline.add_cost(1.0)
-    baseline.add_token_usage(10, 5, 2, 1, 1000, "baseline-response")
-    baseline.add_response_latency(0.5, "baseline-response")
-
-    # Create current metrics with additional data
-    current = Metrics(model_name="test-model")
-    current.merge(baseline)  # Start with baseline
-    current.add_cost(2.0)  # Add more cost
-    current.add_token_usage(15, 8, 3, 2, 1000, "current-response")  # Add more tokens
-    current.add_response_latency(0.8, "current-response")  # Add more latency
-
-    # Calculate diff
-    diff = current.diff(baseline)
-
-    # Verify diff contains only the additional data
-    diff_data = diff.get()
-    assert diff_data["accumulated_cost"] == 2.0  # Only the additional cost
-    assert len(diff_data["costs"]) == 1  # Only the additional cost entry
-    assert len(diff_data["token_usages"]) == 1  # Only the additional token usage
-    assert len(diff_data["response_latencies"]) == 1  # Only the additional latency
-
-    # Verify accumulated token usage diff
-    accumulated_diff = diff_data["accumulated_token_usage"]
-    assert accumulated_diff["prompt_tokens"] == 15  # Only the additional tokens
-    assert accumulated_diff["completion_tokens"] == 8
-    assert accumulated_diff["cache_read_tokens"] == 3
-    assert accumulated_diff["cache_write_tokens"] == 2
-
-
-@patch("openhands.sdk.llm.llm.litellm_completion")
-def test_llm_completion_with_mock(mock_completion, default_config):
-    """Test LLM completion with mocked litellm."""
-    mock_response = create_mock_response("Test response")
-    mock_completion.return_value = mock_response
-
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test completion
-    messages = [{"role": "user", "content": "Hello"}]
-    response = llm.completion(messages=messages)
-
-    assert response == mock_response
-    mock_completion.assert_called_once()
-
-
-@patch("openhands.sdk.llm.llm.litellm_completion")
-def test_llm_retry_on_rate_limit(mock_completion, default_config):
-    """Test that LLM retries on rate limit errors."""
-    mock_response = create_mock_response("Success after retry")
-
-    mock_completion.side_effect = [
-        RateLimitError(
-            message="Rate limit exceeded",
-            llm_provider="test_provider",
-            model="test_model",
-        ),
-        mock_response,
-    ]
-
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test completion with retry
-    messages = [{"role": "user", "content": "Hello"}]
-    response = llm.completion(messages=messages)
-
-    assert response == mock_response
-    assert mock_completion.call_count == 2  # First call failed, second succeeded
-
-
-def test_llm_cost_calculation(default_config):
-    """Test LLM cost calculation and metrics tracking."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test cost addition
-    assert llm.metrics is not None
-    initial_cost = llm.metrics.accumulated_cost
-    llm.metrics.add_cost(1.5)
-    assert llm.metrics.accumulated_cost == initial_cost + 1.5
-
-    # Test cost validation
-    with pytest.raises(ValueError, match="Added cost cannot be negative"):
-        llm.metrics.add_cost(-1.0)
-
-
-def test_llm_token_counting(default_config):
-    """Test LLM token counting functionality."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test with dict messages
-    messages = [
-        {"role": "user", "content": "Hello"},
-        {"role": "assistant", "content": "Hi there!"},
-    ]
-
-    # Token counting might return 0 if model not supported, but should not error
-    token_count = llm.get_token_count(messages)
-    assert isinstance(token_count, int)
-    assert token_count >= 0
-
-
-def test_llm_vision_support(default_config):
-    """Test LLM vision support detection."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Vision support detection should work without errors
-    vision_active = llm.vision_is_active()
-    assert isinstance(vision_active, bool)
-
-
-def test_llm_function_calling_support(default_config):
-    """Test LLM function calling support detection."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Function calling support detection should work without errors
-    function_calling_active = llm.is_function_calling_active()
-    assert isinstance(function_calling_active, bool)
-
-
-def test_llm_caching_support(default_config):
-    """Test LLM prompt caching support detection."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Caching support detection should work without errors
-    caching_active = llm.is_caching_prompt_active()
-    assert isinstance(caching_active, bool)
-
-
-def test_llm_string_representation(default_config):
-    """Test LLM string representation."""
-    llm = LLM(**default_config)  # type: ignore
-
-    str_repr = str(llm)
-    assert "LLM(" in str_repr
-    assert "gpt-4o" in str_repr
-
-    repr_str = repr(llm)
-    assert repr_str == str_repr
-
-
-def test_llm_openhands_provider_rewrite():
-    """Test OpenHands provider rewriting."""
-    llm = LLM(model="openhands/gpt-4o")
-
-    # Model should be rewritten to litellm_proxy format
-    assert llm.model == "litellm_proxy/gpt-4o"
-    assert llm.base_url == "https://llm-proxy.app.all-hands.dev/"
-
-
-def test_llm_message_formatting(default_config):
-    """Test LLM message formatting for different message types."""
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test with single Message object
-    message = Message(role="user", content=[TextContent(text="Hello")])
-    formatted = llm.format_messages_for_llm([message])
-    assert isinstance(formatted, list)
-    assert len(formatted) == 1
-    assert isinstance(formatted[0], dict)
-
-    # Test with list of Message objects
-    messages = [
-        Message(role="user", content=[TextContent(text="Hello")]),
-        Message(role="assistant", content=[TextContent(text="Hi there!")]),
-    ]
-    formatted = llm.format_messages_for_llm(messages)
-    assert isinstance(formatted, list)
-    assert len(formatted) == 2
-    assert all(isinstance(msg, dict) for msg in formatted)
-
-
-def test_metrics_copy():
-    """Test that metrics can be copied correctly."""
-    original = Metrics(model_name="test-model")
-    original.add_cost(1.0)
-    original.add_token_usage(10, 5, 2, 1, 1000, "test-response")
-    original.add_response_latency(0.5, "test-response")
-
-    # Create a copy
-    copied = original.copy()
-
-    # Verify copy has same data
-    original_data = original.get()
-    copied_data = copied.get()
-
-    assert original_data["accumulated_cost"] == copied_data["accumulated_cost"]
-    assert len(original_data["costs"]) == len(copied_data["costs"])
-    assert len(original_data["token_usages"]) == len(copied_data["token_usages"])
-    assert len(original_data["response_latencies"]) == len(
-        copied_data["response_latencies"]
-    )
-
-    # Verify they are independent (modifying one doesn't affect the other)
-    copied.add_cost(2.0)
-    assert original.accumulated_cost != copied.accumulated_cost
-
-
-def test_metrics_log():
-    """Test metrics logging functionality."""
-    metrics = Metrics(model_name="test-model")
-    metrics.add_cost(1.5)
-    metrics.add_token_usage(10, 5, 2, 1, 1000, "test-response")
-
-    log_output = metrics.log()
-    assert isinstance(log_output, str)
-    assert "accumulated_cost" in log_output
-    assert "1.5" in log_output
-
-
-def test_llm_config_validation():
-    """Test LLM configuration validation."""
-    # Test with minimal valid config
-    llm = LLM(model="gpt-4o")
-    assert llm.model == "gpt-4o"
-
-    # Test with full config
-    full_llm = LLM(
-        model="gpt-4o",
-        api_key=SecretStr("test_key"),
-        base_url="https://api.openai.com/v1",
-        temperature=0.7,
-        max_output_tokens=1000,
-        num_retries=3,
-        retry_min_wait=1,
-        retry_max_wait=10,
-    )
-    assert full_llm.temperature == 0.7
-    assert full_llm.max_output_tokens == 1000
-
-
-@patch("openhands.sdk.llm.llm.litellm_completion")
-def test_llm_no_response_error(mock_completion):
-    """Test handling of LLMNoResponseError."""
-    # Mock empty response
-    mock_response = MagicMock()
-    mock_response.choices = []
-    mock_response.get.return_value = None
-    mock_response.__getitem__.side_effect = lambda key: {
-        "choices": [],
-        "usage": None,
-        "id": None,
-    }[key]
-    mock_completion.return_value = mock_response
-
-    llm = LLM(**default_config)  # type: ignore
-
-    # Test that empty response raises LLMNoResponseError
-    messages = [{"role": "user", "content": "Hello"}]
-    with pytest.raises(LLMNoResponseError):
-        llm.completion(messages=messages)
-
-
-def test_response_latency_tracking(default_config):
-    """Test response latency tracking in metrics."""
-    metrics = Metrics(model_name="test-model")
-
-    # Add some latencies
-    metrics.add_response_latency(0.5, "response-1")
-    metrics.add_response_latency(1.2, "response-2")
-    metrics.add_response_latency(0.8, "response-3")
-
-    latencies = metrics.response_latencies
-    assert len(latencies) == 3
-    assert latencies[0].latency == 0.5
-    assert latencies[1].latency == 1.2
-    assert latencies[2].latency == 0.8
-
-    # Test negative latency is converted to 0
-    metrics.add_response_latency(-0.1, "response-4")
-    assert metrics.response_latencies[-1].latency == 0.0
-
-
-def test_token_usage_context_window():
-    """Test token usage with context window tracking."""
-    usage = TokenUsage(
-        model="test-model",
-        prompt_tokens=100,
-        completion_tokens=50,
-        context_window=4096,
-        response_id="test-response",
-    )
-
-    assert usage.context_window == 4096
-    assert usage.per_turn_token == 0  # Default value
-
-    # Test addition preserves max context window
-    usage2 = TokenUsage(
-        model="test-model",
-        prompt_tokens=200,
-        completion_tokens=75,
-        context_window=8192,
-        response_id="test-response-2",
-    )
-
-    combined = usage + usage2
-    assert combined.context_window == 8192  # Should take the max
-    assert combined.prompt_tokens == 300
-    assert combined.completion_tokens == 125