openhands 0.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff reflects the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of openhands might be problematic.

Files changed (124)
  1. openhands-1.0.1.dist-info/METADATA +52 -0
  2. openhands-1.0.1.dist-info/RECORD +31 -0
  3. {openhands-0.0.0.dist-info → openhands-1.0.1.dist-info}/WHEEL +1 -2
  4. openhands-1.0.1.dist-info/entry_points.txt +2 -0
  5. openhands_cli/__init__.py +8 -0
  6. openhands_cli/agent_chat.py +186 -0
  7. openhands_cli/argparsers/main_parser.py +56 -0
  8. openhands_cli/argparsers/serve_parser.py +31 -0
  9. openhands_cli/gui_launcher.py +220 -0
  10. openhands_cli/listeners/__init__.py +4 -0
  11. openhands_cli/listeners/loading_listener.py +63 -0
  12. openhands_cli/listeners/pause_listener.py +83 -0
  13. openhands_cli/llm_utils.py +57 -0
  14. openhands_cli/locations.py +13 -0
  15. openhands_cli/pt_style.py +30 -0
  16. openhands_cli/runner.py +178 -0
  17. openhands_cli/setup.py +116 -0
  18. openhands_cli/simple_main.py +59 -0
  19. openhands_cli/tui/__init__.py +5 -0
  20. openhands_cli/tui/settings/mcp_screen.py +217 -0
  21. openhands_cli/tui/settings/settings_screen.py +202 -0
  22. openhands_cli/tui/settings/store.py +93 -0
  23. openhands_cli/tui/status.py +109 -0
  24. openhands_cli/tui/tui.py +100 -0
  25. openhands_cli/tui/utils.py +14 -0
  26. openhands_cli/user_actions/__init__.py +17 -0
  27. openhands_cli/user_actions/agent_action.py +95 -0
  28. openhands_cli/user_actions/exit_session.py +18 -0
  29. openhands_cli/user_actions/settings_action.py +171 -0
  30. openhands_cli/user_actions/types.py +18 -0
  31. openhands_cli/user_actions/utils.py +199 -0
  32. openhands/__init__.py +0 -1
  33. openhands/sdk/__init__.py +0 -45
  34. openhands/sdk/agent/__init__.py +0 -8
  35. openhands/sdk/agent/agent/__init__.py +0 -6
  36. openhands/sdk/agent/agent/agent.py +0 -349
  37. openhands/sdk/agent/base.py +0 -103
  38. openhands/sdk/context/__init__.py +0 -28
  39. openhands/sdk/context/agent_context.py +0 -153
  40. openhands/sdk/context/condenser/__init__.py +0 -5
  41. openhands/sdk/context/condenser/condenser.py +0 -73
  42. openhands/sdk/context/condenser/no_op_condenser.py +0 -13
  43. openhands/sdk/context/manager.py +0 -5
  44. openhands/sdk/context/microagents/__init__.py +0 -26
  45. openhands/sdk/context/microagents/exceptions.py +0 -11
  46. openhands/sdk/context/microagents/microagent.py +0 -345
  47. openhands/sdk/context/microagents/types.py +0 -70
  48. openhands/sdk/context/utils/__init__.py +0 -8
  49. openhands/sdk/context/utils/prompt.py +0 -52
  50. openhands/sdk/context/view.py +0 -116
  51. openhands/sdk/conversation/__init__.py +0 -12
  52. openhands/sdk/conversation/conversation.py +0 -207
  53. openhands/sdk/conversation/state.py +0 -50
  54. openhands/sdk/conversation/types.py +0 -6
  55. openhands/sdk/conversation/visualizer.py +0 -300
  56. openhands/sdk/event/__init__.py +0 -27
  57. openhands/sdk/event/base.py +0 -148
  58. openhands/sdk/event/condenser.py +0 -49
  59. openhands/sdk/event/llm_convertible.py +0 -265
  60. openhands/sdk/event/types.py +0 -5
  61. openhands/sdk/event/user_action.py +0 -12
  62. openhands/sdk/event/utils.py +0 -30
  63. openhands/sdk/llm/__init__.py +0 -19
  64. openhands/sdk/llm/exceptions.py +0 -108
  65. openhands/sdk/llm/llm.py +0 -867
  66. openhands/sdk/llm/llm_registry.py +0 -116
  67. openhands/sdk/llm/message.py +0 -216
  68. openhands/sdk/llm/metadata.py +0 -34
  69. openhands/sdk/llm/utils/fn_call_converter.py +0 -1049
  70. openhands/sdk/llm/utils/metrics.py +0 -311
  71. openhands/sdk/llm/utils/model_features.py +0 -153
  72. openhands/sdk/llm/utils/retry_mixin.py +0 -122
  73. openhands/sdk/llm/utils/telemetry.py +0 -252
  74. openhands/sdk/logger.py +0 -167
  75. openhands/sdk/mcp/__init__.py +0 -20
  76. openhands/sdk/mcp/client.py +0 -113
  77. openhands/sdk/mcp/definition.py +0 -69
  78. openhands/sdk/mcp/tool.py +0 -104
  79. openhands/sdk/mcp/utils.py +0 -59
  80. openhands/sdk/tests/llm/test_llm.py +0 -447
  81. openhands/sdk/tests/llm/test_llm_fncall_converter.py +0 -691
  82. openhands/sdk/tests/llm/test_model_features.py +0 -221
  83. openhands/sdk/tool/__init__.py +0 -30
  84. openhands/sdk/tool/builtins/__init__.py +0 -34
  85. openhands/sdk/tool/builtins/finish.py +0 -57
  86. openhands/sdk/tool/builtins/think.py +0 -60
  87. openhands/sdk/tool/schema.py +0 -236
  88. openhands/sdk/tool/security_prompt.py +0 -5
  89. openhands/sdk/tool/tool.py +0 -142
  90. openhands/sdk/utils/__init__.py +0 -14
  91. openhands/sdk/utils/discriminated_union.py +0 -210
  92. openhands/sdk/utils/json.py +0 -48
  93. openhands/sdk/utils/truncate.py +0 -44
  94. openhands/tools/__init__.py +0 -44
  95. openhands/tools/execute_bash/__init__.py +0 -30
  96. openhands/tools/execute_bash/constants.py +0 -31
  97. openhands/tools/execute_bash/definition.py +0 -166
  98. openhands/tools/execute_bash/impl.py +0 -38
  99. openhands/tools/execute_bash/metadata.py +0 -101
  100. openhands/tools/execute_bash/terminal/__init__.py +0 -22
  101. openhands/tools/execute_bash/terminal/factory.py +0 -113
  102. openhands/tools/execute_bash/terminal/interface.py +0 -189
  103. openhands/tools/execute_bash/terminal/subprocess_terminal.py +0 -412
  104. openhands/tools/execute_bash/terminal/terminal_session.py +0 -492
  105. openhands/tools/execute_bash/terminal/tmux_terminal.py +0 -160
  106. openhands/tools/execute_bash/utils/command.py +0 -150
  107. openhands/tools/str_replace_editor/__init__.py +0 -17
  108. openhands/tools/str_replace_editor/definition.py +0 -158
  109. openhands/tools/str_replace_editor/editor.py +0 -683
  110. openhands/tools/str_replace_editor/exceptions.py +0 -41
  111. openhands/tools/str_replace_editor/impl.py +0 -66
  112. openhands/tools/str_replace_editor/utils/__init__.py +0 -0
  113. openhands/tools/str_replace_editor/utils/config.py +0 -2
  114. openhands/tools/str_replace_editor/utils/constants.py +0 -9
  115. openhands/tools/str_replace_editor/utils/encoding.py +0 -135
  116. openhands/tools/str_replace_editor/utils/file_cache.py +0 -154
  117. openhands/tools/str_replace_editor/utils/history.py +0 -122
  118. openhands/tools/str_replace_editor/utils/shell.py +0 -72
  119. openhands/tools/task_tracker/__init__.py +0 -16
  120. openhands/tools/task_tracker/definition.py +0 -336
  121. openhands/tools/utils/__init__.py +0 -1
  122. openhands-0.0.0.dist-info/METADATA +0 -3
  123. openhands-0.0.0.dist-info/RECORD +0 -94
  124. openhands-0.0.0.dist-info/top_level.txt +0 -1
openhands/sdk/mcp/tool.py DELETED
@@ -1,104 +0,0 @@
- """Utility functions for MCP integration."""
-
- import re
- from typing import TYPE_CHECKING
-
- import mcp.types
- from pydantic import ValidationError
-
- from openhands.sdk.llm import TextContent
- from openhands.sdk.logger import get_logger
- from openhands.sdk.mcp import MCPToolObservation
- from openhands.sdk.tool import MCPActionBase, Tool, ToolAnnotations, ToolExecutor
-
-
- if TYPE_CHECKING:
-     from openhands.sdk.mcp.client import MCPClient
-
- logger = get_logger(__name__)
-
-
- # NOTE: We don't define MCPToolAction because it
- # will be a pydantic BaseModel dynamically created from the MCP tool schema.
- # It will be available as "tool.action_type".
-
-
- def to_camel_case(s: str) -> str:
-     parts = re.split(r"[_\-\s]+", s)
-     return "".join(word.capitalize() for word in parts if word)
-
-
- class MCPToolExecutor(ToolExecutor):
-     """Executor for MCP tools."""
-
-     def __init__(self, tool_name: str, client: "MCPClient"):
-         self.tool_name = tool_name
-         self.client = client
-
-     async def call_tool(self, action: MCPActionBase) -> MCPToolObservation:
-         async with self.client:
-             assert self.client.is_connected(), "MCP client is not connected."
-             try:
-                 logger.debug(
-                     f"Calling MCP tool {self.tool_name} "
-                     f"with args: {action.model_dump()}"
-                 )
-                 result: mcp.types.CallToolResult = await self.client.call_tool_mcp(
-                     name=self.tool_name, arguments=action.to_mcp_arguments()
-                 )
-                 return MCPToolObservation.from_call_tool_result(
-                     tool_name=self.tool_name, result=result
-                 )
-             except Exception as e:
-                 error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}"
-                 logger.error(error_msg, exc_info=True)
-                 return MCPToolObservation(
-                     content=[TextContent(text=error_msg)],
-                     is_error=True,
-                     tool_name=self.tool_name,
-                 )
-
-     def __call__(self, action: MCPActionBase) -> MCPToolObservation:
-         """Execute an MCP tool call."""
-         return self.client.call_async_from_sync(
-             self.call_tool, action=action, timeout=300
-         )
-
-
- class MCPTool(Tool[MCPActionBase, MCPToolObservation]):
-     """MCP Tool that wraps an MCP client and provides tool functionality."""
-
-     def __init__(
-         self,
-         mcp_tool: mcp.types.Tool,
-         mcp_client: "MCPClient",
-     ):
-         self.mcp_client = mcp_client
-         self.mcp_tool = mcp_tool
-
-         try:
-             if mcp_tool.annotations:
-                 anno_dict = mcp_tool.annotations.model_dump(exclude_none=True)
-                 annotations = ToolAnnotations.model_validate(anno_dict)
-             else:
-                 annotations = None
-
-             MCPActionType = MCPActionBase.from_mcp_schema(
-                 f"{to_camel_case(mcp_tool.name)}Action", mcp_tool.inputSchema
-             )
-             super().__init__(
-                 name=mcp_tool.name,
-                 description=mcp_tool.description or "No description provided",
-                 input_schema=MCPActionType,
-                 output_schema=MCPToolObservation,
-                 annotations=annotations,
-                 _meta=mcp_tool.meta,
-                 executor=MCPToolExecutor(tool_name=mcp_tool.name, client=mcp_client),
-             )
-         except ValidationError as e:
-             logger.error(
-                 f"Validation error creating MCPTool for {mcp_tool.name}: "
-                 f"{e.json(indent=2)}",
-                 exc_info=True,
-             )
-             raise e
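
For readers skimming the removed MCP integration: the short sketch below is illustrative only (it is not part of the package) and shows how the deleted to_camel_case() helper determined the name of the dynamically generated pydantic action class that MCPTool built from each MCP tool's input schema. The tool name "str_replace_editor" is used purely as an example.

import re

def to_camel_case(s: str) -> str:
    # Copied from the deleted openhands/sdk/mcp/tool.py above.
    parts = re.split(r"[_\-\s]+", s)
    return "".join(word.capitalize() for word in parts if word)

# MCPTool exposed the dynamic action type as "tool.action_type",
# named "<CamelCaseToolName>Action":
assert to_camel_case("str_replace_editor") == "StrReplaceEditor"
action_class_name = f"{to_camel_case('str_replace_editor')}Action"
print(action_class_name)  # -> StrReplaceEditorAction
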
openhands/sdk/mcp/utils.py DELETED
@@ -1,59 +0,0 @@
- """Utility functions for MCP integration."""
-
- import logging
-
- import mcp.types
- from fastmcp.client.logging import LogMessage
- from fastmcp.mcp_config import MCPConfig
-
- from openhands.sdk.logger import get_logger
- from openhands.sdk.mcp import MCPClient, MCPTool
- from openhands.sdk.tool import Tool
-
-
- logger = get_logger(__name__)
- LOGGING_LEVEL_MAP = logging.getLevelNamesMapping()
-
-
- async def log_handler(message: LogMessage):
-     """
-     Handles incoming logs from the MCP server and forwards them
-     to the standard Python logging system.
-     """
-     msg = message.data.get("msg")
-     extra = message.data.get("extra")
-
-     # Convert the MCP log level to a Python log level
-     level = LOGGING_LEVEL_MAP.get(message.level.upper(), logging.INFO)
-
-     # Log the message using the standard logging library
-     logger.log(level, msg, extra=extra)
-
-
- async def _list_tools(client: MCPClient) -> list[Tool]:
-     """List tools from an MCP client."""
-     tools: list[Tool] = []
-
-     async with client:
-         assert client.is_connected(), "MCP client is not connected."
-         mcp_type_tools: list[mcp.types.Tool] = await client.list_tools()
-         tools = [MCPTool(mcp_tool=t, mcp_client=client) for t in mcp_type_tools]
-     assert not client.is_connected(), (
-         "MCP client should be disconnected after listing tools."
-     )
-     return tools
-
-
- def create_mcp_tools(
-     config: dict | MCPConfig,
-     timeout: float = 30.0,
- ) -> list[Tool]:
-     """Create MCP tools from MCP configuration."""
-     tools: list[Tool] = []
-     if isinstance(config, dict):
-         config = MCPConfig.model_validate(config)
-     client = MCPClient(config, log_handler=log_handler)
-     tools = client.call_async_from_sync(_list_tools, timeout=timeout, client=client)
-
-     logger.info(f"Created {len(tools)} MCP tools: {[t.name for t in tools]}")
-     return tools
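
As a rough usage sketch of the deleted create_mcp_tools() entry point (valid only against the old 0.0.0 layout): the server name and command below are placeholders, and the dict shape is assumed to follow fastmcp's standard "mcpServers" mapping that MCPConfig.model_validate accepts.

from openhands.sdk.mcp.utils import create_mcp_tools  # module removed in 1.0.1

# Hypothetical MCP server entry; any fastmcp-compatible config would do here.
config = {
    "mcpServers": {
        "fetch": {
            "command": "uvx",
            "args": ["mcp-server-fetch"],
        }
    }
}

# create_mcp_tools() validates the dict into an MCPConfig, opens an MCPClient,
# lists the server's tools, and wraps each one in an MCPTool (see code above).
tools = create_mcp_tools(config, timeout=30.0)
print([t.name for t in tools])
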
openhands/sdk/tests/llm/test_llm.py DELETED
@@ -1,447 +0,0 @@
- from typing import Any
- from unittest.mock import MagicMock, patch
-
- import pytest
- from litellm.exceptions import (
-     RateLimitError,
- )
- from pydantic import SecretStr
-
- from openhands.sdk.llm import LLM, Message, TextContent
- from openhands.sdk.llm.exceptions import LLMNoResponseError
- from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage
-
-
- def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
-     """Helper function to create properly structured mock responses."""
-     mock_response = MagicMock()
-     mock_response.choices = [MagicMock()]
-     mock_response.choices[0].message.content = content
-
-     # Create usage mock
-     mock_usage = MagicMock()
-     mock_usage.get.side_effect = lambda key, default=None: {
-         "prompt_tokens": 10,
-         "completion_tokens": 5,
-         "model_extra": {},
-     }.get(key, default)
-     mock_usage.prompt_tokens_details = None
-
-     # Response data mapping
-     response_data = {
-         "choices": mock_response.choices,
-         "usage": mock_usage,
-         "id": response_id,
-     }
-
-     # Mock both .get() and dict-like access (LLM code uses both patterns inconsistently)
-     mock_response.get.side_effect = lambda key, default=None: response_data.get(
-         key, default
-     )
-     mock_response.__getitem__ = lambda self, key: response_data[key]
-
-     return mock_response
-
-
- @pytest.fixture(autouse=True)
- def mock_logger(monkeypatch):
-     # suppress logging of completion data to file
-     mock_logger = MagicMock()
-     monkeypatch.setattr("openhands.sdk.llm.llm.logger", mock_logger)
-     return mock_logger
-
-
- @pytest.fixture
- def default_config() -> dict[str, Any]:
-     return {
-         "model": "gpt-4o",
-         "api_key": SecretStr("test_key"),
-         "num_retries": 2,
-         "retry_min_wait": 1,
-         "retry_max_wait": 2,
-     }
-
-
- def test_llm_init_with_default_config(default_config):
-     llm = LLM(**default_config, service_id="test-service")
-     assert llm.model == "gpt-4o"
-     assert llm.api_key is not None and llm.api_key.get_secret_value() == "test_key"
-     assert isinstance(llm.metrics, Metrics)
-     assert llm.metrics.model_name == "gpt-4o"
-
-
- def test_token_usage_add():
-     """Test that TokenUsage instances can be added together."""
-     # Create two TokenUsage instances
-     usage1 = TokenUsage(
-         model="model1",
-         prompt_tokens=10,
-         completion_tokens=5,
-         cache_read_tokens=3,
-         cache_write_tokens=2,
-         response_id="response-1",
-     )
-
-     usage2 = TokenUsage(
-         model="model2",
-         prompt_tokens=8,
-         completion_tokens=6,
-         cache_read_tokens=2,
-         cache_write_tokens=4,
-         response_id="response-2",
-     )
-
-     # Add them together
-     combined = usage1 + usage2
-
-     # Verify the result
-     assert combined.model == "model1"  # Should keep the model from the first instance
-     assert combined.prompt_tokens == 18  # 10 + 8
-     assert combined.completion_tokens == 11  # 5 + 6
-     assert combined.cache_read_tokens == 5  # 3 + 2
-     assert combined.cache_write_tokens == 6  # 2 + 4
-     assert (
-         combined.response_id == "response-1"
-     )  # Should keep the response_id from the first instance
-
-
- def test_metrics_merge_accumulated_token_usage():
-     """Test that accumulated token usage is properly merged between two Metrics
-     instances."""
-     # Create two Metrics instances
-     metrics1 = Metrics(model_name="model1")
-     metrics2 = Metrics(model_name="model2")
-
-     # Add token usage to each
-     metrics1.add_token_usage(10, 5, 3, 2, 1000, "response-1")
-     metrics2.add_token_usage(8, 6, 2, 4, 1000, "response-2")
-
-     # Verify initial accumulated token usage
-     metrics1_data = metrics1.get()
-     accumulated1 = metrics1_data["accumulated_token_usage"]
-     assert accumulated1["prompt_tokens"] == 10
-     assert accumulated1["completion_tokens"] == 5
-     assert accumulated1["cache_read_tokens"] == 3
-     assert accumulated1["cache_write_tokens"] == 2
-
-     metrics2_data = metrics2.get()
-     accumulated2 = metrics2_data["accumulated_token_usage"]
-     assert accumulated2["prompt_tokens"] == 8
-     assert accumulated2["completion_tokens"] == 6
-     assert accumulated2["cache_read_tokens"] == 2
-     assert accumulated2["cache_write_tokens"] == 4
-
-     # Merge metrics2 into metrics1
-     metrics1.merge(metrics2)
-
-     # Verify merged accumulated token usage
-     merged_data = metrics1.get()
-     merged_accumulated = merged_data["accumulated_token_usage"]
-     assert merged_accumulated["prompt_tokens"] == 18  # 10 + 8
-     assert merged_accumulated["completion_tokens"] == 11  # 5 + 6
-     assert merged_accumulated["cache_read_tokens"] == 5  # 3 + 2
-     assert merged_accumulated["cache_write_tokens"] == 6  # 2 + 4
-
-
- def test_metrics_diff():
-     """Test that metrics diff correctly calculates the difference between two
-     metrics."""
-     # Create baseline metrics
-     baseline = Metrics(model_name="test-model")
-     baseline.add_cost(1.0)
-     baseline.add_token_usage(10, 5, 2, 1, 1000, "baseline-response")
-     baseline.add_response_latency(0.5, "baseline-response")
-
-     # Create current metrics with additional data
-     current = Metrics(model_name="test-model")
-     current.merge(baseline)  # Start with baseline
-     current.add_cost(2.0)  # Add more cost
-     current.add_token_usage(15, 8, 3, 2, 1000, "current-response")  # Add more tokens
-     current.add_response_latency(0.8, "current-response")  # Add more latency
-
-     # Calculate diff
-     diff = current.diff(baseline)
-
-     # Verify diff contains only the additional data
-     diff_data = diff.get()
-     assert diff_data["accumulated_cost"] == 2.0  # Only the additional cost
-     assert len(diff_data["costs"]) == 1  # Only the additional cost entry
-     assert len(diff_data["token_usages"]) == 1  # Only the additional token usage
-     assert len(diff_data["response_latencies"]) == 1  # Only the additional latency
-
-     # Verify accumulated token usage diff
-     accumulated_diff = diff_data["accumulated_token_usage"]
-     assert accumulated_diff["prompt_tokens"] == 15  # Only the additional tokens
-     assert accumulated_diff["completion_tokens"] == 8
-     assert accumulated_diff["cache_read_tokens"] == 3
-     assert accumulated_diff["cache_write_tokens"] == 2
-
-
- @patch("openhands.sdk.llm.llm.litellm_completion")
- def test_llm_completion_with_mock(mock_completion, default_config):
-     """Test LLM completion with mocked litellm."""
-     mock_response = create_mock_response("Test response")
-     mock_completion.return_value = mock_response
-
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test completion
-     messages = [{"role": "user", "content": "Hello"}]
-     response = llm.completion(messages=messages)
-
-     assert response == mock_response
-     mock_completion.assert_called_once()
-
-
- @patch("openhands.sdk.llm.llm.litellm_completion")
- def test_llm_retry_on_rate_limit(mock_completion, default_config):
-     """Test that LLM retries on rate limit errors."""
-     mock_response = create_mock_response("Success after retry")
-
-     mock_completion.side_effect = [
-         RateLimitError(
-             message="Rate limit exceeded",
-             llm_provider="test_provider",
-             model="test_model",
-         ),
-         mock_response,
-     ]
-
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test completion with retry
-     messages = [{"role": "user", "content": "Hello"}]
-     response = llm.completion(messages=messages)
-
-     assert response == mock_response
-     assert mock_completion.call_count == 2  # First call failed, second succeeded
-
-
- def test_llm_cost_calculation(default_config):
-     """Test LLM cost calculation and metrics tracking."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test cost addition
-     assert llm.metrics is not None
-     initial_cost = llm.metrics.accumulated_cost
-     llm.metrics.add_cost(1.5)
-     assert llm.metrics.accumulated_cost == initial_cost + 1.5
-
-     # Test cost validation
-     with pytest.raises(ValueError, match="Added cost cannot be negative"):
-         llm.metrics.add_cost(-1.0)
-
-
- def test_llm_token_counting(default_config):
-     """Test LLM token counting functionality."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test with dict messages
-     messages = [
-         {"role": "user", "content": "Hello"},
-         {"role": "assistant", "content": "Hi there!"},
-     ]
-
-     # Token counting might return 0 if model not supported, but should not error
-     token_count = llm.get_token_count(messages)
-     assert isinstance(token_count, int)
-     assert token_count >= 0
-
-
- def test_llm_vision_support(default_config):
-     """Test LLM vision support detection."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Vision support detection should work without errors
-     vision_active = llm.vision_is_active()
-     assert isinstance(vision_active, bool)
-
-
- def test_llm_function_calling_support(default_config):
-     """Test LLM function calling support detection."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Function calling support detection should work without errors
-     function_calling_active = llm.is_function_calling_active()
-     assert isinstance(function_calling_active, bool)
-
-
- def test_llm_caching_support(default_config):
-     """Test LLM prompt caching support detection."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Caching support detection should work without errors
-     caching_active = llm.is_caching_prompt_active()
-     assert isinstance(caching_active, bool)
-
-
- def test_llm_string_representation(default_config):
-     """Test LLM string representation."""
-     llm = LLM(**default_config)  # type: ignore
-
-     str_repr = str(llm)
-     assert "LLM(" in str_repr
-     assert "gpt-4o" in str_repr
-
-     repr_str = repr(llm)
-     assert repr_str == str_repr
-
-
- def test_llm_openhands_provider_rewrite():
-     """Test OpenHands provider rewriting."""
-     llm = LLM(model="openhands/gpt-4o")
-
-     # Model should be rewritten to litellm_proxy format
-     assert llm.model == "litellm_proxy/gpt-4o"
-     assert llm.base_url == "https://llm-proxy.app.all-hands.dev/"
-
-
- def test_llm_message_formatting(default_config):
-     """Test LLM message formatting for different message types."""
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test with single Message object
-     message = Message(role="user", content=[TextContent(text="Hello")])
-     formatted = llm.format_messages_for_llm([message])
-     assert isinstance(formatted, list)
-     assert len(formatted) == 1
-     assert isinstance(formatted[0], dict)
-
-     # Test with list of Message objects
-     messages = [
-         Message(role="user", content=[TextContent(text="Hello")]),
-         Message(role="assistant", content=[TextContent(text="Hi there!")]),
-     ]
-     formatted = llm.format_messages_for_llm(messages)
-     assert isinstance(formatted, list)
-     assert len(formatted) == 2
-     assert all(isinstance(msg, dict) for msg in formatted)
-
-
- def test_metrics_copy():
-     """Test that metrics can be copied correctly."""
-     original = Metrics(model_name="test-model")
-     original.add_cost(1.0)
-     original.add_token_usage(10, 5, 2, 1, 1000, "test-response")
-     original.add_response_latency(0.5, "test-response")
-
-     # Create a copy
-     copied = original.copy()
-
-     # Verify copy has same data
-     original_data = original.get()
-     copied_data = copied.get()
-
-     assert original_data["accumulated_cost"] == copied_data["accumulated_cost"]
-     assert len(original_data["costs"]) == len(copied_data["costs"])
-     assert len(original_data["token_usages"]) == len(copied_data["token_usages"])
-     assert len(original_data["response_latencies"]) == len(
-         copied_data["response_latencies"]
-     )
-
-     # Verify they are independent (modifying one doesn't affect the other)
-     copied.add_cost(2.0)
-     assert original.accumulated_cost != copied.accumulated_cost
-
-
- def test_metrics_log():
-     """Test metrics logging functionality."""
-     metrics = Metrics(model_name="test-model")
-     metrics.add_cost(1.5)
-     metrics.add_token_usage(10, 5, 2, 1, 1000, "test-response")
-
-     log_output = metrics.log()
-     assert isinstance(log_output, str)
-     assert "accumulated_cost" in log_output
-     assert "1.5" in log_output
-
-
- def test_llm_config_validation():
-     """Test LLM configuration validation."""
-     # Test with minimal valid config
-     llm = LLM(model="gpt-4o")
-     assert llm.model == "gpt-4o"
-
-     # Test with full config
-     full_llm = LLM(
-         model="gpt-4o",
-         api_key=SecretStr("test_key"),
-         base_url="https://api.openai.com/v1",
-         temperature=0.7,
-         max_output_tokens=1000,
-         num_retries=3,
-         retry_min_wait=1,
-         retry_max_wait=10,
-     )
-     assert full_llm.temperature == 0.7
-     assert full_llm.max_output_tokens == 1000
-
-
- @patch("openhands.sdk.llm.llm.litellm_completion")
- def test_llm_no_response_error(mock_completion):
-     """Test handling of LLMNoResponseError."""
-     # Mock empty response
-     mock_response = MagicMock()
-     mock_response.choices = []
-     mock_response.get.return_value = None
-     mock_response.__getitem__.side_effect = lambda key: {
-         "choices": [],
-         "usage": None,
-         "id": None,
-     }[key]
-     mock_completion.return_value = mock_response
-
-     llm = LLM(**default_config)  # type: ignore
-
-     # Test that empty response raises LLMNoResponseError
-     messages = [{"role": "user", "content": "Hello"}]
-     with pytest.raises(LLMNoResponseError):
-         llm.completion(messages=messages)
-
-
- def test_response_latency_tracking(default_config):
-     """Test response latency tracking in metrics."""
-     metrics = Metrics(model_name="test-model")
-
-     # Add some latencies
-     metrics.add_response_latency(0.5, "response-1")
-     metrics.add_response_latency(1.2, "response-2")
-     metrics.add_response_latency(0.8, "response-3")
-
-     latencies = metrics.response_latencies
-     assert len(latencies) == 3
-     assert latencies[0].latency == 0.5
-     assert latencies[1].latency == 1.2
-     assert latencies[2].latency == 0.8
-
-     # Test negative latency is converted to 0
-     metrics.add_response_latency(-0.1, "response-4")
-     assert metrics.response_latencies[-1].latency == 0.0
-
-
- def test_token_usage_context_window():
-     """Test token usage with context window tracking."""
-     usage = TokenUsage(
-         model="test-model",
-         prompt_tokens=100,
-         completion_tokens=50,
-         context_window=4096,
-         response_id="test-response",
-     )
-
-     assert usage.context_window == 4096
-     assert usage.per_turn_token == 0  # Default value
-
-     # Test addition preserves max context window
-     usage2 = TokenUsage(
-         model="test-model",
-         prompt_tokens=200,
-         completion_tokens=75,
-         context_window=8192,
-         response_id="test-response-2",
-     )
-
-     combined = usage + usage2
-     assert combined.context_window == 8192  # Should take the max
-     assert combined.prompt_tokens == 300
-     assert combined.completion_tokens == 125
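
The removed tests above pin down the arithmetic of TokenUsage addition; the sketch below condenses it into one place (again only meaningful against the deleted 0.0.0 modules): token counts are summed, the left operand's model and response_id are kept, and context_window takes the maximum.

from openhands.sdk.llm.utils.metrics import TokenUsage  # removed in 1.0.1

a = TokenUsage(model="m1", prompt_tokens=100, completion_tokens=50,
               context_window=4096, response_id="r1")
b = TokenUsage(model="m2", prompt_tokens=200, completion_tokens=75,
               context_window=8192, response_id="r2")

c = a + b
assert c.model == "m1" and c.response_id == "r1"              # left operand wins
assert c.prompt_tokens == 300 and c.completion_tokens == 125  # counts summed
assert c.context_window == 8192                               # max of the two
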