openhands 0.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of openhands has been flagged as potentially problematic.
- openhands-1.0.1.dist-info/METADATA +52 -0
- openhands-1.0.1.dist-info/RECORD +31 -0
- {openhands-0.0.0.dist-info → openhands-1.0.1.dist-info}/WHEEL +1 -2
- openhands-1.0.1.dist-info/entry_points.txt +2 -0
- openhands_cli/__init__.py +8 -0
- openhands_cli/agent_chat.py +186 -0
- openhands_cli/argparsers/main_parser.py +56 -0
- openhands_cli/argparsers/serve_parser.py +31 -0
- openhands_cli/gui_launcher.py +220 -0
- openhands_cli/listeners/__init__.py +4 -0
- openhands_cli/listeners/loading_listener.py +63 -0
- openhands_cli/listeners/pause_listener.py +83 -0
- openhands_cli/llm_utils.py +57 -0
- openhands_cli/locations.py +13 -0
- openhands_cli/pt_style.py +30 -0
- openhands_cli/runner.py +178 -0
- openhands_cli/setup.py +116 -0
- openhands_cli/simple_main.py +59 -0
- openhands_cli/tui/__init__.py +5 -0
- openhands_cli/tui/settings/mcp_screen.py +217 -0
- openhands_cli/tui/settings/settings_screen.py +202 -0
- openhands_cli/tui/settings/store.py +93 -0
- openhands_cli/tui/status.py +109 -0
- openhands_cli/tui/tui.py +100 -0
- openhands_cli/tui/utils.py +14 -0
- openhands_cli/user_actions/__init__.py +17 -0
- openhands_cli/user_actions/agent_action.py +95 -0
- openhands_cli/user_actions/exit_session.py +18 -0
- openhands_cli/user_actions/settings_action.py +171 -0
- openhands_cli/user_actions/types.py +18 -0
- openhands_cli/user_actions/utils.py +199 -0
- openhands/__init__.py +0 -1
- openhands/sdk/__init__.py +0 -45
- openhands/sdk/agent/__init__.py +0 -8
- openhands/sdk/agent/agent/__init__.py +0 -6
- openhands/sdk/agent/agent/agent.py +0 -349
- openhands/sdk/agent/base.py +0 -103
- openhands/sdk/context/__init__.py +0 -28
- openhands/sdk/context/agent_context.py +0 -153
- openhands/sdk/context/condenser/__init__.py +0 -5
- openhands/sdk/context/condenser/condenser.py +0 -73
- openhands/sdk/context/condenser/no_op_condenser.py +0 -13
- openhands/sdk/context/manager.py +0 -5
- openhands/sdk/context/microagents/__init__.py +0 -26
- openhands/sdk/context/microagents/exceptions.py +0 -11
- openhands/sdk/context/microagents/microagent.py +0 -345
- openhands/sdk/context/microagents/types.py +0 -70
- openhands/sdk/context/utils/__init__.py +0 -8
- openhands/sdk/context/utils/prompt.py +0 -52
- openhands/sdk/context/view.py +0 -116
- openhands/sdk/conversation/__init__.py +0 -12
- openhands/sdk/conversation/conversation.py +0 -207
- openhands/sdk/conversation/state.py +0 -50
- openhands/sdk/conversation/types.py +0 -6
- openhands/sdk/conversation/visualizer.py +0 -300
- openhands/sdk/event/__init__.py +0 -27
- openhands/sdk/event/base.py +0 -148
- openhands/sdk/event/condenser.py +0 -49
- openhands/sdk/event/llm_convertible.py +0 -265
- openhands/sdk/event/types.py +0 -5
- openhands/sdk/event/user_action.py +0 -12
- openhands/sdk/event/utils.py +0 -30
- openhands/sdk/llm/__init__.py +0 -19
- openhands/sdk/llm/exceptions.py +0 -108
- openhands/sdk/llm/llm.py +0 -867
- openhands/sdk/llm/llm_registry.py +0 -116
- openhands/sdk/llm/message.py +0 -216
- openhands/sdk/llm/metadata.py +0 -34
- openhands/sdk/llm/utils/fn_call_converter.py +0 -1049
- openhands/sdk/llm/utils/metrics.py +0 -311
- openhands/sdk/llm/utils/model_features.py +0 -153
- openhands/sdk/llm/utils/retry_mixin.py +0 -122
- openhands/sdk/llm/utils/telemetry.py +0 -252
- openhands/sdk/logger.py +0 -167
- openhands/sdk/mcp/__init__.py +0 -20
- openhands/sdk/mcp/client.py +0 -113
- openhands/sdk/mcp/definition.py +0 -69
- openhands/sdk/mcp/tool.py +0 -104
- openhands/sdk/mcp/utils.py +0 -59
- openhands/sdk/tests/llm/test_llm.py +0 -447
- openhands/sdk/tests/llm/test_llm_fncall_converter.py +0 -691
- openhands/sdk/tests/llm/test_model_features.py +0 -221
- openhands/sdk/tool/__init__.py +0 -30
- openhands/sdk/tool/builtins/__init__.py +0 -34
- openhands/sdk/tool/builtins/finish.py +0 -57
- openhands/sdk/tool/builtins/think.py +0 -60
- openhands/sdk/tool/schema.py +0 -236
- openhands/sdk/tool/security_prompt.py +0 -5
- openhands/sdk/tool/tool.py +0 -142
- openhands/sdk/utils/__init__.py +0 -14
- openhands/sdk/utils/discriminated_union.py +0 -210
- openhands/sdk/utils/json.py +0 -48
- openhands/sdk/utils/truncate.py +0 -44
- openhands/tools/__init__.py +0 -44
- openhands/tools/execute_bash/__init__.py +0 -30
- openhands/tools/execute_bash/constants.py +0 -31
- openhands/tools/execute_bash/definition.py +0 -166
- openhands/tools/execute_bash/impl.py +0 -38
- openhands/tools/execute_bash/metadata.py +0 -101
- openhands/tools/execute_bash/terminal/__init__.py +0 -22
- openhands/tools/execute_bash/terminal/factory.py +0 -113
- openhands/tools/execute_bash/terminal/interface.py +0 -189
- openhands/tools/execute_bash/terminal/subprocess_terminal.py +0 -412
- openhands/tools/execute_bash/terminal/terminal_session.py +0 -492
- openhands/tools/execute_bash/terminal/tmux_terminal.py +0 -160
- openhands/tools/execute_bash/utils/command.py +0 -150
- openhands/tools/str_replace_editor/__init__.py +0 -17
- openhands/tools/str_replace_editor/definition.py +0 -158
- openhands/tools/str_replace_editor/editor.py +0 -683
- openhands/tools/str_replace_editor/exceptions.py +0 -41
- openhands/tools/str_replace_editor/impl.py +0 -66
- openhands/tools/str_replace_editor/utils/__init__.py +0 -0
- openhands/tools/str_replace_editor/utils/config.py +0 -2
- openhands/tools/str_replace_editor/utils/constants.py +0 -9
- openhands/tools/str_replace_editor/utils/encoding.py +0 -135
- openhands/tools/str_replace_editor/utils/file_cache.py +0 -154
- openhands/tools/str_replace_editor/utils/history.py +0 -122
- openhands/tools/str_replace_editor/utils/shell.py +0 -72
- openhands/tools/task_tracker/__init__.py +0 -16
- openhands/tools/task_tracker/definition.py +0 -336
- openhands/tools/utils/__init__.py +0 -1
- openhands-0.0.0.dist-info/METADATA +0 -3
- openhands-0.0.0.dist-info/RECORD +0 -94
- openhands-0.0.0.dist-info/top_level.txt +0 -1
openhands/sdk/llm/utils/metrics.py
@@ -1,311 +0,0 @@
-import copy
-import time
-from typing import Optional
-
-from pydantic import BaseModel, Field, field_validator, model_validator
-
-
-class Cost(BaseModel):
-    model: str
-    cost: float = Field(ge=0.0, description="Cost must be non-negative")
-    timestamp: float = Field(default_factory=time.time)
-
-    @field_validator("cost")
-    @classmethod
-    def validate_cost(cls, v: float) -> float:
-        if v < 0:
-            raise ValueError("Cost cannot be negative")
-        return v
-
-
-class ResponseLatency(BaseModel):
-    """Metric tracking the round-trip time per completion call."""
-
-    model: str
-    latency: float = Field(ge=0.0, description="Latency must be non-negative")
-    response_id: str
-
-    @field_validator("latency")
-    @classmethod
-    def validate_latency(cls, v: float) -> float:
-        return max(0.0, v)
-
-
-class TokenUsage(BaseModel):
-    """Metric tracking detailed token usage per completion call."""
-
-    model: str = Field(default="")
-    prompt_tokens: int = Field(
-        default=0, ge=0, description="Prompt tokens must be non-negative"
-    )
-    completion_tokens: int = Field(
-        default=0, ge=0, description="Completion tokens must be non-negative"
-    )
-    cache_read_tokens: int = Field(
-        default=0, ge=0, description="Cache read tokens must be non-negative"
-    )
-    cache_write_tokens: int = Field(
-        default=0, ge=0, description="Cache write tokens must be non-negative"
-    )
-    reasoning_tokens: int = Field(
-        default=0, ge=0, description="Reasoning tokens must be non-negative"
-    )
-    context_window: int = Field(
-        default=0, ge=0, description="Context window must be non-negative"
-    )
-    per_turn_token: int = Field(
-        default=0, ge=0, description="Per turn tokens must be non-negative"
-    )
-    response_id: str = Field(default="")
-
-    def __add__(self, other: "TokenUsage") -> "TokenUsage":
-        """Add two TokenUsage instances together."""
-        return TokenUsage(
-            model=self.model,
-            prompt_tokens=self.prompt_tokens + other.prompt_tokens,
-            completion_tokens=self.completion_tokens + other.completion_tokens,
-            cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
-            cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens,
-            reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
-            context_window=max(self.context_window, other.context_window),
-            per_turn_token=other.per_turn_token,
-            response_id=self.response_id,
-        )
-
-
-class MetricsSnapshot(BaseModel):
-    """A snapshot of metrics at a point in time.
-
-    Does not include lists of individual costs, latencies, or token usages.
-    """
-
-    model_name: str = Field(default="default", description="Name of the model")
-    accumulated_cost: float = Field(
-        default=0.0, ge=0.0, description="Total accumulated cost, must be non-negative"
-    )
-    max_budget_per_task: Optional[float] = Field(
-        default=None, description="Maximum budget per task"
-    )
-    accumulated_token_usage: Optional[TokenUsage] = Field(
-        default=None, description="Accumulated token usage across all calls"
-    )
-
-
-class Metrics(MetricsSnapshot):
-    """Metrics class can record various metrics during running and evaluation.
-    We track:
-      - accumulated_cost and costs
-      - max_budget_per_task (budget limit)
-      - A list of ResponseLatency
-      - A list of TokenUsage (one per call).
-    """
-
-    costs: list[Cost] = Field(
-        default_factory=list, description="List of individual costs"
-    )
-    response_latencies: list[ResponseLatency] = Field(
-        default_factory=list, description="List of response latencies"
-    )
-    token_usages: list[TokenUsage] = Field(
-        default_factory=list, description="List of token usage records"
-    )
-
-    @field_validator("accumulated_cost")
-    @classmethod
-    def validate_accumulated_cost(cls, v: float) -> float:
-        if v < 0:
-            raise ValueError("Total cost cannot be negative.")
-        return v
-
-    @model_validator(mode="after")
-    def initialize_accumulated_token_usage(self) -> "Metrics":
-        if self.accumulated_token_usage is None:
-            self.accumulated_token_usage = TokenUsage(
-                model=self.model_name,
-                prompt_tokens=0,
-                completion_tokens=0,
-                cache_read_tokens=0,
-                cache_write_tokens=0,
-                reasoning_tokens=0,
-                context_window=0,
-                response_id="",
-            )
-        return self
-
-    def get_snapshot(self) -> MetricsSnapshot:
-        """Get a snapshot of the current metrics without the detailed lists."""
-        return MetricsSnapshot(
-            model_name=self.model_name,
-            accumulated_cost=self.accumulated_cost,
-            max_budget_per_task=self.max_budget_per_task,
-            accumulated_token_usage=copy.deepcopy(self.accumulated_token_usage)
-            if self.accumulated_token_usage
-            else None,
-        )
-
-    def add_cost(self, value: float) -> None:
-        if value < 0:
-            raise ValueError("Added cost cannot be negative.")
-        self.accumulated_cost += value
-        self.costs.append(Cost(cost=value, model=self.model_name))
-
-    def add_response_latency(self, value: float, response_id: str) -> None:
-        self.response_latencies.append(
-            ResponseLatency(
-                latency=max(0.0, value), model=self.model_name, response_id=response_id
-            )
-        )
-
-    def add_token_usage(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        cache_read_tokens: int,
-        cache_write_tokens: int,
-        context_window: int,
-        response_id: str,
-        reasoning_tokens: int = 0,
-    ) -> None:
-        """Add a single usage record."""
-        # Token each turn for calculating context usage.
-        per_turn_token = prompt_tokens + completion_tokens
-
-        usage = TokenUsage(
-            model=self.model_name,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            cache_read_tokens=cache_read_tokens,
-            cache_write_tokens=cache_write_tokens,
-            reasoning_tokens=reasoning_tokens,
-            context_window=context_window,
-            per_turn_token=per_turn_token,
-            response_id=response_id,
-        )
-        self.token_usages.append(usage)
-
-        # Update accumulated token usage using the __add__ operator
-        new_usage = TokenUsage(
-            model=self.model_name,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            cache_read_tokens=cache_read_tokens,
-            cache_write_tokens=cache_write_tokens,
-            reasoning_tokens=reasoning_tokens,
-            context_window=context_window,
-            per_turn_token=per_turn_token,
-            response_id="",
-        )
-        if self.accumulated_token_usage is None:
-            self.accumulated_token_usage = new_usage
-        else:
-            self.accumulated_token_usage = self.accumulated_token_usage + new_usage
-
-    def merge(self, other: "Metrics") -> None:
-        """Merge 'other' metrics into this one."""
-        self.accumulated_cost += other.accumulated_cost
-
-        # Keep the max_budget_per_task from other if it's set and this one isn't
-        if self.max_budget_per_task is None and other.max_budget_per_task is not None:
-            self.max_budget_per_task = other.max_budget_per_task
-
-        self.costs += other.costs
-        self.token_usages += other.token_usages
-        self.response_latencies += other.response_latencies
-
-        # Merge accumulated token usage using the __add__ operator
-        if self.accumulated_token_usage is None:
-            self.accumulated_token_usage = other.accumulated_token_usage
-        elif other.accumulated_token_usage is not None:
-            self.accumulated_token_usage = (
-                self.accumulated_token_usage + other.accumulated_token_usage
-            )
-
-    def get(self) -> dict:
-        """Return the metrics in a dictionary."""
-        return {
-            "accumulated_cost": self.accumulated_cost,
-            "max_budget_per_task": self.max_budget_per_task,
-            "accumulated_token_usage": self.accumulated_token_usage.model_dump()
-            if self.accumulated_token_usage
-            else None,
-            "costs": [cost.model_dump() for cost in self.costs],
-            "response_latencies": [
-                latency.model_dump() for latency in self.response_latencies
-            ],
-            "token_usages": [usage.model_dump() for usage in self.token_usages],
-        }
-
-    def log(self) -> str:
-        """Log the metrics."""
-        metrics = self.get()
-        logs = ""
-        for key, value in metrics.items():
-            logs += f"{key}: {value}\n"
-        return logs
-
-    def deep_copy(self) -> "Metrics":
-        """Create a deep copy of the Metrics object."""
-        return copy.deepcopy(self)
-
-    def diff(self, baseline: "Metrics") -> "Metrics":
-        """Calculate the difference between current metrics and a baseline.
-
-        This is useful for tracking metrics for specific operations like delegates.
-
-        Args:
-            baseline: A metrics object representing the baseline state
-
-        Returns:
-            A new Metrics object containing only the differences since the baseline
-        """
-        result = Metrics(model_name=self.model_name)
-
-        # Calculate cost difference
-        result.accumulated_cost = self.accumulated_cost - baseline.accumulated_cost
-
-        # Include only costs that were added after the baseline
-        if baseline.costs:
-            last_baseline_timestamp = baseline.costs[-1].timestamp
-            result.costs = [
-                cost for cost in self.costs if cost.timestamp > last_baseline_timestamp
-            ]
-        else:
-            result.costs = self.costs.copy()
-
-        # Include only response latencies that were added after the baseline
-        result.response_latencies = self.response_latencies[
-            len(baseline.response_latencies) :
-        ]
-
-        # Include only token usages that were added after the baseline
-        result.token_usages = self.token_usages[len(baseline.token_usages) :]
-
-        # Calculate accumulated token usage difference
-        base_usage = baseline.accumulated_token_usage
-        current_usage = self.accumulated_token_usage
-
-        if current_usage is not None and base_usage is not None:
-            result.accumulated_token_usage = TokenUsage(
-                model=self.model_name,
-                prompt_tokens=current_usage.prompt_tokens - base_usage.prompt_tokens,
-                completion_tokens=current_usage.completion_tokens
-                - base_usage.completion_tokens,
-                cache_read_tokens=current_usage.cache_read_tokens
-                - base_usage.cache_read_tokens,
-                cache_write_tokens=current_usage.cache_write_tokens
-                - base_usage.cache_write_tokens,
-                reasoning_tokens=current_usage.reasoning_tokens
-                - base_usage.reasoning_tokens,
-                context_window=current_usage.context_window,
-                per_turn_token=0,
-                response_id="",
-            )
-        elif current_usage is not None:
-            result.accumulated_token_usage = current_usage
-        else:
-            result.accumulated_token_usage = None
-
-        return result
-
-    def __repr__(self) -> str:
-        return f"Metrics({self.get()}"
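For readers skimming the deleted metrics.py above: the module is self-contained Pydantic code, so a minimal usage sketch may help. This is illustrative, not part of the diff; the import path is taken from the RECORD listing above, and all values are hypothetical. (Note that the __repr__ on the last removed line omits a closing parenthesis in its output string, a bug present in the released source.)

from openhands.sdk.llm.utils.metrics import Metrics

m = Metrics(model_name="gpt-4o")
m.add_cost(0.0123)  # appends a Cost record and bumps accumulated_cost
m.add_response_latency(1.7, response_id="resp-1")
m.add_token_usage(
    prompt_tokens=900,
    completion_tokens=150,
    cache_read_tokens=0,
    cache_write_tokens=0,
    context_window=128_000,
    response_id="resp-1",
)

baseline = m.deep_copy()
m.add_cost(0.004)

snapshot = m.get_snapshot()  # MetricsSnapshot without the per-call lists
delta = m.diff(baseline)     # only what was recorded after `baseline`
assert abs(delta.accumulated_cost - 0.004) < 1e-9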
openhands/sdk/llm/utils/model_features.py
@@ -1,153 +0,0 @@
-from dataclasses import dataclass
-from fnmatch import fnmatch
-
-
-def normalize_model_name(model: str) -> str:
-    """Normalize a model string to a canonical, comparable name.
-
-    Strategy:
-    - Trim whitespace
-    - Lowercase
-    - If there is a '/', keep only the basename after the last '/'
-      (handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.)
-      and treat ':' inside that basename as an Ollama-style variant tag to be removed
-    - There is no provider:model form; providers, when present, use 'provider/model'
-    - Drop a trailing "-gguf" suffix if present
-    - If basename starts with a known vendor prefix followed by '.', drop that prefix
-      (e.g., 'anthropic.claude-*' -> 'claude-*')
-    """
-    raw = (model or "").strip().lower()
-    if "/" in raw:
-        name = raw.split("/")[-1]
-        if ":" in name:
-            # Drop Ollama-style variant tag in basename
-            name = name.split(":", 1)[0]
-    else:
-        # No '/', keep the whole raw name (we do not support provider:model)
-        name = raw
-
-    # Drop common vendor prefixes embedded in the basename (bedrock style), once.
-    # Keep this list small and explicit to avoid accidental over-matching.
-    vendor_prefixes = {
-        "anthropic",
-        "meta",
-        "cohere",
-        "mistral",
-        "ai21",
-        "amazon",
-    }
-    if "." in name:
-        vendor, rest = name.split(".", 1)
-        if vendor in vendor_prefixes and rest:
-            name = rest
-
-    if name.endswith("-gguf"):
-        name = name[: -len("-gguf")]
-    return name
-
-
-def model_matches(model: str, patterns: list[str]) -> bool:
-    """Return True if the model matches any of the glob patterns.
-
-    If a pattern contains a '/', it is treated as provider-qualified and matched
-    against the full, lowercased model string (including provider prefix).
-    Otherwise, it is matched against the normalized basename.
-    """
-    raw = (model or "").strip().lower()
-    name = normalize_model_name(model)
-    for pat in patterns:
-        pat_l = pat.lower()
-        if "/" in pat_l:
-            if fnmatch(raw, pat_l):
-                return True
-        else:
-            if fnmatch(name, pat_l):
-                return True
-    return False
-
-
-@dataclass(frozen=True)
-class ModelFeatures:
-    supports_function_calling: bool
-    supports_reasoning_effort: bool
-    supports_prompt_cache: bool
-    supports_stop_words: bool
-
-
-# Pattern tables capturing current behavior. Keep patterns lowercase.
-FUNCTION_CALLING_PATTERNS: list[str] = [
-    # Anthropic families
-    "claude-3-7-sonnet*",
-    "claude-3.7-sonnet*",
-    "claude-sonnet-3-7-latest",
-    "claude-3-5-sonnet*",
-    "claude-3.5-haiku*",
-    "claude-3-5-haiku*",
-    "claude-sonnet-4*",
-    "claude-opus-4*",
-    # OpenAI families
-    "gpt-4o*",
-    "gpt-4.1",
-    "gpt-5*",
-    # o-series (keep exact o1 support per existing list)
-    "o1-2024-12-17",
-    "o3*",
-    "o4-mini*",
-    # Google Gemini
-    "gemini-2.5-pro*",
-    # Others
-    "kimi-k2-0711-preview",
-    "kimi-k2-instruct",
-    "qwen3-coder*",
-    "qwen3-coder-480b-a35b-instruct",
-]
-
-REASONING_EFFORT_PATTERNS: list[str] = [
-    # Mirror main behavior exactly (no unintended expansion)
-    "o1-2024-12-17",
-    "o1",
-    "o3",
-    "o3-2025-04-16",
-    "o3-mini-2025-01-31",
-    "o3-mini",
-    "o4-mini",
-    "o4-mini-2025-04-16",
-    "gemini-2.5-flash",
-    "gemini-2.5-pro",
-    "gpt-5",
-    "gpt-5-2025-08-07",
-]
-
-PROMPT_CACHE_PATTERNS: list[str] = [
-    "claude-3-7-sonnet*",
-    "claude-3.7-sonnet*",
-    "claude-sonnet-3-7-latest",
-    "claude-3-5-sonnet*",
-    "claude-3-5-haiku*",
-    "claude-3.5-haiku*",
-    "claude-3-haiku-20240307*",
-    "claude-3-opus-20240229*",
-    "claude-sonnet-4*",
-    "claude-opus-4*",
-]
-
-SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
-    # o1 family doesn't support stop words
-    "o1*",
-    # grok-4 specific model name (basename)
-    "grok-4-0709",
-    "grok-code-fast-1",
-    # DeepSeek R1 family
-    "deepseek-r1-0528*",
-]
-
-
-def get_features(model: str) -> ModelFeatures:
-    return ModelFeatures(
-        supports_function_calling=model_matches(model, FUNCTION_CALLING_PATTERNS),
-        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
-        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
-        supports_stop_words=not model_matches(
-            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
-        ),
-    )
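A similar sketch for the deleted model_features.py above, again assuming the import path from the RECORD; the model strings are arbitrary examples chosen to exercise the normalization rules:

from openhands.sdk.llm.utils.model_features import get_features, normalize_model_name

# Provider prefixes ("openrouter/..."), Ollama-style ":tag" suffixes, and
# bedrock-style "vendor." prefixes are all stripped before glob matching.
assert normalize_model_name("openrouter/anthropic/claude-sonnet-4:beta") == "claude-sonnet-4"
assert normalize_model_name("anthropic.claude-opus-4-v1") == "claude-opus-4-v1"

features = get_features("litellm_proxy/gpt-4o-mini")
assert features.supports_function_calling   # basename matches "gpt-4o*"
assert features.supports_stop_words         # no o1/grok/deepseek-r1 match
assert not features.supports_prompt_cache   # only Claude patterns qualify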
openhands/sdk/llm/utils/retry_mixin.py
@@ -1,122 +0,0 @@
-from typing import Any, Callable, Iterable, cast
-
-from tenacity import (
-    RetryCallState,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
-
-from openhands.sdk.llm.exceptions import LLMNoResponseError
-from openhands.sdk.logger import get_logger
-
-
-logger = get_logger(__name__)
-
-# Helpful alias for listener signature: (attempt_number, max_retries) -> None
-RetryListener = Callable[[int, int], None]
-
-
-class RetryMixin:
-    """Mixin class for retry logic."""
-
-    def retry_decorator(
-        self,
-        num_retries: int = 5,
-        retry_exceptions: tuple[type[BaseException], ...] = (LLMNoResponseError,),
-        retry_min_wait: int = 8,
-        retry_max_wait: int = 64,
-        retry_multiplier: float = 2.0,
-        retry_listener: RetryListener | None = None,
-    ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-        """
-        Create a LLM retry decorator with customizable parameters.
-        This is used for 429 errors, and a few other exceptions in LLM classes.
-        """
-
-        def before_sleep(retry_state: RetryCallState) -> None:
-            # Log first (also validates outcome as part of logging)
-            self.log_retry_attempt(retry_state)
-
-            if retry_listener is not None:
-                retry_listener(retry_state.attempt_number, num_retries)
-
-            # If there is no outcome or no exception, nothing to tweak.
-            if retry_state.outcome is None:
-                return
-            exc = retry_state.outcome.exception()
-            if exc is None:
-                return
-
-            # Only adjust temperature for LLMNoResponseError
-            if isinstance(exc, LLMNoResponseError):
-                kwargs = getattr(retry_state, "kwargs", None)
-                if isinstance(kwargs, dict):
-                    current_temp = kwargs.get("temperature", 0)
-                    if current_temp == 0:
-                        kwargs["temperature"] = 1.0
-                        logger.warning(
-                            "LLMNoResponseError with temperature=0, "
-                            "setting temperature to 1.0 for next attempt."
-                        )
-                    else:
-                        logger.warning(
-                            f"LLMNoResponseError with temperature={current_temp}, "
-                            "keeping original temperature"
-                        )
-
-        retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
-            before_sleep=before_sleep,
-            stop=stop_after_attempt(num_retries),
-            reraise=True,
-            retry=retry_if_exception_type(retry_exceptions),
-            wait=wait_exponential(
-                multiplier=retry_multiplier,
-                min=retry_min_wait,
-                max=retry_max_wait,
-            ),
-        )
-        return retry_decorator
-
-    def log_retry_attempt(self, retry_state: RetryCallState) -> None:
-        """Log retry attempts."""
-
-        if retry_state.outcome is None:
-            logger.error(
-                "retry_state.outcome is None. "
-                "This should not happen, please check the retry logic."
-            )
-            return
-
-        exc = retry_state.outcome.exception()
-        if exc is None:
-            logger.error("retry_state.outcome.exception() returned None.")
-            return
-
-        # Try to get max attempts from the stop condition if present
-        max_attempts: int | None = None
-        retry_obj = getattr(retry_state, "retry_object", None)
-        stop_condition = getattr(retry_obj, "stop", None)
-        if stop_condition is not None:
-            # stop_any has .stops, single stop does not
-            stops: Iterable[Any]
-            if hasattr(stop_condition, "stops"):
-                stops = stop_condition.stops  # type: ignore[attr-defined]
-            else:
-                stops = [stop_condition]
-            for stop_func in stops:
-                if hasattr(stop_func, "max_attempts"):
-                    max_attempts = getattr(stop_func, "max_attempts")
-                    break
-
-        # Attach dynamic fields for downstream consumers (keep existing behavior)
-        setattr(cast(Any, exc), "retry_attempt", retry_state.attempt_number)
-        if max_attempts is not None:
-            setattr(cast(Any, exc), "max_retries", max_attempts)
-
-        logger.error(
-            "%s. Attempt #%d | You can customize retry values in the configuration.",
-            exc,
-            retry_state.attempt_number,
-        )
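And one for the deleted retry_mixin.py above. The FlakyLLM class and its complete method are hypothetical stand-ins; the mixin builds a tenacity retry decorator with exponential backoff and a before_sleep hook. Note the temperature bump only fires when temperature was passed as a keyword argument, since the hook reads retry_state.kwargs:

from openhands.sdk.llm.utils.retry_mixin import RetryMixin

class FlakyLLM(RetryMixin):
    def complete(self, prompt: str, temperature: float = 0) -> str:
        # Hypothetical transport call; a real one may raise LLMNoResponseError.
        return "ok"

llm = FlakyLLM()
wrapped = llm.retry_decorator(
    num_retries=3,
    retry_min_wait=1,
    retry_max_wait=4,
    retry_listener=lambda attempt, max_n: print(f"retry {attempt}/{max_n}"),
)(llm.complete)

# Up to 3 attempts with exponential backoff; on LLMNoResponseError with
# temperature=0, the hook rewrites kwargs["temperature"] to 1.0 before retrying.
result = wrapped(prompt="hello", temperature=0)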