openhands-sdk 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands/sdk/__init__.py +111 -0
- openhands/sdk/agent/__init__.py +8 -0
- openhands/sdk/agent/agent.py +650 -0
- openhands/sdk/agent/base.py +457 -0
- openhands/sdk/agent/prompts/in_context_learning_example.j2 +169 -0
- openhands/sdk/agent/prompts/in_context_learning_example_suffix.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
- openhands/sdk/agent/prompts/security_policy.j2 +22 -0
- openhands/sdk/agent/prompts/security_risk_assessment.j2 +21 -0
- openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
- openhands/sdk/agent/prompts/system_prompt.j2 +132 -0
- openhands/sdk/agent/prompts/system_prompt_interactive.j2 +14 -0
- openhands/sdk/agent/prompts/system_prompt_long_horizon.j2 +40 -0
- openhands/sdk/agent/prompts/system_prompt_planning.j2 +40 -0
- openhands/sdk/agent/prompts/system_prompt_tech_philosophy.j2 +122 -0
- openhands/sdk/agent/utils.py +228 -0
- openhands/sdk/context/__init__.py +28 -0
- openhands/sdk/context/agent_context.py +264 -0
- openhands/sdk/context/condenser/__init__.py +18 -0
- openhands/sdk/context/condenser/base.py +100 -0
- openhands/sdk/context/condenser/llm_summarizing_condenser.py +248 -0
- openhands/sdk/context/condenser/no_op_condenser.py +14 -0
- openhands/sdk/context/condenser/pipeline_condenser.py +56 -0
- openhands/sdk/context/condenser/prompts/summarizing_prompt.j2 +59 -0
- openhands/sdk/context/condenser/utils.py +149 -0
- openhands/sdk/context/prompts/__init__.py +6 -0
- openhands/sdk/context/prompts/prompt.py +114 -0
- openhands/sdk/context/prompts/templates/ask_agent_template.j2 +11 -0
- openhands/sdk/context/prompts/templates/skill_knowledge_info.j2 +8 -0
- openhands/sdk/context/prompts/templates/system_message_suffix.j2 +32 -0
- openhands/sdk/context/skills/__init__.py +28 -0
- openhands/sdk/context/skills/exceptions.py +11 -0
- openhands/sdk/context/skills/skill.py +720 -0
- openhands/sdk/context/skills/trigger.py +36 -0
- openhands/sdk/context/skills/types.py +48 -0
- openhands/sdk/context/view.py +503 -0
- openhands/sdk/conversation/__init__.py +40 -0
- openhands/sdk/conversation/base.py +281 -0
- openhands/sdk/conversation/conversation.py +152 -0
- openhands/sdk/conversation/conversation_stats.py +85 -0
- openhands/sdk/conversation/event_store.py +157 -0
- openhands/sdk/conversation/events_list_base.py +17 -0
- openhands/sdk/conversation/exceptions.py +50 -0
- openhands/sdk/conversation/fifo_lock.py +133 -0
- openhands/sdk/conversation/impl/__init__.py +5 -0
- openhands/sdk/conversation/impl/local_conversation.py +665 -0
- openhands/sdk/conversation/impl/remote_conversation.py +956 -0
- openhands/sdk/conversation/persistence_const.py +9 -0
- openhands/sdk/conversation/response_utils.py +41 -0
- openhands/sdk/conversation/secret_registry.py +126 -0
- openhands/sdk/conversation/serialization_diff.py +0 -0
- openhands/sdk/conversation/state.py +392 -0
- openhands/sdk/conversation/stuck_detector.py +311 -0
- openhands/sdk/conversation/title_utils.py +191 -0
- openhands/sdk/conversation/types.py +45 -0
- openhands/sdk/conversation/visualizer/__init__.py +12 -0
- openhands/sdk/conversation/visualizer/base.py +67 -0
- openhands/sdk/conversation/visualizer/default.py +373 -0
- openhands/sdk/critic/__init__.py +15 -0
- openhands/sdk/critic/base.py +38 -0
- openhands/sdk/critic/impl/__init__.py +12 -0
- openhands/sdk/critic/impl/agent_finished.py +83 -0
- openhands/sdk/critic/impl/empty_patch.py +49 -0
- openhands/sdk/critic/impl/pass_critic.py +42 -0
- openhands/sdk/event/__init__.py +42 -0
- openhands/sdk/event/base.py +149 -0
- openhands/sdk/event/condenser.py +82 -0
- openhands/sdk/event/conversation_error.py +25 -0
- openhands/sdk/event/conversation_state.py +104 -0
- openhands/sdk/event/llm_completion_log.py +39 -0
- openhands/sdk/event/llm_convertible/__init__.py +20 -0
- openhands/sdk/event/llm_convertible/action.py +139 -0
- openhands/sdk/event/llm_convertible/message.py +142 -0
- openhands/sdk/event/llm_convertible/observation.py +141 -0
- openhands/sdk/event/llm_convertible/system.py +61 -0
- openhands/sdk/event/token.py +16 -0
- openhands/sdk/event/types.py +11 -0
- openhands/sdk/event/user_action.py +21 -0
- openhands/sdk/git/exceptions.py +43 -0
- openhands/sdk/git/git_changes.py +249 -0
- openhands/sdk/git/git_diff.py +129 -0
- openhands/sdk/git/models.py +21 -0
- openhands/sdk/git/utils.py +189 -0
- openhands/sdk/hooks/__init__.py +30 -0
- openhands/sdk/hooks/config.py +180 -0
- openhands/sdk/hooks/conversation_hooks.py +227 -0
- openhands/sdk/hooks/executor.py +155 -0
- openhands/sdk/hooks/manager.py +170 -0
- openhands/sdk/hooks/types.py +40 -0
- openhands/sdk/io/__init__.py +6 -0
- openhands/sdk/io/base.py +48 -0
- openhands/sdk/io/cache.py +85 -0
- openhands/sdk/io/local.py +119 -0
- openhands/sdk/io/memory.py +54 -0
- openhands/sdk/llm/__init__.py +45 -0
- openhands/sdk/llm/exceptions/__init__.py +45 -0
- openhands/sdk/llm/exceptions/classifier.py +50 -0
- openhands/sdk/llm/exceptions/mapping.py +54 -0
- openhands/sdk/llm/exceptions/types.py +101 -0
- openhands/sdk/llm/llm.py +1140 -0
- openhands/sdk/llm/llm_registry.py +122 -0
- openhands/sdk/llm/llm_response.py +59 -0
- openhands/sdk/llm/message.py +656 -0
- openhands/sdk/llm/mixins/fn_call_converter.py +1288 -0
- openhands/sdk/llm/mixins/non_native_fc.py +97 -0
- openhands/sdk/llm/options/__init__.py +1 -0
- openhands/sdk/llm/options/chat_options.py +93 -0
- openhands/sdk/llm/options/common.py +19 -0
- openhands/sdk/llm/options/responses_options.py +67 -0
- openhands/sdk/llm/router/__init__.py +10 -0
- openhands/sdk/llm/router/base.py +117 -0
- openhands/sdk/llm/router/impl/multimodal.py +76 -0
- openhands/sdk/llm/router/impl/random.py +22 -0
- openhands/sdk/llm/streaming.py +9 -0
- openhands/sdk/llm/utils/metrics.py +312 -0
- openhands/sdk/llm/utils/model_features.py +192 -0
- openhands/sdk/llm/utils/model_info.py +90 -0
- openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
- openhands/sdk/llm/utils/retry_mixin.py +128 -0
- openhands/sdk/llm/utils/telemetry.py +362 -0
- openhands/sdk/llm/utils/unverified_models.py +156 -0
- openhands/sdk/llm/utils/verified_models.py +65 -0
- openhands/sdk/logger/__init__.py +22 -0
- openhands/sdk/logger/logger.py +195 -0
- openhands/sdk/logger/rolling.py +113 -0
- openhands/sdk/mcp/__init__.py +24 -0
- openhands/sdk/mcp/client.py +76 -0
- openhands/sdk/mcp/definition.py +106 -0
- openhands/sdk/mcp/exceptions.py +19 -0
- openhands/sdk/mcp/tool.py +270 -0
- openhands/sdk/mcp/utils.py +83 -0
- openhands/sdk/observability/__init__.py +4 -0
- openhands/sdk/observability/laminar.py +166 -0
- openhands/sdk/observability/utils.py +20 -0
- openhands/sdk/py.typed +0 -0
- openhands/sdk/secret/__init__.py +19 -0
- openhands/sdk/secret/secrets.py +92 -0
- openhands/sdk/security/__init__.py +6 -0
- openhands/sdk/security/analyzer.py +111 -0
- openhands/sdk/security/confirmation_policy.py +61 -0
- openhands/sdk/security/llm_analyzer.py +29 -0
- openhands/sdk/security/risk.py +100 -0
- openhands/sdk/tool/__init__.py +34 -0
- openhands/sdk/tool/builtins/__init__.py +34 -0
- openhands/sdk/tool/builtins/finish.py +106 -0
- openhands/sdk/tool/builtins/think.py +117 -0
- openhands/sdk/tool/registry.py +184 -0
- openhands/sdk/tool/schema.py +286 -0
- openhands/sdk/tool/spec.py +39 -0
- openhands/sdk/tool/tool.py +481 -0
- openhands/sdk/utils/__init__.py +22 -0
- openhands/sdk/utils/async_executor.py +115 -0
- openhands/sdk/utils/async_utils.py +39 -0
- openhands/sdk/utils/cipher.py +68 -0
- openhands/sdk/utils/command.py +90 -0
- openhands/sdk/utils/deprecation.py +166 -0
- openhands/sdk/utils/github.py +44 -0
- openhands/sdk/utils/json.py +48 -0
- openhands/sdk/utils/models.py +570 -0
- openhands/sdk/utils/paging.py +63 -0
- openhands/sdk/utils/pydantic_diff.py +85 -0
- openhands/sdk/utils/pydantic_secrets.py +64 -0
- openhands/sdk/utils/truncate.py +117 -0
- openhands/sdk/utils/visualize.py +58 -0
- openhands/sdk/workspace/__init__.py +17 -0
- openhands/sdk/workspace/base.py +158 -0
- openhands/sdk/workspace/local.py +189 -0
- openhands/sdk/workspace/models.py +35 -0
- openhands/sdk/workspace/remote/__init__.py +8 -0
- openhands/sdk/workspace/remote/async_remote_workspace.py +149 -0
- openhands/sdk/workspace/remote/base.py +164 -0
- openhands/sdk/workspace/remote/remote_workspace_mixin.py +323 -0
- openhands/sdk/workspace/workspace.py +49 -0
- openhands_sdk-1.7.3.dist-info/METADATA +17 -0
- openhands_sdk-1.7.3.dist-info/RECORD +180 -0
- openhands_sdk-1.7.3.dist-info/WHEEL +5 -0
- openhands_sdk-1.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
+from collections.abc import Callable, Iterable
+from typing import Any, cast
+
+from tenacity import (
+    RetryCallState,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from openhands.sdk.llm.exceptions import LLMNoResponseError
+from openhands.sdk.logger import get_logger
+
+
+logger = get_logger(__name__)
+
+# Helpful alias for listener signature: (attempt_number, max_retries) -> None
+RetryListener = Callable[[int, int, BaseException | None], None]
+
+
+class RetryMixin:
+    """Mixin class for retry logic."""
+
+    def retry_decorator(
+        self,
+        num_retries: int = 5,
+        retry_exceptions: tuple[type[BaseException], ...] = (LLMNoResponseError,),
+        retry_min_wait: int = 8,
+        retry_max_wait: int = 64,
+        retry_multiplier: float = 2.0,
+        retry_listener: RetryListener | None = None,
+    ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+        """
+        Create a LLM retry decorator with customizable parameters.
+        This is used for 429 errors, and a few other exceptions in LLM classes.
+        """
+
+        def before_sleep(retry_state: RetryCallState) -> None:
+            # Log first (also validates outcome as part of logging)
+            self.log_retry_attempt(retry_state)
+
+            if retry_listener is not None:
+                exc = (
+                    retry_state.outcome.exception()
+                    if retry_state.outcome is not None
+                    else None
+                )
+                retry_listener(retry_state.attempt_number, num_retries, exc)
+
+            # If there is no outcome or no exception, nothing to tweak.
+            if retry_state.outcome is None:
+                return
+            exc = retry_state.outcome.exception()
+            if exc is None:
+                return
+
+            # Only adjust temperature for LLMNoResponseError
+            if isinstance(exc, LLMNoResponseError):
+                kwargs = getattr(retry_state, "kwargs", None)
+                if isinstance(kwargs, dict):
+                    current_temp = kwargs.get("temperature", 0)
+                    if current_temp == 0:
+                        kwargs["temperature"] = 1.0
+                        logger.warning(
+                            "LLMNoResponseError with temperature=0, "
+                            "setting temperature to 1.0 for next attempt."
+                        )
+                    else:
+                        logger.warning(
+                            f"LLMNoResponseError with temperature={current_temp}, "
+                            "keeping original temperature"
+                        )
+
+        retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
+            before_sleep=before_sleep,
+            stop=stop_after_attempt(num_retries),
+            reraise=True,
+            retry=retry_if_exception_type(retry_exceptions),
+            wait=wait_exponential(
+                multiplier=retry_multiplier,
+                min=retry_min_wait,
+                max=retry_max_wait,
+            ),
+        )
+        return retry_decorator
+
+    def log_retry_attempt(self, retry_state: RetryCallState) -> None:
+        """Log retry attempts."""
+
+        if retry_state.outcome is None:
+            logger.error(
+                "retry_state.outcome is None. "
+                "This should not happen, please check the retry logic."
+            )
+            return
+
+        exc = retry_state.outcome.exception()
+        if exc is None:
+            logger.error("retry_state.outcome.exception() returned None.")
+            return
+
+        # Try to get max attempts from the stop condition if present
+        max_attempts: int | None = None
+        retry_obj = getattr(retry_state, "retry_object", None)
+        stop_condition = getattr(retry_obj, "stop", None)
+        if stop_condition is not None:
+            # stop_any has .stops, single stop does not
+            stops: Iterable[Any]
+            if hasattr(stop_condition, "stops"):
+                stops = stop_condition.stops  # type: ignore[attr-defined]
+            else:
+                stops = [stop_condition]
+            for stop_func in stops:
+                if hasattr(stop_func, "max_attempts"):
+                    max_attempts = getattr(stop_func, "max_attempts")
+                    break
+
+        # Attach dynamic fields for downstream consumers (keep existing behavior)
+        setattr(cast(Any, exc), "retry_attempt", retry_state.attempt_number)
+        if max_attempts is not None:
+            setattr(cast(Any, exc), "max_retries", max_attempts)
+
+        logger.error(
+            "%s. Attempt #%d | You can customize retry values in the configuration.",
+            exc,
+            retry_state.attempt_number,
+        )
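Judging by its 128 added lines and contents, this hunk appears to correspond to openhands/sdk/llm/utils/retry_mixin.py from the listing above. A minimal usage sketch follows; MyLLM and _complete are illustrative names that are not part of the package, and the import path is inferred from the listing.

# Hedged usage sketch (not from the package): how RetryMixin.retry_decorator
# might wrap a provider call. `MyLLM` and `_complete` are illustrative names;
# the import path is inferred from the file listing above.
from openhands.sdk.llm.utils.retry_mixin import RetryMixin


class MyLLM(RetryMixin):
    def complete(self, prompt: str, temperature: float = 0.0) -> str:
        decorated = self.retry_decorator(
            num_retries=3,
            retry_min_wait=1,
            retry_max_wait=8,
            retry_listener=lambda attempt, limit, exc: print(
                f"retry {attempt}/{limit}: {exc}"
            ),
        )(self._complete)
        # Passing temperature as a keyword lets before_sleep bump it to 1.0
        # after an LLMNoResponseError raised with temperature=0.
        return decorated(prompt, temperature=temperature)

    def _complete(self, prompt: str, temperature: float = 0.0) -> str:
        raise NotImplementedError  # the real provider call would go here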
@@ -0,0 +1,362 @@
+import json
+import os
+import time
+import uuid
+import warnings
+from collections.abc import Callable
+from typing import Any, ClassVar
+
+from litellm.cost_calculator import completion_cost as litellm_completion_cost
+from litellm.types.llms.openai import ResponseAPIUsage, ResponsesAPIResponse
+from litellm.types.utils import CostPerToken, ModelResponse, Usage
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
+
+from openhands.sdk.llm.utils.metrics import Metrics
+from openhands.sdk.logger import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class Telemetry(BaseModel):
+    """
+    Handles latency, token/cost accounting, and optional logging.
+    All runtime state (like start times) lives in private attrs.
+    """
+
+    # --- Config fields ---
+    model_name: str = Field(default="unknown", description="Name of the LLM model")
+    log_enabled: bool = Field(default=False, description="Whether to log completions")
+    log_dir: str | None = Field(
+        default=None, description="Directory to write logs if enabled"
+    )
+    input_cost_per_token: float | None = Field(
+        default=None, ge=0, description="Custom Input cost per token (USD)"
+    )
+    output_cost_per_token: float | None = Field(
+        default=None, ge=0, description="Custom Output cost per token (USD)"
+    )
+
+    metrics: Metrics = Field(..., description="Metrics collector instance")
+
+    # --- Runtime fields (not serialized) ---
+    _req_start: float = PrivateAttr(default=0.0)
+    _req_ctx: dict[str, Any] = PrivateAttr(default_factory=dict)
+    _last_latency: float = PrivateAttr(default=0.0)
+    _log_completions_callback: Callable[[str, str], None] | None = PrivateAttr(
+        default=None
+    )
+    _stats_update_callback: Callable[[], None] | None = PrivateAttr(default=None)
+
+    model_config: ClassVar[ConfigDict] = ConfigDict(
+        extra="forbid", arbitrary_types_allowed=True
+    )
+
+    # ---------- Lifecycle ----------
+    def set_log_completions_callback(
+        self, callback: Callable[[str, str], None] | None
+    ) -> None:
+        """Set a callback function for logging instead of writing to file.
+
+        Args:
+            callback: A function that takes (filename, log_data) and handles the log.
+                Used for streaming logs in remote execution contexts.
+        """
+        self._log_completions_callback = callback
+
+    def set_stats_update_callback(self, callback: Callable[[], None] | None) -> None:
+        """Set a callback function to be notified when stats are updated.
+
+        Args:
+            callback: A function called whenever metrics are updated.
+                Used for streaming stats updates in remote execution contexts.
+        """
+        self._stats_update_callback = callback
+
+    def on_request(self, log_ctx: dict | None) -> None:
+        self._req_start = time.time()
+        self._req_ctx = log_ctx or {}
+
+    def on_response(
+        self,
+        resp: ModelResponse | ResponsesAPIResponse,
+        raw_resp: ModelResponse | None = None,
+    ) -> Metrics:
+        """
+        Side-effects:
+          - records latency, tokens, cost into Metrics
+          - optionally writes a JSON log file
+        """
+        # 1) latency
+        self._last_latency = time.time() - (self._req_start or time.time())
+        response_id = resp.id
+        self.metrics.add_response_latency(self._last_latency, response_id)
+
+        # 2) cost
+        cost = self._compute_cost(resp)
+        # Intentionally skip logging zero-cost (0.0) responses; only record
+        # positive cost
+        if cost:
+            self.metrics.add_cost(cost)
+
+        # 3) tokens - use typed usage field when available
+        usage = getattr(resp, "usage", None)
+
+        if usage and self._has_meaningful_usage(usage):
+            self._record_usage(
+                usage, response_id, self._req_ctx.get("context_window", 0)
+            )
+
+        # 4) optional logging
+        if self.log_enabled:
+            self.log_llm_call(resp, cost, raw_resp=raw_resp)
+
+        # 5) notify about stats update
+        if self._stats_update_callback is not None:
+            try:
+                self._stats_update_callback()
+            except Exception:
+                logger.exception("Stats update callback failed", exc_info=True)
+
+        return self.metrics.deep_copy()
+
+    def on_error(self, _err: BaseException) -> None:
+        # Stub for error tracking / counters
+        return
+
+    # ---------- Helpers ----------
+    def _has_meaningful_usage(self, usage: Usage | ResponseAPIUsage | None) -> bool:
+        """Check if usage has meaningful (non-zero) token counts.
+
+        Supports both Chat Completions Usage and Responses API Usage shapes.
+        """
+        if usage is None:
+            return False
+        try:
+            prompt_tokens = getattr(usage, "prompt_tokens", None)
+            if prompt_tokens is None:
+                prompt_tokens = getattr(usage, "input_tokens", 0)
+            completion_tokens = getattr(usage, "completion_tokens", None)
+            if completion_tokens is None:
+                completion_tokens = getattr(usage, "output_tokens", 0)
+
+            pt = int(prompt_tokens or 0)
+            ct = int(completion_tokens or 0)
+            return pt > 0 or ct > 0
+        except Exception:
+            return False
+
+    def _record_usage(
+        self, usage: Usage | ResponseAPIUsage, response_id: str, context_window: int
+    ) -> None:
+        """
+        Record token usage, supporting both Chat Completions Usage and
+        Responses API Usage.
+
+        Chat shape:
+          - prompt_tokens, completion_tokens
+          - prompt_tokens_details.cached_tokens
+          - completion_tokens_details.reasoning_tokens
+          - _cache_creation_input_tokens for cache_write
+        Responses shape:
+          - input_tokens, output_tokens
+          - input_tokens_details.cached_tokens
+          - output_tokens_details.reasoning_tokens
+        """
+        prompt_tokens = int(
+            getattr(usage, "prompt_tokens", None)
+            or getattr(usage, "input_tokens", 0)
+            or 0
+        )
+        completion_tokens = int(
+            getattr(usage, "completion_tokens", None)
+            or getattr(usage, "output_tokens", 0)
+            or 0
+        )
+
+        cache_read = 0
+        p_details = getattr(usage, "prompt_tokens_details", None) or getattr(
+            usage, "input_tokens_details", None
+        )
+        if p_details is not None:
+            cache_read = int(getattr(p_details, "cached_tokens", 0) or 0)
+
+        # Kimi-K2-thinking populate usage.cached_tokens field
+        if not cache_read and hasattr(usage, "cached_tokens"):
+            cache_read = int(getattr(usage, "cached_tokens", 0) or 0)
+
+        reasoning_tokens = 0
+        c_details = getattr(usage, "completion_tokens_details", None) or getattr(
+            usage, "output_tokens_details", None
+        )
+        if c_details is not None:
+            reasoning_tokens = int(getattr(c_details, "reasoning_tokens", 0) or 0)
+
+        # Chat-specific: litellm may set a hidden cache write field
+        cache_write = int(getattr(usage, "_cache_creation_input_tokens", 0) or 0)
+
+        self.metrics.add_token_usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            cache_read_tokens=cache_read,
+            cache_write_tokens=cache_write,
+            reasoning_tokens=reasoning_tokens,
+            context_window=context_window,
+            response_id=response_id,
+        )
+
+    def _compute_cost(self, resp: ModelResponse | ResponsesAPIResponse) -> float | None:
+        """Try provider header → litellm direct. Return None on failure."""
+        extra_kwargs = {}
+        if (
+            self.input_cost_per_token is not None
+            and self.output_cost_per_token is not None
+        ):
+            cost_per_token = CostPerToken(
+                input_cost_per_token=self.input_cost_per_token,
+                output_cost_per_token=self.output_cost_per_token,
+            )
+            logger.debug(f"Using custom cost per token: {cost_per_token}")
+            extra_kwargs["custom_cost_per_token"] = cost_per_token
+
+        try:
+            hidden = getattr(resp, "_hidden_params", {}) or {}
+            cost = hidden.get("additional_headers", {}).get(
+                "llm_provider-x-litellm-response-cost"
+            )
+            if cost is not None:
+                return float(cost)
+        except Exception as e:
+            logger.debug(f"Failed to get cost from LiteLLM headers: {e}")
+
+        # move on to litellm cost calculator
+        # Handle model name properly - if it doesn't contain "/", use as-is
+        model_parts = self.model_name.split("/")
+        if len(model_parts) > 1:
+            extra_kwargs["model"] = "/".join(model_parts[1:])
+        else:
+            extra_kwargs["model"] = self.model_name
+        try:
+            return float(
+                litellm_completion_cost(completion_response=resp, **extra_kwargs)
+            )
+        except Exception as e:
+            warnings.warn(f"Cost calculation failed: {e}")
+            return None
+
+    def log_llm_call(
+        self,
+        resp: ModelResponse | ResponsesAPIResponse,
+        cost: float | None,
+        raw_resp: ModelResponse | ResponsesAPIResponse | None = None,
+    ) -> None:
+        # Skip if neither file logging nor callback is configured
+        if not self.log_dir and not self._log_completions_callback:
+            return
+        try:
+            # Prepare filename and log data
+            filename = (
+                f"{self.model_name.replace('/', '__')}-"
+                f"{time.time():.3f}-"
+                f"{uuid.uuid4().hex[:4]}.json"
+            )
+
+            data = self._req_ctx.copy()
+            data["response"] = (
+                resp  # ModelResponse | ResponsesAPIResponse;
+                # serialized via _safe_json
+            )
+            data["cost"] = float(cost or 0.0)
+            data["timestamp"] = time.time()
+            data["latency_sec"] = self._last_latency
+
+            # Usage summary (prompt, completion, reasoning tokens) for quick inspection
+            try:
+                usage = getattr(resp, "usage", None)
+                if usage:
+                    prompt_tokens = int(
+                        getattr(usage, "prompt_tokens", None)
+                        or getattr(usage, "input_tokens", 0)
+                        or 0
+                    )
+                    completion_tokens = int(
+                        getattr(usage, "completion_tokens", None)
+                        or getattr(usage, "output_tokens", 0)
+                        or 0
+                    )
+                    details = getattr(
+                        usage, "completion_tokens_details", None
+                    ) or getattr(usage, "output_tokens_details", None)
+                    reasoning_tokens = (
+                        int(getattr(details, "reasoning_tokens", 0) or 0)
+                        if details
+                        else 0
+                    )
+                    p_details = getattr(
+                        usage, "prompt_tokens_details", None
+                    ) or getattr(usage, "input_tokens_details", None)
+                    cache_read_tokens = (
+                        int(getattr(p_details, "cached_tokens", 0) or 0)
+                        if p_details
+                        else 0
+                    )
+
+                    data["usage_summary"] = {
+                        "prompt_tokens": prompt_tokens,
+                        "completion_tokens": completion_tokens,
+                        "reasoning_tokens": reasoning_tokens,
+                        "cache_read_tokens": cache_read_tokens,
+                    }
+            except Exception:
+                # Best-effort only; don't fail logging
+                pass
+
+            # Raw response *before* nonfncall -> call conversion
+            if raw_resp:
+                data["raw_response"] = (
+                    raw_resp  # ModelResponse | ResponsesAPIResponse;
+                    # serialized via _safe_json
+                )
+            # Pop duplicated tools to avoid logging twice
+            if (
+                "tools" in data
+                and isinstance(data.get("kwargs"), dict)
+                and "tools" in data["kwargs"]
+            ):
+                data["kwargs"].pop("tools")
+
+            log_data = json.dumps(data, default=_safe_json, ensure_ascii=False)
+
+            # Use callback if set (for remote execution), otherwise write to file
+            if self._log_completions_callback:
+                self._log_completions_callback(filename, log_data)
+            elif self.log_dir:
+                # Create log directory if it doesn't exist
+                os.makedirs(self.log_dir, exist_ok=True)
+                if not os.access(self.log_dir, os.W_OK):
+                    raise PermissionError(f"log_dir is not writable: {self.log_dir}")
+
+                fname = os.path.join(self.log_dir, filename)
+                with open(fname, "w", encoding="utf-8") as f:
+                    f.write(log_data)
+        except Exception as e:
+            warnings.warn(f"Telemetry logging failed: {e}")
+
+
+def _safe_json(obj: Any) -> Any:
+    # Centralized serializer for telemetry logs.
+    # Prefer robust serialization for Pydantic models first to avoid cycles.
+    # Typed LiteLLM responses
+    if isinstance(obj, ModelResponse) or isinstance(obj, ResponsesAPIResponse):
+        return obj.model_dump(mode="json", exclude_none=True)
+
+    # Any Pydantic BaseModel (e.g., ToolDefinition, ChatCompletionToolParam, etc.)
+    if isinstance(obj, BaseModel):
+        # Use Pydantic's serializer which respects field exclusions (e.g., executors)
+        return obj.model_dump(mode="json", exclude_none=True)
+
+    # Fallbacks for other non-serializable objects used elsewhere in the log payload
+    try:
+        return obj.__dict__
+    except Exception:
+        return str(obj)
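Judging by its 362 added lines, this hunk appears to correspond to openhands/sdk/llm/utils/telemetry.py. A hedged sketch of the on_request/on_response lifecycle follows; the import paths and the Metrics() constructor call are assumptions based on this diff, not verified API.

# Hedged sketch (not from the package): wiring Telemetry around one LLM call.
# The Metrics() call and import paths are assumptions based on this diff.
from litellm.types.utils import ModelResponse

from openhands.sdk.llm.utils.metrics import Metrics
from openhands.sdk.llm.utils.telemetry import Telemetry


def call_with_telemetry(make_request, telemetry: Telemetry) -> ModelResponse:
    """make_request is a zero-argument callable that performs the provider call."""
    telemetry.on_request({"context_window": 128_000})
    try:
        resp = make_request()
    except Exception as err:
        telemetry.on_error(err)
        raise
    # Records latency, cost, and token usage; writes a JSON log if log_enabled.
    telemetry.on_response(resp)
    return resp


telemetry = Telemetry(
    model_name="openai/gpt-4o-mini",
    log_enabled=True,
    log_dir="/tmp/llm-logs",
    metrics=Metrics(),  # assumed to be constructible with defaults
)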
@@ -0,0 +1,156 @@
+import importlib
+
+import litellm
+from pydantic import SecretStr
+
+from openhands.sdk.llm.utils.verified_models import VERIFIED_MODELS
+from openhands.sdk.logger import get_logger
+
+
+def _get_boto3():
+    """Get boto3 module if available, otherwise return None."""
+    try:
+        return importlib.import_module("boto3")
+    except ModuleNotFoundError:
+        return None
+
+
+logger = get_logger(__name__)
+
+
+def _list_bedrock_foundation_models(
+    aws_region_name: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> list[str]:
+    boto3 = _get_boto3()
+    if boto3 is None:
+        logger.warning(
+            "boto3 is not installed. To use Bedrock models,"
+            "install with: openhands-sdk[boto3]"
+        )
+        return []
+
+    try:
+        # The AWS bedrock model id is not queried, if no AWS parameters are configured.
+        client = boto3.client(
+            service_name="bedrock",
+            region_name=aws_region_name,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+        )
+        foundation_models_list = client.list_foundation_models(
+            byOutputModality="TEXT", byInferenceType="ON_DEMAND"
+        )
+        model_summaries = foundation_models_list["modelSummaries"]
+        return ["bedrock/" + model["modelId"] for model in model_summaries]
+    except Exception as err:
+        logger.warning(
+            "%s. Please config AWS_REGION_NAME AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY"
+            " if you want use bedrock model.",
+            err,
+        )
+        return []
+
+
+def get_supported_llm_models(
+    aws_region_name: str | None = None,
+    aws_access_key_id: SecretStr | None = None,
+    aws_secret_access_key: SecretStr | None = None,
+) -> list[str]:
+    """Get all models supported by LiteLLM.
+
+    This function combines models from litellm and Bedrock, removing any
+    error-prone Bedrock models.
+
+    Returns:
+        list[str]: A sorted list of unique model names.
+    """
+    litellm_model_list = litellm.model_list + list(litellm.model_cost.keys())
+    litellm_model_list_without_bedrock = list(
+        filter(lambda m: not m.startswith("bedrock"), litellm_model_list)
+    )
+    bedrock_model_list = []
+    if aws_region_name and aws_access_key_id and aws_secret_access_key:
+        bedrock_model_list = _list_bedrock_foundation_models(
+            aws_region_name,
+            aws_access_key_id.get_secret_value(),
+            aws_secret_access_key.get_secret_value(),
+        )
+    model_list = litellm_model_list_without_bedrock + bedrock_model_list
+    return model_list
+
+
+def _split_is_actually_version(split: list[str]) -> bool:
+    return (
+        len(split) > 1
+        and bool(split[1])
+        and bool(split[1][0])
+        and split[1][0].isdigit()
+    )
+
+
+def _extract_model_and_provider(model: str) -> tuple[str, str, str]:
+    """
+    Extract provider and model information from a model identifier.
+    """
+    separator = "/"
+    split = model.split(separator)
+
+    if len(split) == 1:
+        # no "/" separator found, try with "."
+        separator = "."
+        split = model.split(separator)
+        if _split_is_actually_version(split):
+            split = [separator.join(split)]  # undo the split
+
+    if len(split) == 1:
+        matched_provider = ""
+        for provider, models in VERIFIED_MODELS.items():
+            if split[0] in models:
+                matched_provider = provider
+                break
+
+        if matched_provider:
+            return matched_provider, split[0], "/"
+
+        return matched_provider, model, ""
+
+    provider = split[0]
+    model_id = separator.join(split[1:])
+    return provider, model_id, separator
+
+
+def get_unverified_models(
+    aws_region_name: str | None = None,
+    aws_access_key_id: SecretStr | None = None,
+    aws_secret_access_key: SecretStr | None = None,
+) -> dict[str, list[str]]:
+    """
+    Organize a mapping of unverified model identifiers by provider.
+    """
+    result_dict: dict[str, list[str]] = {}
+
+    models = get_supported_llm_models(
+        aws_region_name, aws_access_key_id, aws_secret_access_key
+    )
+    for model in models:
+        provider, model_id, separator = _extract_model_and_provider(model)
+
+        # Ignore "anthropic" providers with a separator of "."
+        # These are outdated and incompatible providers.
+        if provider == "anthropic" and separator == ".":
+            continue
+
+        # Dedup verified models
+        if provider in VERIFIED_MODELS and model_id in VERIFIED_MODELS[provider]:
+            continue
+
+        key = provider or "other"
+        if key not in result_dict:
+            result_dict[key] = []
+
+        result_dict[key].append(model_id)
+
+    return result_dict
+
+
+UNVERIFIED_MODELS_EXCLUDING_BEDROCK = get_unverified_models()