openhands-sdk 1.5.0__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands/sdk/__init__.py +9 -1
- openhands/sdk/agent/agent.py +35 -12
- openhands/sdk/agent/base.py +53 -7
- openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
- openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
- openhands/sdk/agent/prompts/system_prompt.j2 +29 -1
- openhands/sdk/agent/utils.py +18 -4
- openhands/sdk/context/__init__.py +2 -0
- openhands/sdk/context/agent_context.py +42 -10
- openhands/sdk/context/condenser/base.py +11 -6
- openhands/sdk/context/condenser/llm_summarizing_condenser.py +169 -20
- openhands/sdk/context/condenser/no_op_condenser.py +2 -1
- openhands/sdk/context/condenser/pipeline_condenser.py +10 -9
- openhands/sdk/context/condenser/utils.py +149 -0
- openhands/sdk/context/prompts/prompt.py +40 -2
- openhands/sdk/context/prompts/templates/system_message_suffix.j2 +3 -3
- openhands/sdk/context/skills/__init__.py +2 -0
- openhands/sdk/context/skills/skill.py +152 -1
- openhands/sdk/context/view.py +287 -27
- openhands/sdk/conversation/base.py +17 -0
- openhands/sdk/conversation/conversation.py +19 -0
- openhands/sdk/conversation/exceptions.py +29 -4
- openhands/sdk/conversation/impl/local_conversation.py +126 -9
- openhands/sdk/conversation/impl/remote_conversation.py +152 -3
- openhands/sdk/conversation/state.py +42 -1
- openhands/sdk/conversation/stuck_detector.py +81 -45
- openhands/sdk/conversation/types.py +30 -0
- openhands/sdk/event/llm_convertible/system.py +16 -20
- openhands/sdk/hooks/__init__.py +30 -0
- openhands/sdk/hooks/config.py +180 -0
- openhands/sdk/hooks/conversation_hooks.py +227 -0
- openhands/sdk/hooks/executor.py +155 -0
- openhands/sdk/hooks/manager.py +170 -0
- openhands/sdk/hooks/types.py +40 -0
- openhands/sdk/io/cache.py +85 -0
- openhands/sdk/io/local.py +39 -2
- openhands/sdk/llm/llm.py +3 -2
- openhands/sdk/llm/message.py +4 -3
- openhands/sdk/llm/mixins/fn_call_converter.py +61 -16
- openhands/sdk/llm/mixins/non_native_fc.py +5 -1
- openhands/sdk/llm/utils/model_features.py +64 -24
- openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
- openhands/sdk/llm/utils/verified_models.py +6 -4
- openhands/sdk/logger/logger.py +1 -1
- openhands/sdk/tool/schema.py +10 -0
- openhands/sdk/tool/tool.py +2 -2
- openhands/sdk/utils/async_executor.py +76 -67
- openhands/sdk/utils/models.py +1 -1
- openhands/sdk/utils/paging.py +63 -0
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/METADATA +3 -3
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/RECORD +56 -41
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/WHEEL +0 -0
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/top_level.txt +0 -0

openhands/sdk/context/condenser/llm_summarizing_condenser.py

@@ -1,20 +1,44 @@
 import os
+from collections.abc import Sequence
+from enum import Enum

 from pydantic import Field, model_validator

 from openhands.sdk.context.condenser.base import RollingCondenser
+from openhands.sdk.context.condenser.utils import (
+    get_suffix_length_for_token_reduction,
+    get_total_token_count,
+)
 from openhands.sdk.context.prompts import render_template
 from openhands.sdk.context.view import View
+from openhands.sdk.event.base import LLMConvertibleEvent
 from openhands.sdk.event.condenser import Condensation
 from openhands.sdk.event.llm_convertible import MessageEvent
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.observability.laminar import observe


+class Reason(Enum):
+    """Reasons for condensation."""
+
+    REQUEST = "request"
+    TOKENS = "tokens"
+    EVENTS = "events"
+
+
 class LLMSummarizingCondenser(RollingCondenser):
+    """LLM-based condenser that summarizes forgotten events.
+
+    Uses an independent LLM (stored in the `llm` attribute) for generating summaries
+    of forgotten events. The optional `agent_llm` parameter passed to condense() is
+    the LLM used by the agent for token counting purposes, and you should not assume
+    it is the same as the one defined in this condenser.
+    """
+
     llm: LLM
-    max_size: int = Field(default=
-
+    max_size: int = Field(default=240, gt=0)
+    max_tokens: int | None = None
+    keep_first: int = Field(default=2, ge=0)

     @model_validator(mode="after")
     def validate_keep_first_vs_max_size(self):
@@ -29,23 +53,47 @@ class LLMSummarizingCondenser(RollingCondenser):
     def handles_condensation_requests(self) -> bool:
         return True

-    def
-
-
-
+    def get_condensation_reasons(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> set[Reason]:
+        """Determine the reasons why the view should be condensed.
+
+        Args:
+            view: The current view to evaluate.
+            agent_llm: The LLM used by the agent. Required if token counting is needed.

-
-
-
-
+        Returns:
+            A set of Reason enums indicating why condensation is needed.
+        """
+        reasons = set()
+
+        # Reason 1: Unhandled condensation request. The view handles the detection of
+        # these requests while processing the event stream.
         if view.unhandled_condensation_request:
-
-            # should be calculated based on the view size.
-            target_size = len(view) // 2
-            # Number of events to keep from the tail -- target size, minus however many
-            # prefix events from the head, minus one for the summarization event
-            events_from_tail = target_size - len(head) - 1
+            reasons.add(Reason.REQUEST)

+        # Reason 2: Token limit is provided and exceeded.
+        if self.max_tokens and agent_llm:
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            if total_tokens > self.max_tokens:
+                reasons.add(Reason.TOKENS)
+
+        # Reason 3: View exceeds maximum size in number of events.
+        if len(view) > self.max_size:
+            reasons.add(Reason.EVENTS)
+
+        return reasons
+
+    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
+        reasons = self.get_condensation_reasons(view, agent_llm)
+        return reasons != set()
+
+    def _get_summary_event_content(self, view: View) -> str:
+        """Extract the text content from the summary event in the view, if any.
+
+        If there is no summary event or it does not contain text content, returns an
+        empty string.
+        """
         summary_event_content: str = ""

         summary_event = view.summary_event
@@ -54,9 +102,25 @@ class LLMSummarizingCondenser(RollingCondenser):
             if isinstance(message_content, TextContent):
                 summary_event_content = message_content.text

-
-
-
+        return summary_event_content
+
+    def _generate_condensation(
+        self,
+        summary_event_content: str,
+        forgotten_events: Sequence[LLMConvertibleEvent],
+        summary_offset: int,
+    ) -> Condensation:
+        """Generate a condensation by using the condenser's LLM to summarize forgotten
+        events.
+
+        Args:
+            summary_event_content: The content of the previous summary event.
+            forgotten_events: The list of events to be summarized.
+            summary_offset: The index where the summary event should be inserted.
+
+        Returns:
+            Condensation: The generated condensation object.
+        """
         # Convert events to strings for the template
         event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]

@@ -84,6 +148,91 @@ class LLMSummarizingCondenser(RollingCondenser):
         return Condensation(
             forgotten_event_ids=[event.id for event in forgotten_events],
             summary=summary,
-            summary_offset=
+            summary_offset=summary_offset,
             llm_response_id=llm_response.id,
         )
+
+    def _get_forgotten_events(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> tuple[Sequence[LLMConvertibleEvent], int]:
+        """Identify events to be forgotten and the summary offset.
+
+        Relies on the condensation reasons to determine how many events we need to drop
+        in order to maintain our resource constraints. Uses manipulation indices to
+        ensure forgetting ranges respect atomic unit boundaries.
+
+        Args:
+            view: The current view from which to identify forgotten events.
+            agent_llm: The LLM used by the agent, required for token-based calculations.
+
+        Returns:
+            A tuple of (events to forget, summary_offset).
+        """
+        reasons = self.get_condensation_reasons(view, agent_llm=agent_llm)
+        assert reasons != set(), "No condensation reasons found."
+
+        suffix_events_to_keep: set[int] = set()
+
+        if Reason.REQUEST in reasons:
+            target_size = len(view) // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.EVENTS in reasons:
+            target_size = self.max_size // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.TOKENS in reasons:
+            # Compute the number of tokens we need to eliminate to be under half the
+            # max_tokens value. We know max_tokens and the agent LLM are not None here
+            # because we can't have Reason.TOKENS without them.
+            assert self.max_tokens is not None
+            assert agent_llm is not None
+
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            tokens_to_reduce = total_tokens - (self.max_tokens // 2)
+
+            suffix_events_to_keep.add(
+                get_suffix_length_for_token_reduction(
+                    events=view.events[self.keep_first :],
+                    llm=agent_llm,
+                    token_reduction=tokens_to_reduce,
+                )
+            )
+
+        # We might have multiple reasons to condense, so pick the strictest condensation
+        # to ensure all resource constraints are met.
+        events_from_tail = min(suffix_events_to_keep)
+
+        # Calculate naive forgetting end (without considering atomic boundaries)
+        naive_end = len(view) - events_from_tail
+
+        # Find actual forgetting_start: smallest manipulation index > keep_first
+        forgetting_start = view.find_next_manipulation_index(
+            self.keep_first, strict=True
+        )
+
+        # Find actual forgetting_end: smallest manipulation index >= naive_end
+        forgetting_end = view.find_next_manipulation_index(naive_end, strict=False)
+
+        # Extract events to forget using boundary-aware indices
+        forgotten_events = view[forgetting_start:forgetting_end]
+
+        # Summary offset is the same as forgetting_start
+        return forgotten_events, forgetting_start
+
+    @observe(ignore_inputs=["view", "agent_llm"])
+    def get_condensation(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> Condensation:
+        # The condensation is dependent on the events we want to drop and the previous
+        # summary.
+        summary_event_content = self._get_summary_event_content(view)
+        forgotten_events, summary_offset = self._get_forgotten_events(
+            view, agent_llm=agent_llm
+        )
+
+        return self._generate_condensation(
+            summary_event_content=summary_event_content,
+            forgotten_events=forgotten_events,
+            summary_offset=summary_offset,
+        )
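
Taken together, the new fields give the summarizing condenser three independent triggers (explicit requests, an event-count cap, and a token cap). The sketch below shows how such a condenser might be configured; it is an illustration based only on the fields and docstring examples visible in this diff, and the model name and threshold values are placeholders, not documented defaults.

```python
# Illustrative configuration only -- field names come from the diff above,
# the model name and thresholds are placeholders.
from openhands.sdk.context.condenser.llm_summarizing_condenser import (
    LLMSummarizingCondenser,
)
from openhands.sdk.llm import LLM

condenser = LLMSummarizingCondenser(
    llm=LLM(model="gpt-4"),   # independent LLM that writes the summaries
    max_size=240,             # Reason.EVENTS: condense once the view exceeds 240 events
    max_tokens=100_000,       # Reason.TOKENS: condense once the history exceeds this many tokens
    keep_first=2,             # always preserve the first two events
)

# Token counting uses the *agent's* LLM, passed separately at call time:
# condenser.should_condense(view, agent_llm=agent_llm)
# condenser.get_condensation(view, agent_llm=agent_llm)
```
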

openhands/sdk/context/condenser/no_op_condenser.py

@@ -1,6 +1,7 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM


 class NoOpCondenser(CondenserBase):
@@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
     Primarily intended for testing purposes.
     """

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:  # noqa: ARG002
         return view

openhands/sdk/context/condenser/pipeline_condenser.py

@@ -1,15 +1,16 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM


 class PipelineCondenser(CondenserBase):
     """A condenser that applies a sequence of condensers in order.

     All condensers are defined primarily by their `condense` method, which takes a
-    `View` and
-
-    condenser returns a `Condensation`.
+    `View` and an optional `agent_llm` parameter, returning either a new `View` or a
+    `Condensation` event. That means we can chain multiple condensers together by
+    passing `View`s along and exiting early if any condenser returns a `Condensation`.

     For example:

@@ -20,20 +21,20 @@ class PipelineCondenser(CondenserBase):
             CondenserC(...),
         ])

-        result = condenser.condense(view)
+        result = condenser.condense(view, agent_llm=agent_llm)

         # Doing the same thing without the pipeline condenser requires more boilerplate
         # for the monadic chaining
         other_result = view

         if isinstance(other_result, View):
-            other_result = CondenserA(...).condense(other_result)
+            other_result = CondenserA(...).condense(other_result, agent_llm=agent_llm)

         if isinstance(other_result, View):
-            other_result = CondenserB(...).condense(other_result)
+            other_result = CondenserB(...).condense(other_result, agent_llm=agent_llm)

         if isinstance(other_result, View):
-            other_result = CondenserC(...).condense(other_result)
+            other_result = CondenserC(...).condense(other_result, agent_llm=agent_llm)

         assert result == other_result
     """
@@ -41,12 +42,12 @@ class PipelineCondenser(CondenserBase):
     condensers: list[CondenserBase]
     """The list of condensers to apply in order."""

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
         result: View | Condensation = view
         for condenser in self.condensers:
             if isinstance(result, Condensation):
                 break
-            result = condenser.condense(result)
+            result = condenser.condense(result, agent_llm=agent_llm)
         return result

     def handles_condensation_requests(self) -> bool:

openhands/sdk/context/condenser/utils.py (new file)

@@ -0,0 +1,149 @@
+from collections.abc import Sequence
+
+from openhands.sdk.event.base import LLMConvertibleEvent
+from openhands.sdk.llm import LLM
+
+
+def get_total_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+) -> int:
+    """Calculate the total token count for a list of LLM convertible events.
+
+    This function converts the events to LLM messages and uses the provided LLM
+    to count the total number of tokens. This is useful for understanding how many
+    tokens a sequence of events will consume in the context window.
+
+    Args:
+        events: List of LLM convertible events to count tokens for
+        llm: The LLM instance to use for token counting (uses the litellm's token
+            counting utilities)
+
+    Returns:
+        Total token count for all events converted to messages
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hello, how are you?", source="user"),
+        ...     MessageEvent.from_text("I'm doing great!", source="agent"),
+        ... ]
+        >>> token_count = get_total_token_count(events, llm)
+        >>> print(f"Total tokens: {token_count}")
+    """
+    messages = LLMConvertibleEvent.events_to_messages(list(events))
+    return llm.get_token_count(messages)
+
+
+def get_shortest_prefix_above_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_count: int,
+) -> int:
+    """Find the length of the shortest prefix whose token count exceeds the target.
+
+    This function performs a binary search to efficiently find the shortest prefix
+    of events that, when converted to messages, has a total token count greater than
+    the specified target token count.
+
+    Args:
+        events: List of LLM convertible events to search through
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_count: The target token count threshold
+
+    Returns:
+        The length of the shortest prefix that exceeds the token count.
+        Returns 0 if no events are provided.
+        Returns len(events) if all events combined don't exceed the token count.
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hi", source="user"),
+        ...     MessageEvent.from_text("Hello", source="agent"),
+        ...     MessageEvent.from_text("How are you?", source="user"),
+        ...     MessageEvent.from_text("Great!", source="agent"),
+        ... ]
+        >>> prefix_len = get_shortest_prefix_above_token_count(events, llm, 20)
+        >>> # prefix_len might be 2 if first 2 events exceed 20 tokens
+    """
+    if not events:
+        return 0
+
+    # Check if all events combined don't exceed the token count
+    total_tokens = get_total_token_count(events, llm)
+    if total_tokens <= token_count:
+        return len(events)
+
+    # Binary search for the shortest prefix
+    left, right = 1, len(events)
+
+    while left < right:
+        mid = (left + right) // 2
+        prefix_tokens = get_total_token_count(events[:mid], llm)
+
+        if prefix_tokens > token_count:
+            # This prefix exceeds the count, try to find a shorter one
+            right = mid
+        else:
+            # This prefix doesn't exceed, we need a longer one
+            left = mid + 1
+
+    return left
+
+
+def get_suffix_length_for_token_reduction(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_reduction: int,
+) -> int:
+    """Find how many suffix events can be kept while reducing tokens by target amount.
+
+    This function determines the maximum number of events from the end of the list
+    that can be retained while ensuring the total token count is reduced by at least
+    the specified amount. It uses the get_shortest_prefix_above_token_count function
+    to find the prefix that must be removed.
+
+    Args:
+        events: List of LLM convertible events
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_reduction: The minimum number of tokens to reduce by
+
+    Returns:
+        The number of events from the end that can be kept (suffix length).
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Event 1", source="user"),
+        ...     MessageEvent.from_text("Event 2", source="agent"),
+        ...     MessageEvent.from_text("Event 3", source="user"),
+        ...     MessageEvent.from_text("Event 4", source="agent"),
+        ... ]
+        >>> # Suppose total is 100 tokens, and we want to reduce by 40 tokens
+        >>> suffix_len = get_suffix_length_for_token_reduction(events, llm, 40)
+        >>> # suffix_len tells us how many events from the end we can keep
+        >>> # If first 2 events = 45 tokens, suffix_len = 2 (keep last 2 events)
+    """
+    if not events:
+        return 0
+
+    if token_reduction <= 0:
+        return len(events)
+
+    # Find the shortest prefix that exceeds the token reduction target
+    prefix_length = get_shortest_prefix_above_token_count(events, llm, token_reduction)
+
+    # The suffix length is what remains after removing the prefix
+    suffix_length = len(events) - prefix_length
+
+    return suffix_length
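
These helpers compose: finding the shortest prefix that exceeds the reduction target tells you exactly how many tail events survive. The sketch below mirrors the token branch of `_get_forgotten_events` above; the wrapper function name and the token budget are illustrative only, not part of the package.

```python
# Illustrative sketch (not from the package) of how the utils combine.
from collections.abc import Sequence

from openhands.sdk.context.condenser.utils import (
    get_suffix_length_for_token_reduction,
    get_total_token_count,
)
from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.llm import LLM


def tail_events_to_keep(
    events: Sequence[LLMConvertibleEvent], agent_llm: LLM, max_tokens: int
) -> int:
    """How many trailing events still fit once we aim for half the token budget."""
    total = get_total_token_count(events, agent_llm)
    tokens_to_reduce = total - (max_tokens // 2)
    if tokens_to_reduce <= 0:
        return len(events)  # already under budget, keep everything
    # Drop the shortest prefix whose token count exceeds the target; the helper
    # reports how many suffix events remain after that prefix is removed.
    return get_suffix_length_for_token_reduction(events, agent_llm, tokens_to_reduce)
```
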

openhands/sdk/context/prompts/prompt.py

@@ -4,7 +4,45 @@ import re
 import sys
 from functools import lru_cache

-from jinja2 import
+from jinja2 import (
+    BaseLoader,
+    Environment,
+    FileSystemBytecodeCache,
+    Template,
+    TemplateNotFound,
+)
+
+
+class FlexibleFileSystemLoader(BaseLoader):
+    """A Jinja2 loader that supports both relative paths (within a base directory)
+    and absolute paths anywhere on the filesystem.
+    """
+
+    def __init__(self, searchpath: str):
+        self.searchpath = os.path.abspath(searchpath)
+
+    def get_source(self, environment, template):  # noqa: ARG002
+        # If template is an absolute path, use it directly
+        if os.path.isabs(template):
+            path = template
+        else:
+            # Otherwise, look for it in the searchpath
+            path = os.path.join(self.searchpath, template)
+
+        if not os.path.exists(path):
+            raise TemplateNotFound(template)
+
+        mtime = os.path.getmtime(path)
+        with open(path, encoding="utf-8") as f:
+            source = f.read()
+
+        def uptodate():
+            try:
+                return os.path.getmtime(path) == mtime
+            except OSError:
+                return False
+
+        return source, path, uptodate


 def refine(text: str) -> str:
@@ -27,7 +65,7 @@ def _get_env(prompt_dir: str) -> Environment:
     os.makedirs(cache_folder, exist_ok=True)
     bcc = FileSystemBytecodeCache(directory=cache_folder)
     env = Environment(
-        loader=
+        loader=FlexibleFileSystemLoader(prompt_dir),
         bytecode_cache=bcc,
         autoescape=False,
     )
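
The practical effect of `FlexibleFileSystemLoader` is that template names keep resolving inside the prompt directory while callers can now also pass absolute paths. A minimal standalone sketch, assuming the class is importable from `openhands.sdk.context.prompts.prompt`; the directory and template paths are hypothetical.

```python
from jinja2 import Environment

# Assumed import path, based on the module shown in this diff.
from openhands.sdk.context.prompts.prompt import FlexibleFileSystemLoader

env = Environment(loader=FlexibleFileSystemLoader("/opt/prompts"), autoescape=False)

# A relative name resolves inside the search path: /opt/prompts/system_prompt.j2
system = env.get_template("system_prompt.j2")

# An absolute path bypasses the search path entirely -- the behaviour this loader
# adds over jinja2's stock FileSystemLoader.
override = env.get_template("/home/user/prompts/override.j2")
```
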

openhands/sdk/context/prompts/templates/system_message_suffix.j2

@@ -14,7 +14,7 @@ Please follow them while working.

 {{ system_message_suffix }}
 {% endif %}
-{% if
+{% if secret_infos %}
 <CUSTOM_SECRETS>
 ### Credential Access
 * Automatic secret injection: When you reference a registered secret key in your bash command, the secret value will be automatically exported as an environment variable before your command executes.
@@ -25,8 +25,8 @@ Please follow them while working.
 * If it still fails, report it to the user.

 You have access to the following environment variables
-{% for
-* **${{
+{% for secret_info in secret_infos %}
+* **${{ secret_info.name }}**{% if secret_info.description %} - {{ secret_info.description }}{% endif %}
 {% endfor %}
 </CUSTOM_SECRETS>
 {% endif %}

openhands/sdk/context/skills/__init__.py

@@ -1,6 +1,7 @@
 from openhands.sdk.context.skills.exceptions import SkillValidationError
 from openhands.sdk.context.skills.skill import (
     Skill,
+    load_project_skills,
     load_public_skills,
     load_skills_from_dir,
     load_user_skills,
@@ -21,6 +22,7 @@ __all__ = [
     "SkillKnowledge",
     "load_skills_from_dir",
     "load_user_skills",
+    "load_project_skills",
     "load_public_skills",
     "SkillValidationError",
 ]