openhands-sdk 1.7.0__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff compares the contents of the two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- openhands/sdk/agent/agent.py +31 -1
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +1 -2
- openhands/sdk/agent/utils.py +9 -4
- openhands/sdk/context/condenser/base.py +11 -6
- openhands/sdk/context/condenser/llm_summarizing_condenser.py +167 -18
- openhands/sdk/context/condenser/no_op_condenser.py +2 -1
- openhands/sdk/context/condenser/pipeline_condenser.py +10 -9
- openhands/sdk/context/condenser/utils.py +149 -0
- openhands/sdk/context/skills/skill.py +85 -0
- openhands/sdk/context/view.py +234 -37
- openhands/sdk/conversation/conversation.py +6 -0
- openhands/sdk/conversation/impl/local_conversation.py +33 -3
- openhands/sdk/conversation/impl/remote_conversation.py +36 -0
- openhands/sdk/conversation/state.py +41 -1
- openhands/sdk/hooks/__init__.py +30 -0
- openhands/sdk/hooks/config.py +180 -0
- openhands/sdk/hooks/conversation_hooks.py +227 -0
- openhands/sdk/hooks/executor.py +155 -0
- openhands/sdk/hooks/manager.py +170 -0
- openhands/sdk/hooks/types.py +40 -0
- openhands/sdk/io/cache.py +85 -0
- openhands/sdk/io/local.py +39 -2
- openhands/sdk/llm/mixins/fn_call_converter.py +61 -16
- openhands/sdk/llm/mixins/non_native_fc.py +5 -1
- openhands/sdk/tool/schema.py +10 -0
- {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/METADATA +1 -1
- {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/RECORD +29 -21
- {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/WHEEL +0 -0
- {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/top_level.txt +0 -0
openhands/sdk/agent/agent.py
CHANGED
@@ -25,6 +25,7 @@ from openhands.sdk.event import (
     ObservationEvent,
     SystemPromptEvent,
     TokenEvent,
+    UserRejectObservation,
 )
 from openhands.sdk.event.condenser import Condensation, CondensationRequest
 from openhands.sdk.llm import (
@@ -144,9 +145,20 @@ class Agent(AgentBase):
             self._execute_actions(conversation, pending_actions, on_event)
             return

+        # Check if the last user message was blocked by a UserPromptSubmit hook
+        # If so, skip processing and mark conversation as finished
+        for event in reversed(list(state.events)):
+            if isinstance(event, MessageEvent) and event.source == "user":
+                reason = state.pop_blocked_message(event.id)
+                if reason is not None:
+                    logger.info(f"User message blocked by hook: {reason}")
+                    state.execution_status = ConversationExecutionStatus.FINISHED
+                    return
+                break  # Only check the most recent user message
+
         # Prepare LLM messages using the utility function
         _messages_or_condensation = prepare_llm_messages(
-            state.events, condenser=self.condenser
+            state.events, condenser=self.condenser, llm=self.llm
         )

         # Process condensation event before agent sampels another action
@@ -462,8 +474,26 @@ class Agent(AgentBase):

        It will call the tool's executor and update the state & call callback fn
        with the observation.
+
+        If the action was blocked by a PreToolUse hook (recorded in
+        state.blocked_actions), a UserRejectObservation is emitted instead
+        of executing the action.
        """
        state = conversation.state
+
+        # Check if this action was blocked by a PreToolUse hook
+        reason = state.pop_blocked_action(action_event.id)
+        if reason is not None:
+            logger.info(f"Action '{action_event.tool_name}' blocked by hook: {reason}")
+            rejection = UserRejectObservation(
+                action_id=action_event.id,
+                tool_name=action_event.tool_name,
+                tool_call_id=action_event.tool_call_id,
+                rejection_reason=reason,
+            )
+            on_event(rejection)
+            return rejection
+
        tool = self.tools_map.get(action_event.tool_name, None)
        if tool is None:
            raise RuntimeError(
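
Note: the new branches above only consume block reasons; the storage itself (state.blocked_actions and its message counterpart) is added in openhands/sdk/conversation/state.py, whose diff is not included in this section. The sketch below only illustrates what pop-style bookkeeping such as pop_blocked_action implies; the _BlockedRegistry name and dict layout are assumptions, not the package's implementation.

    class _BlockedRegistry:
        """Hypothetical stand-in for the blocked-event bookkeeping on the conversation state."""

        def __init__(self) -> None:
            self._reasons: dict[str, str] = {}  # event id -> reason recorded by a hook

        def block(self, event_id: str, reason: str) -> None:
            self._reasons[event_id] = reason

        def pop(self, event_id: str) -> str | None:
            # The reason is consumed on read, so a blocked event is rejected at most once.
            return self._reasons.pop(event_id, None)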

openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2
CHANGED
@@ -1,3 +1,2 @@
 * Stream your thinking and responses while staying concise; surface key assumptions and environment prerequisites explicitly.
-*
-* You have access to external resources and should actively use available tools to try accessing them first, rather than claiming you can’t access something without making an attempt.
+* You have access to external resources and should actively use available tools to try accessing them first, rather than claiming you can’t access something without making an attempt.
openhands/sdk/agent/utils.py
CHANGED
@@ -117,6 +117,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: None = None,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message]: ...


@@ -125,6 +126,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: CondenserBase,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message] | Condensation: ...


@@ -132,6 +134,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: CondenserBase | None = None,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message] | Condensation:
     """Prepare LLM messages from conversation context.

@@ -140,13 +143,15 @@ def prepare_llm_messages(
     It handles condensation internally and calls the callback when needed.

     Args:
-
+        events: Sequence of events to prepare messages from
         condenser: Optional condenser for handling context window limits
         additional_messages: Optional additional messages to append
-
+        llm: Optional LLM instance from the agent, passed to condenser for
+            token counting or other LLM features

     Returns:
-        List of messages ready for LLM completion
+        List of messages ready for LLM completion, or a Condensation event
+        if condensation is needed

     Raises:
         RuntimeError: If condensation is needed but no callback is provided
@@ -160,7 +165,7 @@ def prepare_llm_messages(
    # produce a list of events, exactly as expected, or a
    # new condensation that needs to be processed
    if condenser is not None:
-        condensation_result = condenser.condense(view)
+        condensation_result = condenser.condense(view, agent_llm=llm)

        match condensation_result:
            case View():
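
Note: with the new llm parameter, callers of prepare_llm_messages can thread the agent's LLM through to the condenser, where it arrives as agent_llm. A minimal calling sketch, assuming state, agent, and on_event are available as in agent.py above:

    from openhands.sdk.event.condenser import Condensation

    result = prepare_llm_messages(
        state.events,
        condenser=agent.condenser,  # the agent's configured condenser
        llm=agent.llm,              # new in 1.7.1: forwarded as agent_llm to condense()
    )

    if isinstance(result, Condensation):
        # Condensation has to happen first; emit it and let the next step retry.
        on_event(result)
    else:
        messages = result  # list[Message], ready for the LLM call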

openhands/sdk/context/condenser/base.py
CHANGED
@@ -3,6 +3,7 @@ from logging import getLogger

 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM
 from openhands.sdk.utils.models import (
     DiscriminatedUnionMixin,
 )
@@ -28,7 +29,7 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
    """

    @abstractmethod
-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
        """Condense a sequence of events into a potentially smaller list.

        New condenser strategies should override this method to implement their own
@@ -37,6 +38,8 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):

        Args:
            view: A view of the history containing all events that should be condensed.
+            agent_llm: LLM instance used by the agent. Condensers use this for token
+                counting purposes. Defaults to None.

        Returns:
            View | Condensation: A condensed view of the events or an event indicating
@@ -77,18 +80,20 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
    """

    @abstractmethod
-    def should_condense(self, view: View) -> bool:
+    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
        """Determine if a view should be condensed."""

    @abstractmethod
-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> Condensation:
        """Get the condensation from a view."""

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
        # If we trigger the condenser-specific condensation threshold, compute and
        # return the condensation.
-        if self.should_condense(view):
-            return self.get_condensation(view)
+        if self.should_condense(view, agent_llm=agent_llm):
+            return self.get_condensation(view, agent_llm=agent_llm)

        # Otherwise we're safe to just return the view.
        else:
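
Note: the widened signatures mean custom condensers should accept the optional agent_llm argument even if they ignore it. A sketch of a minimal subclass honoring the new contract; TokenLoggingCondenser is hypothetical, and only the base-class signature and the utils helper come from this release:

    import logging

    from openhands.sdk.context.condenser.base import CondenserBase
    from openhands.sdk.context.condenser.utils import get_total_token_count
    from openhands.sdk.context.view import View
    from openhands.sdk.event.condenser import Condensation
    from openhands.sdk.llm import LLM

    logger = logging.getLogger(__name__)


    class TokenLoggingCondenser(CondenserBase):
        """Hypothetical condenser: never condenses, only reports token usage."""

        def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
            # agent_llm is optional by contract, so degrade gracefully without it.
            if agent_llm is not None:
                total = get_total_token_count(view.events, agent_llm)
                logger.info("view holds %d events / %d tokens", len(view), total)
            return view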

openhands/sdk/context/condenser/llm_summarizing_condenser.py
CHANGED
@@ -1,19 +1,43 @@
 import os
+from collections.abc import Sequence
+from enum import Enum

 from pydantic import Field, model_validator

 from openhands.sdk.context.condenser.base import RollingCondenser
+from openhands.sdk.context.condenser.utils import (
+    get_suffix_length_for_token_reduction,
+    get_total_token_count,
+)
 from openhands.sdk.context.prompts import render_template
 from openhands.sdk.context.view import View
+from openhands.sdk.event.base import LLMConvertibleEvent
 from openhands.sdk.event.condenser import Condensation
 from openhands.sdk.event.llm_convertible import MessageEvent
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.observability.laminar import observe


+class Reason(Enum):
+    """Reasons for condensation."""
+
+    REQUEST = "request"
+    TOKENS = "tokens"
+    EVENTS = "events"
+
+
 class LLMSummarizingCondenser(RollingCondenser):
+    """LLM-based condenser that summarizes forgotten events.
+
+    Uses an independent LLM (stored in the `llm` attribute) for generating summaries
+    of forgotten events. The optional `agent_llm` parameter passed to condense() is
+    the LLM used by the agent for token counting purposes, and you should not assume
+    it is the same as the one defined in this condenser.
+    """
+
     llm: LLM
     max_size: int = Field(default=120, gt=0)
+    max_tokens: int | None = None
     keep_first: int = Field(default=4, ge=0)

     @model_validator(mode="after")
@@ -29,23 +53,47 @@ class LLMSummarizingCondenser(RollingCondenser):
    def handles_condensation_requests(self) -> bool:
        return True

-    def
-
-
-
+    def get_condensation_reasons(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> set[Reason]:
+        """Determine the reasons why the view should be condensed.
+
+        Args:
+            view: The current view to evaluate.
+            agent_llm: The LLM used by the agent. Required if token counting is needed.

-
-
-
-
+        Returns:
+            A set of Reason enums indicating why condensation is needed.
+        """
+        reasons = set()
+
+        # Reason 1: Unhandled condensation request. The view handles the detection of
+        # these requests while processing the event stream.
        if view.unhandled_condensation_request:
-
-            # should be calculated based on the view size.
-            target_size = len(view) // 2
-            # Number of events to keep from the tail -- target size, minus however many
-            # prefix events from the head, minus one for the summarization event
-            events_from_tail = target_size - len(head) - 1
+            reasons.add(Reason.REQUEST)

+        # Reason 2: Token limit is provided and exceeded.
+        if self.max_tokens and agent_llm:
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            if total_tokens > self.max_tokens:
+                reasons.add(Reason.TOKENS)
+
+        # Reason 3: View exceeds maximum size in number of events.
+        if len(view) > self.max_size:
+            reasons.add(Reason.EVENTS)
+
+        return reasons
+
+    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
+        reasons = self.get_condensation_reasons(view, agent_llm)
+        return reasons != set()
+
+    def _get_summary_event_content(self, view: View) -> str:
+        """Extract the text content from the summary event in the view, if any.
+
+        If there is no summary event or it does not contain text content, returns an
+        empty string.
+        """
        summary_event_content: str = ""

        summary_event = view.summary_event
@@ -54,9 +102,25 @@ class LLMSummarizingCondenser(RollingCondenser):
            if isinstance(message_content, TextContent):
                summary_event_content = message_content.text

-
-
-
+        return summary_event_content
+
+    def _generate_condensation(
+        self,
+        summary_event_content: str,
+        forgotten_events: Sequence[LLMConvertibleEvent],
+        summary_offset: int,
+    ) -> Condensation:
+        """Generate a condensation by using the condenser's LLM to summarize forgotten
+        events.
+
+        Args:
+            summary_event_content: The content of the previous summary event.
+            forgotten_events: The list of events to be summarized.
+            summary_offset: The index where the summary event should be inserted.
+
+        Returns:
+            Condensation: The generated condensation object.
+        """
        # Convert events to strings for the template
        event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]

@@ -84,6 +148,91 @@ class LLMSummarizingCondenser(RollingCondenser):
        return Condensation(
            forgotten_event_ids=[event.id for event in forgotten_events],
            summary=summary,
-            summary_offset=
+            summary_offset=summary_offset,
            llm_response_id=llm_response.id,
        )
+
+    def _get_forgotten_events(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> tuple[Sequence[LLMConvertibleEvent], int]:
+        """Identify events to be forgotten and the summary offset.
+
+        Relies on the condensation reasons to determine how many events we need to drop
+        in order to maintain our resource constraints. Uses manipulation indices to
+        ensure forgetting ranges respect atomic unit boundaries.
+
+        Args:
+            view: The current view from which to identify forgotten events.
+            agent_llm: The LLM used by the agent, required for token-based calculations.
+
+        Returns:
+            A tuple of (events to forget, summary_offset).
+        """
+        reasons = self.get_condensation_reasons(view, agent_llm=agent_llm)
+        assert reasons != set(), "No condensation reasons found."
+
+        suffix_events_to_keep: set[int] = set()
+
+        if Reason.REQUEST in reasons:
+            target_size = len(view) // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.EVENTS in reasons:
+            target_size = self.max_size // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.TOKENS in reasons:
+            # Compute the number of tokens we need to eliminate to be under half the
+            # max_tokens value. We know max_tokens and the agent LLM are not None here
+            # because we can't have Reason.TOKENS without them.
+            assert self.max_tokens is not None
+            assert agent_llm is not None
+
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            tokens_to_reduce = total_tokens - (self.max_tokens // 2)
+
+            suffix_events_to_keep.add(
+                get_suffix_length_for_token_reduction(
+                    events=view.events[self.keep_first :],
+                    llm=agent_llm,
+                    token_reduction=tokens_to_reduce,
+                )
+            )
+
+        # We might have multiple reasons to condense, so pick the strictest condensation
+        # to ensure all resource constraints are met.
+        events_from_tail = min(suffix_events_to_keep)
+
+        # Calculate naive forgetting end (without considering atomic boundaries)
+        naive_end = len(view) - events_from_tail
+
+        # Find actual forgetting_start: smallest manipulation index > keep_first
+        forgetting_start = view.find_next_manipulation_index(
+            self.keep_first, strict=True
+        )
+
+        # Find actual forgetting_end: smallest manipulation index >= naive_end
+        forgetting_end = view.find_next_manipulation_index(naive_end, strict=False)
+
+        # Extract events to forget using boundary-aware indices
+        forgotten_events = view[forgetting_start:forgetting_end]
+
+        # Summary offset is the same as forgetting_start
+        return forgotten_events, forgetting_start
+
+    @observe(ignore_inputs=["view", "agent_llm"])
+    def get_condensation(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> Condensation:
+        # The condensation is dependent on the events we want to drop and the previous
+        # summary.
+        summary_event_content = self._get_summary_event_content(view)
+        forgotten_events, summary_offset = self._get_forgotten_events(
+            view, agent_llm=agent_llm
+        )
+
+        return self._generate_condensation(
+            summary_event_content=summary_event_content,
+            forgotten_events=forgotten_events,
+            summary_offset=summary_offset,
+        )
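
Note: the summarizer now supports three independent triggers (explicit requests, the event-count cap max_size, and the new token cap max_tokens). A configuration sketch, assuming view and agent_llm exist at the call site and that the model name and token budget are only placeholders:

    from openhands.sdk.context.condenser.llm_summarizing_condenser import (
        LLMSummarizingCondenser,
    )
    from openhands.sdk.llm import LLM

    condenser = LLMSummarizingCondenser(
        llm=LLM(model="gpt-4"),  # summarizer LLM, independent of the agent's LLM
        max_size=120,            # event-count trigger (Reason.EVENTS)
        max_tokens=60_000,       # token trigger (Reason.TOKENS); budget is an assumption
        keep_first=4,            # head events that are never forgotten
    )

    # At step time the agent's own LLM is passed in for token counting:
    result = condenser.condense(view, agent_llm=agent_llm)
    # result is either the View unchanged or a Condensation carrying the new summary.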

openhands/sdk/context/condenser/no_op_condenser.py
CHANGED
@@ -1,6 +1,7 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM


 class NoOpCondenser(CondenserBase):
@@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
    Primarily intended for testing purposes.
    """

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:  # noqa: ARG002
        return view

openhands/sdk/context/condenser/pipeline_condenser.py
CHANGED
@@ -1,15 +1,16 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM


 class PipelineCondenser(CondenserBase):
    """A condenser that applies a sequence of condensers in order.

    All condensers are defined primarily by their `condense` method, which takes a
-    `View` and
-
-    condenser returns a `Condensation`.
+    `View` and an optional `agent_llm` parameter, returning either a new `View` or a
+    `Condensation` event. That means we can chain multiple condensers together by
+    passing `View`s along and exiting early if any condenser returns a `Condensation`.

    For example:

@@ -20,20 +21,20 @@ class PipelineCondenser(CondenserBase):
        CondenserC(...),
    ])

-    result = condenser.condense(view)
+    result = condenser.condense(view, agent_llm=agent_llm)

    # Doing the same thing without the pipeline condenser requires more boilerplate
    # for the monadic chaining
    other_result = view

    if isinstance(other_result, View):
-        other_result = CondenserA(...).condense(other_result)
+        other_result = CondenserA(...).condense(other_result, agent_llm=agent_llm)

    if isinstance(other_result, View):
-        other_result = CondenserB(...).condense(other_result)
+        other_result = CondenserB(...).condense(other_result, agent_llm=agent_llm)

    if isinstance(other_result, View):
-        other_result = CondenserC(...).condense(other_result)
+        other_result = CondenserC(...).condense(other_result, agent_llm=agent_llm)

    assert result == other_result
    """
@@ -41,12 +42,12 @@ class PipelineCondenser(CondenserBase):
    condensers: list[CondenserBase]
    """The list of condensers to apply in order."""

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
        result: View | Condensation = view
        for condenser in self.condensers:
            if isinstance(result, Condensation):
                break
-            result = condenser.condense(result)
+            result = condenser.condense(result, agent_llm=agent_llm)
        return result

    def handles_condensation_requests(self) -> bool:
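
Note: because every condenser now takes the same optional agent_llm argument, a pipeline can forward it uniformly to each stage and still stop at the first Condensation. A sketch, assuming view and agent_llm are available and the chosen condensers are only examples:

    from openhands.sdk.context.condenser.llm_summarizing_condenser import (
        LLMSummarizingCondenser,
    )
    from openhands.sdk.context.condenser.no_op_condenser import NoOpCondenser
    from openhands.sdk.context.condenser.pipeline_condenser import PipelineCondenser
    from openhands.sdk.event.condenser import Condensation
    from openhands.sdk.llm import LLM

    pipeline = PipelineCondenser(
        condensers=[
            NoOpCondenser(),                                  # passes the View straight through
            LLMSummarizingCondenser(llm=LLM(model="gpt-4")),  # may return a Condensation
        ]
    )

    result = pipeline.condense(view, agent_llm=agent_llm)
    if isinstance(result, Condensation):
        print("pipeline stopped early with a condensation")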

openhands/sdk/context/condenser/utils.py
ADDED
@@ -0,0 +1,149 @@
+from collections.abc import Sequence
+
+from openhands.sdk.event.base import LLMConvertibleEvent
+from openhands.sdk.llm import LLM
+
+
+def get_total_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+) -> int:
+    """Calculate the total token count for a list of LLM convertible events.
+
+    This function converts the events to LLM messages and uses the provided LLM
+    to count the total number of tokens. This is useful for understanding how many
+    tokens a sequence of events will consume in the context window.
+
+    Args:
+        events: List of LLM convertible events to count tokens for
+        llm: The LLM instance to use for token counting (uses the litellm's token
+            counting utilities)
+
+    Returns:
+        Total token count for all events converted to messages
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hello, how are you?", source="user"),
+        ...     MessageEvent.from_text("I'm doing great!", source="agent"),
+        ... ]
+        >>> token_count = get_total_token_count(events, llm)
+        >>> print(f"Total tokens: {token_count}")
+    """
+    messages = LLMConvertibleEvent.events_to_messages(list(events))
+    return llm.get_token_count(messages)
+
+
+def get_shortest_prefix_above_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_count: int,
+) -> int:
+    """Find the length of the shortest prefix whose token count exceeds the target.
+
+    This function performs a binary search to efficiently find the shortest prefix
+    of events that, when converted to messages, has a total token count greater than
+    the specified target token count.
+
+    Args:
+        events: List of LLM convertible events to search through
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_count: The target token count threshold
+
+    Returns:
+        The length of the shortest prefix that exceeds the token count.
+        Returns 0 if no events are provided.
+        Returns len(events) if all events combined don't exceed the token count.
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hi", source="user"),
+        ...     MessageEvent.from_text("Hello", source="agent"),
+        ...     MessageEvent.from_text("How are you?", source="user"),
+        ...     MessageEvent.from_text("Great!", source="agent"),
+        ... ]
+        >>> prefix_len = get_shortest_prefix_above_token_count(events, llm, 20)
+        >>> # prefix_len might be 2 if first 2 events exceed 20 tokens
+    """
+    if not events:
+        return 0
+
+    # Check if all events combined don't exceed the token count
+    total_tokens = get_total_token_count(events, llm)
+    if total_tokens <= token_count:
+        return len(events)
+
+    # Binary search for the shortest prefix
+    left, right = 1, len(events)
+
+    while left < right:
+        mid = (left + right) // 2
+        prefix_tokens = get_total_token_count(events[:mid], llm)
+
+        if prefix_tokens > token_count:
+            # This prefix exceeds the count, try to find a shorter one
+            right = mid
+        else:
+            # This prefix doesn't exceed, we need a longer one
+            left = mid + 1
+
+    return left
+
+
+def get_suffix_length_for_token_reduction(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_reduction: int,
+) -> int:
+    """Find how many suffix events can be kept while reducing tokens by target amount.
+
+    This function determines the maximum number of events from the end of the list
+    that can be retained while ensuring the total token count is reduced by at least
+    the specified amount. It uses the get_shortest_prefix_above_token_count function
+    to find the prefix that must be removed.
+
+    Args:
+        events: List of LLM convertible events
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_reduction: The minimum number of tokens to reduce by
+
+    Returns:
+        The number of events from the end that can be kept (suffix length).
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Event 1", source="user"),
+        ...     MessageEvent.from_text("Event 2", source="agent"),
+        ...     MessageEvent.from_text("Event 3", source="user"),
+        ...     MessageEvent.from_text("Event 4", source="agent"),
+        ... ]
+        >>> # Suppose total is 100 tokens, and we want to reduce by 40 tokens
+        >>> suffix_len = get_suffix_length_for_token_reduction(events, llm, 40)
+        >>> # suffix_len tells us how many events from the end we can keep
+        >>> # If first 2 events = 45 tokens, suffix_len = 2 (keep last 2 events)
+    """
+    if not events:
+        return 0
+
+    if token_reduction <= 0:
+        return len(events)
+
+    # Find the shortest prefix that exceeds the token reduction target
+    prefix_length = get_shortest_prefix_above_token_count(events, llm, token_reduction)
+
+    # The suffix length is what remains after removing the prefix
+    suffix_length = len(events) - prefix_length
+
+    return suffix_length
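
Note: the three helpers are tied together by a simple invariant: the suffix that survives a token reduction is exactly what remains after dropping the shortest prefix that exceeds that reduction. A sketch reusing the module's own docstring-style setup, assuming the model's tokenizer is available for counting and the events total more than one token:

    from openhands.sdk.context.condenser.utils import (
        get_shortest_prefix_above_token_count,
        get_suffix_length_for_token_reduction,
        get_total_token_count,
    )
    from openhands.sdk.event.llm_convertible import MessageEvent
    from openhands.sdk.llm import LLM

    llm = LLM(model="gpt-4")
    events = [
        MessageEvent.from_text("Event 1", source="user"),
        MessageEvent.from_text("Event 2", source="agent"),
        MessageEvent.from_text("Event 3", source="user"),
        MessageEvent.from_text("Event 4", source="agent"),
    ]

    total = get_total_token_count(events, llm)
    # Dropping the shortest prefix that exceeds half the total tokens...
    prefix = get_shortest_prefix_above_token_count(events, llm, token_count=total // 2)
    # ...is exactly what the suffix helper does internally:
    suffix = get_suffix_length_for_token_reduction(events, llm, token_reduction=total // 2)
    assert suffix == len(events) - prefix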