openhands-sdk 1.7.0__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. openhands/sdk/agent/agent.py +31 -1
  2. openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +1 -2
  3. openhands/sdk/agent/utils.py +9 -4
  4. openhands/sdk/context/condenser/base.py +11 -6
  5. openhands/sdk/context/condenser/llm_summarizing_condenser.py +167 -18
  6. openhands/sdk/context/condenser/no_op_condenser.py +2 -1
  7. openhands/sdk/context/condenser/pipeline_condenser.py +10 -9
  8. openhands/sdk/context/condenser/utils.py +149 -0
  9. openhands/sdk/context/skills/skill.py +85 -0
  10. openhands/sdk/context/view.py +234 -37
  11. openhands/sdk/conversation/conversation.py +6 -0
  12. openhands/sdk/conversation/impl/local_conversation.py +33 -3
  13. openhands/sdk/conversation/impl/remote_conversation.py +36 -0
  14. openhands/sdk/conversation/state.py +41 -1
  15. openhands/sdk/hooks/__init__.py +30 -0
  16. openhands/sdk/hooks/config.py +180 -0
  17. openhands/sdk/hooks/conversation_hooks.py +227 -0
  18. openhands/sdk/hooks/executor.py +155 -0
  19. openhands/sdk/hooks/manager.py +170 -0
  20. openhands/sdk/hooks/types.py +40 -0
  21. openhands/sdk/io/cache.py +85 -0
  22. openhands/sdk/io/local.py +39 -2
  23. openhands/sdk/llm/mixins/fn_call_converter.py +61 -16
  24. openhands/sdk/llm/mixins/non_native_fc.py +5 -1
  25. openhands/sdk/tool/schema.py +10 -0
  26. {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/METADATA +1 -1
  27. {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/RECORD +29 -21
  28. {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/WHEEL +0 -0
  29. {openhands_sdk-1.7.0.dist-info → openhands_sdk-1.7.1.dist-info}/top_level.txt +0 -0

openhands/sdk/agent/agent.py

@@ -25,6 +25,7 @@ from openhands.sdk.event import (
     ObservationEvent,
     SystemPromptEvent,
     TokenEvent,
+    UserRejectObservation,
 )
 from openhands.sdk.event.condenser import Condensation, CondensationRequest
 from openhands.sdk.llm import (
@@ -144,9 +145,20 @@ class Agent(AgentBase):
             self._execute_actions(conversation, pending_actions, on_event)
             return
 
+        # Check if the last user message was blocked by a UserPromptSubmit hook
+        # If so, skip processing and mark conversation as finished
+        for event in reversed(list(state.events)):
+            if isinstance(event, MessageEvent) and event.source == "user":
+                reason = state.pop_blocked_message(event.id)
+                if reason is not None:
+                    logger.info(f"User message blocked by hook: {reason}")
+                    state.execution_status = ConversationExecutionStatus.FINISHED
+                    return
+                break  # Only check the most recent user message
+
         # Prepare LLM messages using the utility function
         _messages_or_condensation = prepare_llm_messages(
-            state.events, condenser=self.condenser
+            state.events, condenser=self.condenser, llm=self.llm
         )
 
         # Process condensation event before agent sampels another action
@@ -462,8 +474,26 @@ class Agent(AgentBase):
 
         It will call the tool's executor and update the state & call callback fn
         with the observation.
+
+        If the action was blocked by a PreToolUse hook (recorded in
+        state.blocked_actions), a UserRejectObservation is emitted instead
+        of executing the action.
         """
         state = conversation.state
+
+        # Check if this action was blocked by a PreToolUse hook
+        reason = state.pop_blocked_action(action_event.id)
+        if reason is not None:
+            logger.info(f"Action '{action_event.tool_name}' blocked by hook: {reason}")
+            rejection = UserRejectObservation(
+                action_id=action_event.id,
+                tool_name=action_event.tool_name,
+                tool_call_id=action_event.tool_call_id,
+                rejection_reason=reason,
+            )
+            on_event(rejection)
+            return rejection
+
         tool = self.tools_map.get(action_event.tool_name, None)
         if tool is None:
             raise RuntimeError(

openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2

@@ -1,3 +1,2 @@
 * Stream your thinking and responses while staying concise; surface key assumptions and environment prerequisites explicitly.
-* ALWAYS send a brief preamble to the user explaining what you're about to do before each tool call, using 8 - 12 words, with a friendly and curious tone.
-* You have access to external resources and should actively use available tools to try accessing them first, rather than claiming you can’t access something without making an attempt.
+* You have access to external resources and should actively use available tools to try accessing them first, rather than claiming you can’t access something without making an attempt.

openhands/sdk/agent/utils.py

@@ -117,6 +117,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: None = None,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message]: ...
 
 
@@ -125,6 +126,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: CondenserBase,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message] | Condensation: ...
 
 
@@ -132,6 +134,7 @@ def prepare_llm_messages(
     events: Sequence[Event],
     condenser: CondenserBase | None = None,
     additional_messages: list[Message] | None = None,
+    llm: LLM | None = None,
 ) -> list[Message] | Condensation:
     """Prepare LLM messages from conversation context.
 
@@ -140,13 +143,15 @@
     It handles condensation internally and calls the callback when needed.
 
     Args:
-        state: The conversation state containing events
+        events: Sequence of events to prepare messages from
        condenser: Optional condenser for handling context window limits
        additional_messages: Optional additional messages to append
-        on_event: Optional callback for handling condensation events
+        llm: Optional LLM instance from the agent, passed to condenser for
+            token counting or other LLM features
 
     Returns:
-        List of messages ready for LLM completion
+        List of messages ready for LLM completion, or a Condensation event
+        if condensation is needed
 
     Raises:
         RuntimeError: If condensation is needed but no callback is provided
@@ -160,7 +165,7 @@
     # produce a list of events, exactly as expected, or a
     # new condensation that needs to be processed
     if condenser is not None:
-        condensation_result = condenser.condense(view)
+        condensation_result = condenser.condense(view, agent_llm=llm)
 
         match condensation_result:
             case View():
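
The practical effect of the new `llm` parameter: callers that have the agent's LLM on hand can forward it so token-aware condensers have something to count with. A minimal usage sketch, assuming an existing conversation `state` and a configured condenser; only the keyword names come from the signatures above.

```python
from openhands.sdk.agent.utils import prepare_llm_messages
from openhands.sdk.llm import LLM

agent_llm = LLM(model="gpt-4")  # placeholder model, for illustration only

# `state` and `condenser` are assumed to exist already (conversation state
# and any CondenserBase instance).
result = prepare_llm_messages(
    state.events,
    condenser=condenser,
    llm=agent_llm,  # new in 1.7.1: forwarded as condenser.condense(..., agent_llm=...)
)

# `result` is either a list[Message] ready for completion, or a Condensation
# event that the caller still needs to process.
```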

openhands/sdk/context/condenser/base.py

@@ -3,6 +3,7 @@ from logging import getLogger
 
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM
 from openhands.sdk.utils.models import (
     DiscriminatedUnionMixin,
 )
@@ -28,7 +29,7 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
     """
 
     @abstractmethod
-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
         """Condense a sequence of events into a potentially smaller list.
 
         New condenser strategies should override this method to implement their own
@@ -37,6 +38,8 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
 
         Args:
             view: A view of the history containing all events that should be condensed.
+            agent_llm: LLM instance used by the agent. Condensers use this for token
+                counting purposes. Defaults to None.
 
         Returns:
             View | Condensation: A condensed view of the events or an event indicating
@@ -77,18 +80,20 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
     """
 
     @abstractmethod
-    def should_condense(self, view: View) -> bool:
+    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
        """Determine if a view should be condensed."""
 
     @abstractmethod
-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> Condensation:
         """Get the condensation from a view."""
 
-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
         # If we trigger the condenser-specific condensation threshold, compute and
         # return the condensation.
-        if self.should_condense(view):
-            return self.get_condensation(view)
+        if self.should_condense(view, agent_llm=agent_llm):
+            return self.get_condensation(view, agent_llm=agent_llm)
 
         # Otherwise we're safe to just return the view.
         else:
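
For condensers defined outside the SDK, the change means `condense`, `should_condense`, and `get_condensation` overrides should accept the new optional `agent_llm` keyword. Below is a sketch of a rolling condenser written against the abstract methods above; the class name and token budget are hypothetical, and only the signatures and the `get_total_token_count` helper (added in this release, see openhands/sdk/context/condenser/utils.py further down) come from the diff.

```python
from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.utils import get_total_token_count
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class TokenBudgetCondenser(RollingCondenser):  # hypothetical example subclass
    token_budget: int = 32_000  # illustrative threshold

    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
        # Without the agent's LLM there is nothing to count tokens with.
        if agent_llm is None:
            return False
        return get_total_token_count(view.events, agent_llm) > self.token_budget

    def get_condensation(
        self, view: View, agent_llm: LLM | None = None
    ) -> Condensation:
        # Building the actual Condensation (summary, forgotten event ids, ...)
        # is strategy-specific; see LLMSummarizingCondenser below for the SDK's
        # own implementation.
        raise NotImplementedError
```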

openhands/sdk/context/condenser/llm_summarizing_condenser.py

@@ -1,19 +1,43 @@
 import os
+from collections.abc import Sequence
+from enum import Enum
 
 from pydantic import Field, model_validator
 
 from openhands.sdk.context.condenser.base import RollingCondenser
+from openhands.sdk.context.condenser.utils import (
+    get_suffix_length_for_token_reduction,
+    get_total_token_count,
+)
 from openhands.sdk.context.prompts import render_template
 from openhands.sdk.context.view import View
+from openhands.sdk.event.base import LLMConvertibleEvent
 from openhands.sdk.event.condenser import Condensation
 from openhands.sdk.event.llm_convertible import MessageEvent
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.observability.laminar import observe
 
 
+class Reason(Enum):
+    """Reasons for condensation."""
+
+    REQUEST = "request"
+    TOKENS = "tokens"
+    EVENTS = "events"
+
+
 class LLMSummarizingCondenser(RollingCondenser):
+    """LLM-based condenser that summarizes forgotten events.
+
+    Uses an independent LLM (stored in the `llm` attribute) for generating summaries
+    of forgotten events. The optional `agent_llm` parameter passed to condense() is
+    the LLM used by the agent for token counting purposes, and you should not assume
+    it is the same as the one defined in this condenser.
+    """
+
     llm: LLM
     max_size: int = Field(default=120, gt=0)
+    max_tokens: int | None = None
     keep_first: int = Field(default=4, ge=0)
 
     @model_validator(mode="after")
@@ -29,23 +53,47 @@ class LLMSummarizingCondenser(RollingCondenser):
     def handles_condensation_requests(self) -> bool:
         return True
 
-    def should_condense(self, view: View) -> bool:
-        if view.unhandled_condensation_request:
-            return True
-        return len(view) > self.max_size
+    def get_condensation_reasons(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> set[Reason]:
+        """Determine the reasons why the view should be condensed.
+
+        Args:
+            view: The current view to evaluate.
+            agent_llm: The LLM used by the agent. Required if token counting is needed.
 
-    @observe(ignore_inputs=["view"])
-    def get_condensation(self, view: View) -> Condensation:
-        head = view[: self.keep_first]
-        target_size = self.max_size // 2
+        Returns:
+            A set of Reason enums indicating why condensation is needed.
+        """
+        reasons = set()
+
+        # Reason 1: Unhandled condensation request. The view handles the detection of
+        # these requests while processing the event stream.
         if view.unhandled_condensation_request:
-            # Condensation triggered by a condensation request
-            # should be calculated based on the view size.
-            target_size = len(view) // 2
-        # Number of events to keep from the tail -- target size, minus however many
-        # prefix events from the head, minus one for the summarization event
-        events_from_tail = target_size - len(head) - 1
+            reasons.add(Reason.REQUEST)
 
+        # Reason 2: Token limit is provided and exceeded.
+        if self.max_tokens and agent_llm:
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            if total_tokens > self.max_tokens:
+                reasons.add(Reason.TOKENS)
+
+        # Reason 3: View exceeds maximum size in number of events.
+        if len(view) > self.max_size:
+            reasons.add(Reason.EVENTS)
+
+        return reasons
+
+    def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
+        reasons = self.get_condensation_reasons(view, agent_llm)
+        return reasons != set()
+
+    def _get_summary_event_content(self, view: View) -> str:
+        """Extract the text content from the summary event in the view, if any.
+
+        If there is no summary event or it does not contain text content, returns an
+        empty string.
+        """
         summary_event_content: str = ""
 
         summary_event = view.summary_event
@@ -54,9 +102,25 @@ class LLMSummarizingCondenser(RollingCondenser):
             if isinstance(message_content, TextContent):
                 summary_event_content = message_content.text
 
-        # Identify events to be forgotten (those not in head or tail)
-        forgotten_events = view[self.keep_first : -events_from_tail]
-
+        return summary_event_content
+
+    def _generate_condensation(
+        self,
+        summary_event_content: str,
+        forgotten_events: Sequence[LLMConvertibleEvent],
+        summary_offset: int,
+    ) -> Condensation:
+        """Generate a condensation by using the condenser's LLM to summarize forgotten
+        events.
+
+        Args:
+            summary_event_content: The content of the previous summary event.
+            forgotten_events: The list of events to be summarized.
+            summary_offset: The index where the summary event should be inserted.
+
+        Returns:
+            Condensation: The generated condensation object.
+        """
         # Convert events to strings for the template
         event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]
 
@@ -84,6 +148,91 @@ class LLMSummarizingCondenser(RollingCondenser):
         return Condensation(
             forgotten_event_ids=[event.id for event in forgotten_events],
             summary=summary,
-            summary_offset=self.keep_first,
+            summary_offset=summary_offset,
             llm_response_id=llm_response.id,
         )
+
+    def _get_forgotten_events(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> tuple[Sequence[LLMConvertibleEvent], int]:
+        """Identify events to be forgotten and the summary offset.
+
+        Relies on the condensation reasons to determine how many events we need to drop
+        in order to maintain our resource constraints. Uses manipulation indices to
+        ensure forgetting ranges respect atomic unit boundaries.
+
+        Args:
+            view: The current view from which to identify forgotten events.
+            agent_llm: The LLM used by the agent, required for token-based calculations.
+
+        Returns:
+            A tuple of (events to forget, summary_offset).
+        """
+        reasons = self.get_condensation_reasons(view, agent_llm=agent_llm)
+        assert reasons != set(), "No condensation reasons found."
+
+        suffix_events_to_keep: set[int] = set()
+
+        if Reason.REQUEST in reasons:
+            target_size = len(view) // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.EVENTS in reasons:
+            target_size = self.max_size // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.TOKENS in reasons:
+            # Compute the number of tokens we need to eliminate to be under half the
+            # max_tokens value. We know max_tokens and the agent LLM are not None here
+            # because we can't have Reason.TOKENS without them.
+            assert self.max_tokens is not None
+            assert agent_llm is not None
+
+            total_tokens = get_total_token_count(view.events, agent_llm)
+            tokens_to_reduce = total_tokens - (self.max_tokens // 2)
+
+            suffix_events_to_keep.add(
+                get_suffix_length_for_token_reduction(
+                    events=view.events[self.keep_first :],
+                    llm=agent_llm,
+                    token_reduction=tokens_to_reduce,
+                )
+            )
+
+        # We might have multiple reasons to condense, so pick the strictest condensation
+        # to ensure all resource constraints are met.
+        events_from_tail = min(suffix_events_to_keep)
+
+        # Calculate naive forgetting end (without considering atomic boundaries)
+        naive_end = len(view) - events_from_tail
+
+        # Find actual forgetting_start: smallest manipulation index > keep_first
+        forgetting_start = view.find_next_manipulation_index(
+            self.keep_first, strict=True
+        )
+
+        # Find actual forgetting_end: smallest manipulation index >= naive_end
+        forgetting_end = view.find_next_manipulation_index(naive_end, strict=False)
+
+        # Extract events to forget using boundary-aware indices
+        forgotten_events = view[forgetting_start:forgetting_end]
+
+        # Summary offset is the same as forgetting_start
+        return forgotten_events, forgetting_start
+
+    @observe(ignore_inputs=["view", "agent_llm"])
+    def get_condensation(
+        self, view: View, agent_llm: LLM | None = None
+    ) -> Condensation:
+        # The condensation is dependent on the events we want to drop and the previous
+        # summary.
+        summary_event_content = self._get_summary_event_content(view)
+        forgotten_events, summary_offset = self._get_forgotten_events(
+            view, agent_llm=agent_llm
+        )
+
+        return self._generate_condensation(
+            summary_event_content=summary_event_content,
+            forgotten_events=forgotten_events,
+            summary_offset=summary_offset,
+        )
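
The new `max_tokens` field gives the summarizer a token-based trigger alongside the existing event-count trigger. A hedged configuration sketch; the model name and numeric limits are placeholders, while the field names and the `condense(view, agent_llm=...)` call match the code above.

```python
from openhands.sdk.context.condenser.llm_summarizing_condenser import (
    LLMSummarizingCondenser,
)
from openhands.sdk.llm import LLM

condenser = LLMSummarizingCondenser(
    llm=LLM(model="gpt-4"),  # independent LLM that writes the summaries
    max_size=120,            # condense once the view exceeds 120 events
    max_tokens=50_000,       # new in 1.7.1: also condense past ~50k tokens
    keep_first=4,            # always keep the first 4 events verbatim
)

# `view` and `agent_llm` are assumed to exist; the agent's own LLM is passed
# so token counting reflects the model actually consuming the context.
result = condenser.condense(view, agent_llm=agent_llm)
```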

openhands/sdk/context/condenser/no_op_condenser.py

@@ -1,6 +1,7 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM
 
 
 class NoOpCondenser(CondenserBase):
@@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
     Primarily intended for testing purposes.
     """
 
-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:  # noqa: ARG002
         return view

openhands/sdk/context/condenser/pipeline_condenser.py

@@ -1,15 +1,16 @@
 from openhands.sdk.context.condenser.base import CondenserBase
 from openhands.sdk.context.view import View
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm import LLM
 
 
 class PipelineCondenser(CondenserBase):
     """A condenser that applies a sequence of condensers in order.
 
     All condensers are defined primarily by their `condense` method, which takes a
-    `View` and returns either a new `View` or a `Condensation` event. That means we can
-    chain multiple condensers together by passing `View`s along and exiting early if any
-    condenser returns a `Condensation`.
+    `View` and an optional `agent_llm` parameter, returning either a new `View` or a
+    `Condensation` event. That means we can chain multiple condensers together by
+    passing `View`s along and exiting early if any condenser returns a `Condensation`.
 
     For example:
 
@@ -20,20 +21,20 @@ class PipelineCondenser(CondenserBase):
             CondenserC(...),
         ])
 
-        result = condenser.condense(view)
+        result = condenser.condense(view, agent_llm=agent_llm)
 
         # Doing the same thing without the pipeline condenser requires more boilerplate
         # for the monadic chaining
         other_result = view
 
         if isinstance(other_result, View):
-            other_result = CondenserA(...).condense(other_result)
+            other_result = CondenserA(...).condense(other_result, agent_llm=agent_llm)
 
         if isinstance(other_result, View):
-            other_result = CondenserB(...).condense(other_result)
+            other_result = CondenserB(...).condense(other_result, agent_llm=agent_llm)
 
         if isinstance(other_result, View):
-            other_result = CondenserC(...).condense(other_result)
+            other_result = CondenserC(...).condense(other_result, agent_llm=agent_llm)
 
         assert result == other_result
     """
@@ -41,12 +42,12 @@ class PipelineCondenser(CondenserBase):
     condensers: list[CondenserBase]
     """The list of condensers to apply in order."""
 
-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
         result: View | Condensation = view
         for condenser in self.condensers:
             if isinstance(result, Condensation):
                 break
-            result = condenser.condense(result)
+            result = condenser.condense(result, agent_llm=agent_llm)
         return result
 
     def handles_condensation_requests(self) -> bool:
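
Putting the pieces together, a pipeline can now thread the same `agent_llm` through every stage. A short sketch; the particular condenser chosen and its settings are illustrative, and `view`/`agent_llm` are assumed to exist.

```python
from openhands.sdk.context.condenser.llm_summarizing_condenser import (
    LLMSummarizingCondenser,
)
from openhands.sdk.context.condenser.pipeline_condenser import PipelineCondenser
from openhands.sdk.llm import LLM

pipeline = PipelineCondenser(
    condensers=[
        LLMSummarizingCondenser(llm=LLM(model="gpt-4"), max_tokens=50_000),
        # ...further condensers could follow here
    ]
)

# The pipeline stops early if any stage returns a Condensation instead of a View.
result = pipeline.condense(view, agent_llm=agent_llm)
```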

openhands/sdk/context/condenser/utils.py (new file)

@@ -0,0 +1,149 @@
+from collections.abc import Sequence
+
+from openhands.sdk.event.base import LLMConvertibleEvent
+from openhands.sdk.llm import LLM
+
+
+def get_total_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+) -> int:
+    """Calculate the total token count for a list of LLM convertible events.
+
+    This function converts the events to LLM messages and uses the provided LLM
+    to count the total number of tokens. This is useful for understanding how many
+    tokens a sequence of events will consume in the context window.
+
+    Args:
+        events: List of LLM convertible events to count tokens for
+        llm: The LLM instance to use for token counting (uses the litellm's token
+            counting utilities)
+
+    Returns:
+        Total token count for all events converted to messages
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hello, how are you?", source="user"),
+        ...     MessageEvent.from_text("I'm doing great!", source="agent"),
+        ... ]
+        >>> token_count = get_total_token_count(events, llm)
+        >>> print(f"Total tokens: {token_count}")
+    """
+    messages = LLMConvertibleEvent.events_to_messages(list(events))
+    return llm.get_token_count(messages)
+
+
+def get_shortest_prefix_above_token_count(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_count: int,
+) -> int:
+    """Find the length of the shortest prefix whose token count exceeds the target.
+
+    This function performs a binary search to efficiently find the shortest prefix
+    of events that, when converted to messages, has a total token count greater than
+    the specified target token count.
+
+    Args:
+        events: List of LLM convertible events to search through
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_count: The target token count threshold
+
+    Returns:
+        The length of the shortest prefix that exceeds the token count.
+        Returns 0 if no events are provided.
+        Returns len(events) if all events combined don't exceed the token count.
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Hi", source="user"),
+        ...     MessageEvent.from_text("Hello", source="agent"),
+        ...     MessageEvent.from_text("How are you?", source="user"),
+        ...     MessageEvent.from_text("Great!", source="agent"),
+        ... ]
+        >>> prefix_len = get_shortest_prefix_above_token_count(events, llm, 20)
+        >>> # prefix_len might be 2 if first 2 events exceed 20 tokens
+    """
+    if not events:
+        return 0
+
+    # Check if all events combined don't exceed the token count
+    total_tokens = get_total_token_count(events, llm)
+    if total_tokens <= token_count:
+        return len(events)
+
+    # Binary search for the shortest prefix
+    left, right = 1, len(events)
+
+    while left < right:
+        mid = (left + right) // 2
+        prefix_tokens = get_total_token_count(events[:mid], llm)
+
+        if prefix_tokens > token_count:
+            # This prefix exceeds the count, try to find a shorter one
+            right = mid
+        else:
+            # This prefix doesn't exceed, we need a longer one
+            left = mid + 1
+
+    return left
+
+
+def get_suffix_length_for_token_reduction(
+    events: Sequence[LLMConvertibleEvent],
+    llm: LLM,
+    token_reduction: int,
+) -> int:
+    """Find how many suffix events can be kept while reducing tokens by target amount.
+
+    This function determines the maximum number of events from the end of the list
+    that can be retained while ensuring the total token count is reduced by at least
+    the specified amount. It uses the get_shortest_prefix_above_token_count function
+    to find the prefix that must be removed.
+
+    Args:
+        events: List of LLM convertible events
+        llm: The LLM instance to use for token counting (uses the model's tokenizer)
+        token_reduction: The minimum number of tokens to reduce by
+
+    Returns:
+        The number of events from the end that can be kept (suffix length).
+
+    Example:
+        >>> from openhands.sdk.llm import LLM
+        >>> from openhands.sdk.event.llm_convertible import MessageEvent
+        >>>
+        >>> llm = LLM(model="gpt-4")
+        >>> events = [
+        ...     MessageEvent.from_text("Event 1", source="user"),
+        ...     MessageEvent.from_text("Event 2", source="agent"),
+        ...     MessageEvent.from_text("Event 3", source="user"),
+        ...     MessageEvent.from_text("Event 4", source="agent"),
+        ... ]
+        >>> # Suppose total is 100 tokens, and we want to reduce by 40 tokens
+        >>> suffix_len = get_suffix_length_for_token_reduction(events, llm, 40)
+        >>> # suffix_len tells us how many events from the end we can keep
+        >>> # If first 2 events = 45 tokens, suffix_len = 2 (keep last 2 events)
+    """
+    if not events:
+        return 0
+
+    if token_reduction <= 0:
+        return len(events)
+
+    # Find the shortest prefix that exceeds the token reduction target
+    prefix_length = get_shortest_prefix_above_token_count(events, llm, token_reduction)
+
+    # The suffix length is what remains after removing the prefix
+    suffix_length = len(events) - prefix_length
+
+    return suffix_length
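
These three helpers compose in the token-triggered branch of the summarizing condenser: count the total, work out how much must go, and convert that into a number of tail events to keep. A small worked sketch with made-up numbers, assuming an existing `view`:

```python
from openhands.sdk.llm import LLM

llm = LLM(model="gpt-4")
max_tokens = 50_000
keep_first = 4

total = get_total_token_count(view.events, llm)   # say this comes to 80_000
tokens_to_reduce = total - (max_tokens // 2)      # 80_000 - 25_000 = 55_000

# Number of trailing events that survive once a prefix worth at least
# `tokens_to_reduce` tokens is dropped (the first `keep_first` events are
# exempt from forgetting, so they are sliced off before counting).
suffix_to_keep = get_suffix_length_for_token_reduction(
    events=view.events[keep_first:],
    llm=llm,
    token_reduction=tokens_to_reduce,
)
```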