openhands-sdk 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. openhands/sdk/agent/agent.py +60 -27
  2. openhands/sdk/agent/base.py +1 -1
  3. openhands/sdk/context/condenser/base.py +36 -3
  4. openhands/sdk/context/condenser/llm_summarizing_condenser.py +65 -1
  5. openhands/sdk/context/prompts/templates/system_message_suffix.j2 +2 -1
  6. openhands/sdk/context/skills/skill.py +15 -30
  7. openhands/sdk/conversation/base.py +31 -0
  8. openhands/sdk/conversation/conversation.py +5 -0
  9. openhands/sdk/conversation/impl/local_conversation.py +63 -13
  10. openhands/sdk/conversation/impl/remote_conversation.py +128 -13
  11. openhands/sdk/conversation/state.py +19 -0
  12. openhands/sdk/conversation/stuck_detector.py +18 -9
  13. openhands/sdk/llm/__init__.py +16 -0
  14. openhands/sdk/llm/auth/__init__.py +28 -0
  15. openhands/sdk/llm/auth/credentials.py +157 -0
  16. openhands/sdk/llm/auth/openai.py +762 -0
  17. openhands/sdk/llm/llm.py +175 -20
  18. openhands/sdk/llm/message.py +21 -11
  19. openhands/sdk/llm/options/responses_options.py +8 -7
  20. openhands/sdk/llm/utils/model_features.py +2 -0
  21. openhands/sdk/llm/utils/verified_models.py +3 -0
  22. openhands/sdk/mcp/tool.py +27 -4
  23. openhands/sdk/secret/secrets.py +13 -1
  24. openhands/sdk/workspace/remote/base.py +8 -3
  25. openhands/sdk/workspace/remote/remote_workspace_mixin.py +40 -7
  26. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/METADATA +1 -1
  27. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/RECORD +29 -26
  28. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/WHEEL +0 -0
  29. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/top_level.txt +0 -0
@@ -67,6 +67,10 @@ from openhands.sdk.tool.builtins import (
67
67
  logger = get_logger(__name__)
68
68
  maybe_init_laminar()
69
69
 
70
+ # Maximum number of events to scan during init_state defensive checks.
71
+ # SystemPromptEvent must appear within this prefix (at index 0 or 1).
72
+ INIT_STATE_PREFIX_SCAN_WINDOW = 3
73
+
70
74
 
71
75
  class Agent(AgentBase):
72
76
  """Main agent implementation for OpenHands.
@@ -102,53 +106,82 @@ class Agent(AgentBase):
102
106
  state: ConversationState,
103
107
  on_event: ConversationCallbackType,
104
108
  ) -> None:
109
+ """Initialize conversation state.
110
+
111
+ Invariants enforced by this method:
112
+ - If a SystemPromptEvent is already present, it must be within the first 3
113
+ events (index 0 or 1 in practice; index 2 is included in the scan window
114
+ to detect a user message appearing before the system prompt).
115
+ - A user MessageEvent should not appear before the SystemPromptEvent.
116
+
117
+ These invariants keep event ordering predictable for downstream components
118
+ (condenser, UI, etc.) and also prevent accidentally materializing the full
119
+ event history during initialization.
120
+ """
105
121
  super().init_state(state, on_event=on_event)
106
- # TODO(openhands): we should add test to test this init_state will actually
107
- # modify state in-place
108
122
 
109
123
  # Defensive check: Analyze state to detect unexpected initialization scenarios
110
124
  # These checks help diagnose issues related to lazy loading and event ordering
111
125
  # See: https://github.com/OpenHands/software-agent-sdk/issues/1785
112
- events = list(state.events)
113
- has_system_prompt = any(isinstance(e, SystemPromptEvent) for e in events)
126
+ #
127
+ # NOTE: len() is O(1) for EventLog (file-backed implementation).
128
+ event_count = len(state.events)
129
+
130
+ # NOTE: state.events is intentionally an EventsListBase (Sequence-like), not
131
+ # a plain list. Avoid materializing the full history via list(state.events)
132
+ # here (conversations can reach 30k+ events).
133
+ #
134
+ # Invariant: when init_state is called, SystemPromptEvent (if present) must be
135
+ # at index 0 or 1.
136
+ #
137
+ # Rationale:
138
+ # - Local conversations start empty and init_state is responsible for adding
139
+ # the SystemPromptEvent as the first event.
140
+ # - Remote conversations may receive an initial ConversationStateUpdateEvent
141
+ # from the agent-server immediately after subscription. In a typical remote
142
+ # session prefix you may see:
143
+ # [ConversationStateUpdateEvent, SystemPromptEvent, MessageEvent, ...]
144
+ #
145
+ # We intentionally only inspect the first few events (cheap for both local and
146
+ # remote) to enforce this invariant.
147
+ prefix_events = state.events[:INIT_STATE_PREFIX_SCAN_WINDOW]
148
+
149
+ has_system_prompt = any(isinstance(e, SystemPromptEvent) for e in prefix_events)
114
150
  has_user_message = any(
115
- isinstance(e, MessageEvent) and e.source == "user" for e in events
151
+ isinstance(e, MessageEvent) and e.source == "user" for e in prefix_events
116
152
  )
117
- has_any_llm_event = any(isinstance(e, LLMConvertibleEvent) for e in events)
118
-
119
153
  # Log state for debugging initialization order issues
120
154
  logger.debug(
121
155
  f"init_state called: conversation_id={state.id}, "
122
- f"event_count={len(events)}, "
156
+ f"event_count={event_count}, "
123
157
  f"has_system_prompt={has_system_prompt}, "
124
- f"has_user_message={has_user_message}, "
125
- f"has_any_llm_event={has_any_llm_event}"
158
+ f"has_user_message={has_user_message}"
126
159
  )
127
160
 
128
161
  if has_system_prompt:
129
- # SystemPromptEvent already exists - this is unexpected during normal flow
130
- # but could happen in persistence/resume scenarios
131
- logger.warning(
132
- f"init_state called but SystemPromptEvent already exists. "
133
- f"conversation_id={state.id}, event_count={len(events)}. "
134
- f"This may indicate double initialization or a resume scenario."
162
+ # Restoring/resuming conversations is normal: a system prompt already
163
+ # present means this conversation was initialized previously.
164
+ logger.debug(
165
+ "init_state: SystemPromptEvent already present; skipping init. "
166
+ f"conversation_id={state.id}, event_count={event_count}."
135
167
  )
136
168
  return
137
169
 
138
- # Assert: If there are user messages but no system prompt, something is wrong
139
- # The system prompt should always be added before any user messages
170
+ # Assert: A user message should never appear before the system prompt.
171
+ #
172
+ # NOTE: This is a best-effort check based on the first few events only.
173
+ # Remote conversations can include a ConversationStateUpdateEvent near the
174
+ # start, so we scan a small prefix window.
140
175
  if has_user_message:
141
- event_types = [type(e).__name__ for e in events]
176
+ event_types = [type(e).__name__ for e in prefix_events]
142
177
  logger.error(
143
- f"init_state: User message exists without SystemPromptEvent! "
144
- f"conversation_id={state.id}, events={event_types}"
178
+ f"init_state: User message found in prefix before SystemPromptEvent! "
179
+ f"conversation_id={state.id}, prefix_events={event_types}"
145
180
  )
146
- assert not has_user_message, (
147
- f"Unexpected state: User message exists before SystemPromptEvent. "
148
- f"conversation_id={state.id}, event_count={len(events)}, "
149
- f"event_types={event_types}. "
150
- f"This indicates an initialization order bug - init_state should be "
151
- f"called before any user messages are added to the conversation."
181
+ raise AssertionError(
182
+ "Unexpected state: user message exists before SystemPromptEvent. "
183
+ f"conversation_id={state.id}, event_count={event_count}, "
184
+ f"prefix_event_types={event_types}."
152
185
  )
153
186
 
154
187
  # Prepare system message
@@ -503,5 +503,5 @@ class AgentBase(DiscriminatedUnionMixin, ABC):
503
503
  RuntimeError: If the agent has not been initialized.
504
504
  """
505
505
  if not self._initialized:
506
- raise RuntimeError("Agent not initialized; call initialize() before use")
506
+ raise RuntimeError("Agent not initialized; call _initialize() before use")
507
507
  return self._tools
@@ -103,6 +103,23 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
103
103
  `View` to be passed to the LLM.
104
104
  """
105
105
 
106
+ def hard_context_reset(
107
+ self,
108
+ view: View, # noqa: ARG002
109
+ agent_llm: LLM | None = None, # noqa: ARG002
110
+ ) -> Condensation | None:
111
+ """Perform a hard context reset, if supported by the condenser.
112
+
113
+ By default, rolling condensers do not support hard context resets. Override this
114
+ method to implement hard context reset logic by returning a `Condensation`
115
+ object.
116
+
117
+ This method is invoked when:
118
+ - A HARD condensation requirement is triggered (e.g., by user request)
119
+ - But the condenser raises a NoCondensationAvailableException error
120
+ """
121
+ return None
122
+
106
123
  @abstractmethod
107
124
  def condensation_requirement(
108
125
  self, view: View, agent_llm: LLM | None = None
@@ -142,9 +159,25 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
142
159
  # we do so immediately.
143
160
  return view
144
161
 
145
- # Otherwise re-raise the exception.
146
- else:
147
- raise e
162
+ elif request == CondensationRequirement.HARD:
163
+ # The agent has found itself in a situation where it cannot proceed
164
+ # without condensation, but the condenser cannot provide one. We'll
165
+ # try to recover from this situation by performing a hard context
166
+ # reset, if supported by the condenser.
167
+ try:
168
+ hard_reset_condensation = self.hard_context_reset(
169
+ view, agent_llm=agent_llm
170
+ )
171
+ if hard_reset_condensation is not None:
172
+ return hard_reset_condensation
173
+
174
+ # And if something goes wrong with the hard reset make sure we keep
175
+ # both errors in the stack
176
+ except Exception as hard_reset_exception:
177
+ raise hard_reset_exception from e
178
+
179
+ # In all other situations re-raise the exception.
180
+ raise e
148
181
 
149
182
  # Otherwise we're safe to just return the view.
150
183
  else:
@@ -18,7 +18,12 @@ from openhands.sdk.context.view import View
18
18
  from openhands.sdk.event.base import LLMConvertibleEvent
19
19
  from openhands.sdk.event.condenser import Condensation
20
20
  from openhands.sdk.llm import LLM, Message, TextContent
21
+ from openhands.sdk.logger import get_logger
21
22
  from openhands.sdk.observability.laminar import observe
23
+ from openhands.sdk.utils import maybe_truncate
24
+
25
+
26
+ logger = get_logger(__name__)
22
27
 
23
28
 
24
29
  class Reason(Enum):
@@ -47,6 +52,14 @@ class LLMSummarizingCondenser(RollingCondenser):
47
52
  `keep_first` events in the conversation will never be condensed or summarized.
48
53
  """
49
54
 
55
+ hard_context_reset_max_retries: int = Field(default=5, gt=0)
56
+ """Number of attempts to perform hard context reset before raising an error."""
57
+
58
+ hard_context_reset_context_scaling: float = Field(default=0.8, gt=0.0, lt=1.0)
59
+ """When performing hard context reset, if the summarization fails, reduce the max
60
+ size of each event string by this factor and retry.
61
+ """
62
+
50
63
  @model_validator(mode="after")
51
64
  def validate_keep_first_vs_max_size(self):
52
65
  events_from_tail = self.max_size // 2 - self.keep_first - 1
@@ -120,6 +133,7 @@ class LLMSummarizingCondenser(RollingCondenser):
120
133
  self,
121
134
  forgotten_events: Sequence[LLMConvertibleEvent],
122
135
  summary_offset: int,
136
+ max_event_str_length: int | None = None,
123
137
  ) -> Condensation:
124
138
  """Generate a condensation by using the condenser's LLM to summarize forgotten
125
139
  events.
@@ -127,6 +141,8 @@ class LLMSummarizingCondenser(RollingCondenser):
127
141
  Args:
128
142
  forgotten_events: The list of events to be summarized.
129
143
  summary_offset: The index where the summary event should be inserted.
144
+ max_event_str_length: Optional maximum length for each event string. If
145
+ provided, event strings longer than this will be truncated.
130
146
 
131
147
  Returns:
132
148
  Condensation: The generated condensation object.
@@ -137,7 +153,10 @@ class LLMSummarizingCondenser(RollingCondenser):
137
153
  assert len(forgotten_events) > 0, "No events to condense."
138
154
 
139
155
  # Convert events to strings for the template
140
- event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]
156
+ event_strings = [
157
+ maybe_truncate(str(forgotten_event), truncate_after=max_event_str_length)
158
+ for forgotten_event in forgotten_events
159
+ ]
141
160
 
142
161
  prompt = render_template(
143
162
  os.path.join(os.path.dirname(__file__), "prompts"),
@@ -232,6 +251,51 @@ class LLMSummarizingCondenser(RollingCondenser):
232
251
  # Summary offset is the same as forgetting_start
233
252
  return forgotten_events, forgetting_start
234
253
 
254
+ @observe(ignore_inputs=["view", "agent_llm"])
255
+ def hard_context_reset(
256
+ self,
257
+ view: View,
258
+ agent_llm: LLM | None = None, # noqa: ARG002
259
+ ) -> Condensation | None:
260
+ """Perform a hard context reset by summarizing all events in the view.
261
+
262
+ Depending on how the hard context reset is triggered, this may fail (e.g., if
263
+ the view is too large for the summarizing LLM to handle). In that case, we keep
264
+ trimming down the contents until a summary can be generated.
265
+ """
266
+ max_event_str_length: int | None = None
267
+ attempts_remaining: int = self.hard_context_reset_max_retries
268
+
269
+ while attempts_remaining > 0:
270
+ try:
271
+ return self._generate_condensation(
272
+ forgotten_events=view.events,
273
+ summary_offset=0,
274
+ max_event_str_length=max_event_str_length,
275
+ )
276
+ except Exception as e:
277
+ # If we haven't set a max_event_str_length yet, set it as the largest
278
+ # event string length.
279
+ if max_event_str_length is None:
280
+ max_event_str_length = max(len(str(event)) for event in view.events)
281
+
282
+ # Since the summarization failed, reduce the max_event_str_length by 20%
283
+ assert max_event_str_length is not None
284
+ max_event_str_length = int(
285
+ max_event_str_length * self.hard_context_reset_context_scaling
286
+ )
287
+
288
+ # Log the exception so we can track these failures
289
+ logger.warning(
290
+ f"Hard context reset summarization failed with exception: {e}. "
291
+ f"Reducing max event size to {max_event_str_length} and retrying."
292
+ )
293
+
294
+ attempts_remaining -= 1
295
+
296
+ logger.error("Hard context reset summarization failed after multiple attempts.")
297
+ return None
298
+
235
299
  @observe(ignore_inputs=["view", "agent_llm"])
236
300
  def get_condensation(
237
301
  self, view: View, agent_llm: LLM | None = None
@@ -27,9 +27,10 @@ You can also directly look up a skill's full content by reading its location pat
27
27
  <CUSTOM_SECRETS>
28
28
  ### Credential Access
29
29
  * Automatic secret injection: When you reference a registered secret key in your bash command, the secret value will be automatically exported as an environment variable before your command executes.
30
- * How to use secrets: Simply reference the secret key in your command (e.g., `echo ${GITHUB_TOKEN:0:8}` or `curl -H "Authorization: Bearer $API_KEY" https://api.example.com`). The system will detect the key name in your command text and export it as environment variable before it executes your command.
30
+ * How to use secrets: Simply reference the secret key in your command (e.g., `curl -H "Authorization: Bearer $API_KEY" https://api.example.com`). The system will detect the key name in your command text and export it as environment variable before it executes your command.
31
31
  * Secret detection: The system performs case-insensitive matching to find secret keys in your command text. If a registered secret key appears anywhere in your command, its value will be made available as an environment variable.
32
32
  * Security: Secret values are automatically masked in command output to prevent accidental exposure. You will see `<secret-hidden>` instead of the actual secret value in the output.
33
+ * Avoid exposing raw secrets: Never echo or print the full value of secrets (e.g., avoid `echo $SECRET`). The conversation history may be logged or shared, and exposing raw secret values could compromise security. Instead, use secrets directly in commands where they serve their intended purpose (e.g., in curl headers or git URLs).
33
34
  * Refreshing expired secrets: Some secrets (like GITHUB_TOKEN) may be updated periodically or expire over time. If a secret stops working (e.g., authentication failures), try using it again in a new command - the system should automatically use the refreshed value. For example, if GITHUB_TOKEN was used in a git remote URL and later expired, you can update the remote URL with the current token: `git remote set-url origin https://${GITHUB_TOKEN}@github.com/username/repo.git` to pick up the refreshed token value.
34
35
  * If it still fails, report it to the user.
35
36
 
@@ -27,15 +27,10 @@ from openhands.sdk.context.skills.utils import (
27
27
  validate_skill_name,
28
28
  )
29
29
  from openhands.sdk.logger import get_logger
30
- from openhands.sdk.utils import maybe_truncate
31
30
 
32
31
 
33
32
  logger = get_logger(__name__)
34
33
 
35
- # Maximum characters for third-party skill files (e.g., AGENTS.md, CLAUDE.md, GEMINI.md)
36
- # These files are always active, so we want to keep them reasonably sized
37
- THIRD_PARTY_SKILL_MAX_CHARS = 10_000
38
-
39
34
 
40
35
  class SkillInfo(BaseModel):
41
36
  """Lightweight representation of a skill's essential information.
@@ -485,32 +480,14 @@ class Skill(BaseModel):
485
480
  """Handle third-party skill files (e.g., .cursorrules, AGENTS.md).
486
481
 
487
482
  Creates a Skill with None trigger (always active) if the file type
488
- is recognized. Truncates content if it exceeds the limit.
483
+ is recognized.
489
484
  """
490
485
  skill_name = cls.PATH_TO_THIRD_PARTY_SKILL_NAME.get(path.name.lower())
491
486
 
492
487
  if skill_name is not None:
493
- truncated_content = maybe_truncate(
494
- file_content,
495
- truncate_after=THIRD_PARTY_SKILL_MAX_CHARS,
496
- truncate_notice=(
497
- f"\n\n<TRUNCATED><NOTE>The file {path} exceeded the "
498
- f"maximum length ({THIRD_PARTY_SKILL_MAX_CHARS} "
499
- f"characters) and has been truncated. Only the "
500
- f"beginning and end are shown. You can read the full "
501
- f"file if needed.</NOTE>\n\n"
502
- ),
503
- )
504
-
505
- if len(file_content) > THIRD_PARTY_SKILL_MAX_CHARS:
506
- logger.warning(
507
- f"Third-party skill file {path} ({len(file_content)} chars) "
508
- f"exceeded limit ({THIRD_PARTY_SKILL_MAX_CHARS} chars), truncating"
509
- )
510
-
511
488
  return Skill(
512
489
  name=skill_name,
513
- content=truncated_content,
490
+ content=file_content,
514
491
  source=str(path),
515
492
  trigger=None,
516
493
  )
@@ -732,10 +709,16 @@ def load_user_skills() -> list[Skill]:
732
709
  def load_project_skills(work_dir: str | Path) -> list[Skill]:
733
710
  """Load skills from project-specific directories.
734
711
 
735
- Searches for skills in {work_dir}/.openhands/skills/ and
736
- {work_dir}/.openhands/microagents/ (legacy). Skills from both
737
- directories are merged, with skills/ taking precedence for
738
- duplicate names.
712
+ Searches for skills in {work_dir}/.agents/skills/,
713
+ {work_dir}/.openhands/skills/, and {work_dir}/.openhands/microagents/
714
+ (legacy). Skills are merged in priority order, with earlier directories
715
+ taking precedence for duplicate names.
716
+
717
+ Use .agents/skills for new skills. .openhands/skills is the legacy
718
+ OpenHands location, and .openhands/microagents is deprecated.
719
+
720
+ Example: If "my-skill" exists in both .agents/skills/ and
721
+ .openhands/skills/, the version from .agents/skills/ is used.
739
722
 
740
723
  Also loads third-party skill files (AGENTS.md, .cursorrules, etc.)
741
724
  directly from the work directory.
@@ -768,8 +751,10 @@ def load_project_skills(work_dir: str | Path) -> list[Skill]:
768
751
  except (SkillError, OSError) as e:
769
752
  logger.warning(f"Failed to load third-party skill from {path}: {e}")
770
753
 
771
- # Load project-specific skills from .openhands/skills and legacy microagents
754
+ # Load project-specific skills from .agents/skills, .openhands/skills,
755
+ # and legacy microagents (priority order; first wins for duplicates)
772
756
  project_skills_dirs = [
757
+ work_dir / ".agents" / "skills",
773
758
  work_dir / ".openhands" / "skills",
774
759
  work_dir / ".openhands" / "microagents", # Legacy support
775
760
  ]
@@ -23,6 +23,7 @@ from openhands.sdk.security.confirmation_policy import (
23
23
  ConfirmationPolicyBase,
24
24
  NeverConfirm,
25
25
  )
26
+ from openhands.sdk.tool.schema import Action, Observation
26
27
  from openhands.sdk.workspace.base import BaseWorkspace
27
28
 
28
29
 
@@ -267,6 +268,36 @@ class BaseConversation(ABC):
267
268
  """
268
269
  ...
269
270
 
271
+ @abstractmethod
272
+ def execute_tool(self, tool_name: str, action: Action) -> Observation:
273
+ """Execute a tool directly without going through the agent loop.
274
+
275
+ This method allows executing tools before or outside of the normal
276
+ conversation.run() flow. It handles agent initialization automatically,
277
+ so tools can be executed before the first run() call.
278
+
279
+ Note: This method bypasses the agent loop, including confirmation
280
+ policies and security analyzer checks. Callers are responsible for
281
+ applying any safeguards before executing potentially destructive tools.
282
+
283
+ This is useful for:
284
+ - Pre-run setup operations (e.g., indexing repositories)
285
+ - Manual tool execution for environment setup
286
+ - Testing tool behavior outside the agent loop
287
+
288
+ Args:
289
+ tool_name: The name of the tool to execute (e.g., "sleeptime_compute")
290
+ action: The action to pass to the tool executor
291
+
292
+ Returns:
293
+ The observation returned by the tool execution
294
+
295
+ Raises:
296
+ KeyError: If the tool is not found in the agent's tools
297
+ NotImplementedError: If the tool has no executor
298
+ """
299
+ ...
300
+
270
301
  @staticmethod
271
302
  def compose_callbacks(callbacks: Iterable[CallbackType]) -> CallbackType:
272
303
  """Compose multiple callbacks into a single callback function.
@@ -74,6 +74,7 @@ class Conversation:
74
74
  type[ConversationVisualizerBase] | ConversationVisualizerBase | None
75
75
  ) = DefaultConversationVisualizer,
76
76
  secrets: dict[str, SecretValue] | dict[str, str] | None = None,
77
+ delete_on_close: bool = False,
77
78
  ) -> "LocalConversation": ...
78
79
 
79
80
  @overload
@@ -96,6 +97,7 @@ class Conversation:
96
97
  type[ConversationVisualizerBase] | ConversationVisualizerBase | None
97
98
  ) = DefaultConversationVisualizer,
98
99
  secrets: dict[str, SecretValue] | dict[str, str] | None = None,
100
+ delete_on_close: bool = False,
99
101
  ) -> "RemoteConversation": ...
100
102
 
101
103
  def __new__(
@@ -118,6 +120,7 @@ class Conversation:
118
120
  type[ConversationVisualizerBase] | ConversationVisualizerBase | None
119
121
  ) = DefaultConversationVisualizer,
120
122
  secrets: dict[str, SecretValue] | dict[str, str] | None = None,
123
+ delete_on_close: bool = False,
121
124
  ) -> BaseConversation:
122
125
  from openhands.sdk.conversation.impl.local_conversation import LocalConversation
123
126
  from openhands.sdk.conversation.impl.remote_conversation import (
@@ -143,6 +146,7 @@ class Conversation:
143
146
  visualizer=visualizer,
144
147
  workspace=workspace,
145
148
  secrets=secrets,
149
+ delete_on_close=delete_on_close,
146
150
  )
147
151
 
148
152
  return LocalConversation(
@@ -159,4 +163,5 @@ class Conversation:
159
163
  workspace=workspace,
160
164
  persistence_dir=persistence_dir,
161
165
  secrets=secrets,
166
+ delete_on_close=delete_on_close,
162
167
  )
@@ -46,6 +46,7 @@ from openhands.sdk.security.analyzer import SecurityAnalyzerBase
46
46
  from openhands.sdk.security.confirmation_policy import (
47
47
  ConfirmationPolicyBase,
48
48
  )
49
+ from openhands.sdk.tool.schema import Action, Observation
49
50
  from openhands.sdk.utils.cipher import Cipher
50
51
  from openhands.sdk.workspace import LocalWorkspace
51
52
 
@@ -65,6 +66,7 @@ class LocalConversation(BaseConversation):
65
66
  llm_registry: LLMRegistry
66
67
  _cleanup_initiated: bool
67
68
  _hook_processor: HookEventProcessor | None
69
+ delete_on_close: bool = True
68
70
  # Plugin lazy loading state
69
71
  _plugin_specs: list[PluginSource] | None
70
72
  _resolved_plugins: list[ResolvedPluginSource] | None
@@ -90,6 +92,7 @@ class LocalConversation(BaseConversation):
90
92
  type[ConversationVisualizerBase] | ConversationVisualizerBase | None
91
93
  ) = DefaultConversationVisualizer,
92
94
  secrets: Mapping[str, SecretValue] | None = None,
95
+ delete_on_close: bool = True,
93
96
  cipher: Cipher | None = None,
94
97
  **_: object,
95
98
  ):
@@ -242,6 +245,7 @@ class LocalConversation(BaseConversation):
242
245
 
243
246
  atexit.register(self.close)
244
247
  self._start_observability_span(str(desired_id))
248
+ self.delete_on_close = delete_on_close
245
249
 
246
250
  @property
247
251
  def id(self) -> ConversationID:
@@ -708,20 +712,23 @@ class LocalConversation(BaseConversation):
708
712
  except AttributeError:
709
713
  # Object may be partially constructed; span fields may be missing.
710
714
  pass
711
- try:
712
- tools_map = self.agent.tools_map
713
- except (AttributeError, RuntimeError):
714
- # Agent not initialized or partially constructed
715
- return
716
- for tool in tools_map.values():
715
+ if self.delete_on_close:
717
716
  try:
718
- executable_tool = tool.as_executable()
719
- executable_tool.executor.close()
720
- except NotImplementedError:
721
- # Tool has no executor, skip it without erroring
722
- continue
723
- except Exception as e:
724
- logger.warning(f"Error closing executor for tool '{tool.name}': {e}")
717
+ tools_map = self.agent.tools_map
718
+ except (AttributeError, RuntimeError):
719
+ # Agent not initialized or partially constructed
720
+ return
721
+ for tool in tools_map.values():
722
+ try:
723
+ executable_tool = tool.as_executable()
724
+ executable_tool.executor.close()
725
+ except NotImplementedError:
726
+ # Tool has no executor, skip it without erroring
727
+ continue
728
+ except Exception as e:
729
+ logger.warning(
730
+ f"Error closing executor for tool '{tool.name}': {e}"
731
+ )
725
732
 
726
733
  def ask_agent(self, question: str) -> str:
727
734
  """Ask the agent a simple, stateless question and get a direct LLM response.
@@ -861,6 +868,49 @@ class LocalConversation(BaseConversation):
861
868
 
862
869
  logger.info("Condensation request processed")
863
870
 
871
+ def execute_tool(self, tool_name: str, action: Action) -> Observation:
872
+ """Execute a tool directly without going through the agent loop.
873
+
874
+ This method allows executing tools before or outside of the normal
875
+ conversation.run() flow. It handles agent initialization automatically,
876
+ so tools can be executed before the first run() call.
877
+
878
+ Note: This method bypasses the agent loop, including confirmation
879
+ policies and security analyzer checks. Callers are responsible for
880
+ applying any safeguards before executing potentially destructive tools.
881
+
882
+ This is useful for:
883
+ - Pre-run setup operations (e.g., indexing repositories)
884
+ - Manual tool execution for environment setup
885
+ - Testing tool behavior outside the agent loop
886
+
887
+ Args:
888
+ tool_name: The name of the tool to execute (e.g., "sleeptime_compute")
889
+ action: The action to pass to the tool executor
890
+
891
+ Returns:
892
+ The observation returned by the tool execution
893
+
894
+ Raises:
895
+ KeyError: If the tool is not found in the agent's tools
896
+ NotImplementedError: If the tool has no executor
897
+ """
898
+ # Ensure agent is initialized (loads plugins and initializes tools)
899
+ self._ensure_agent_ready()
900
+
901
+ # Get the tool from the agent's tools_map
902
+ tool = self.agent.tools_map.get(tool_name)
903
+ if tool is None:
904
+ available_tools = list(self.agent.tools_map.keys())
905
+ raise KeyError(
906
+ f"Tool '{tool_name}' not found. Available tools: {available_tools}"
907
+ )
908
+
909
+ # Execute the tool
910
+ if not tool.executor:
911
+ raise NotImplementedError(f"Tool '{tool_name}' has no executor")
912
+ return tool(action, self)
913
+
864
914
  def __del__(self) -> None:
865
915
  """Ensure cleanup happens when conversation is destroyed."""
866
916
  try: