react-agent-harness 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {react_agent_harness-0.1.0/react_agent_harness.egg-info → react_agent_harness-0.2.0}/PKG-INFO +2 -1
  2. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/README.md +92 -0
  3. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/agents/base.py +92 -22
  4. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/events.py +2 -0
  5. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/hitl.py +36 -2
  6. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/runtime.py +96 -65
  7. react_agent_harness-0.2.0/harness/steering.py +674 -0
  8. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/pyproject.toml +7 -2
  9. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0/react_agent_harness.egg-info}/PKG-INFO +2 -1
  10. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/SOURCES.txt +2 -0
  11. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/requires.txt +1 -0
  12. react_agent_harness-0.2.0/tests/test_steering.py +663 -0
  13. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/LICENSE +0 -0
  14. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/agents/__init__.py +0 -0
  15. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/__init__.py +0 -0
  16. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/annotation.py +0 -0
  17. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/checkpoint.py +0 -0
  18. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/executor_bridge.py +0 -0
  19. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/llm/__init__.py +0 -0
  20. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/llm/openai.py +0 -0
  21. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/otel.py +0 -0
  22. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/harness/utils.py +0 -0
  23. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/__init__.py +0 -0
  24. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/episodic_lance.py +0 -0
  25. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/manager.py +0 -0
  26. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/redis_store.py +0 -0
  27. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/stores.py +0 -0
  28. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/memory/working.py +0 -0
  29. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/orchestrator/__init__.py +0 -0
  30. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/orchestrator/planner.py +0 -0
  31. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  32. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  33. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/setup.cfg +0 -0
  34. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_agents_base.py +0 -0
  35. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_annotation.py +0 -0
  36. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_checkpoint_resume.py +0 -0
  37. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_executor_bridge.py +0 -0
  38. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_http_fetch.py +0 -0
  39. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_mcp_adapter.py +0 -0
  40. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_memory.py +0 -0
  41. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_openai_llm.py +0 -0
  42. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_orchestrator.py +0 -0
  43. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_otel.py +0 -0
  44. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_parse_action_json.py +0 -0
  45. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_redis_store.py +0 -0
  46. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_streaming.py +0 -0
  47. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_vision.py +0 -0
  48. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tests/test_working_memory.py +0 -0
  49. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/__init__.py +0 -0
  50. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/builtin/__init__.py +0 -0
  51. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/builtin/fetch_image.py +0 -0
  52. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/builtin/http_fetch.py +0 -0
  53. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/mcp/__init__.py +0 -0
  54. {react_agent_harness-0.1.0 → react_agent_harness-0.2.0}/tools/mcp/adapter.py +0 -0
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
7
+ Requires-Dist: prompt_toolkit>=3.0
7
8
  Provides-Extra: lance
8
9
  Requires-Dist: lancedb>=0.6; extra == "lance"
9
10
  Requires-Dist: pyarrow>=14; extra == "lance"
@@ -38,6 +38,7 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
38
38
  harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
39
39
  harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
40
40
  harness/hitl.py HITL approval gate — interactive CLI, session-allow list
41
+ harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
41
42
  harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
42
43
  harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
43
44
  harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
@@ -720,3 +721,94 @@ When the human types a correction instead of y/n:
720
721
 
721
722
  The `annotation_store` and `checkpoint_store` are independent — both can be
722
723
  wired simultaneously for RLHF data collection with HITL review.
724
+
725
+ ## Async steering
726
+
727
+ HITL is synchronous — it only fires when a gated tool is about to run. For
728
+ out-of-band course-correction (HTTP handler, supervisor agent, file watcher,
729
+ or a human typing in the terminal), each `BaseAgent` exposes a
730
+ non-blocking `steer(text)` method. Items are drained at the **top of each
731
+ ReAct iteration**, before the per-step checkpoint write and before the
732
+ next think, then appended to `WorkingMemory` as a `Human guidance: <text>`
733
+ user message. The LLM sees them on the next think and adjusts. One
734
+ `HUMAN_GUIDANCE` `BusEvent` fires per drained item.
735
+
736
+ Why a queue instead of writing straight to `WorkingMemory`: `steer()` is
737
+ synchronous and callable from any coroutine; `WorkingMemory.append` is
738
+ async (eviction can call the LLM). The queue is the producer/consumer
739
+ boundary, enforces step-boundary delivery, and keeps WM single-writer.
740
+
741
+ ### Programmatic API (always available)
742
+
743
+ ```python
744
+ agent.steer("skip the legal database, use academic sources only")
745
+ ```
746
+
747
+ Returns immediately; the agent picks the guidance up at the next step boundary.
748
+ Worst-case latency = remaining tool time + next-think time.
749
+
750
+ ### Sources via factory (so orchestrated agents are reachable)
751
+
752
+ `BaseAgent` and `AgentRuntime` both accept `steering_source_factory` — a
753
+ callable `(agent) -> async context manager`. The agent enters the source when
754
+ `run_stream` starts and exits it when the run completes. No live-agent registry
755
+ is needed: agents the runtime constructs internally still get steering.
756
+
757
+ Two built-in factories:
758
+
759
+ ```python
760
+ from harness.steering import file_steering_factory, stdin_steering_factory
761
+
762
+ # 1. File-based — one file per agent, polled for appends (no shared resource)
763
+ runtime = AgentRuntime(
764
+ ...,
765
+ steering_source_factory=file_steering_factory(
766
+ "/tmp/ah-{run_id}-{agent_id}.steer"
767
+ ),
768
+ )
769
+ # Steer from any other terminal:
770
+ # echo "wrap up and synthesise" >> /tmp/ah-<run_id>-researcher.steer
771
+
772
+ # 2. Stdin-based — single shared StdinRouter with prefix routing
773
+ runtime = AgentRuntime(
774
+ ...,
775
+ steering_source_factory=stdin_steering_factory(),
776
+ )
777
+ # At the terminal:
778
+ # researcher: skip the legal db, focus on academic
779
+ # writer: keep the report under 500 words
780
+ # *: stop after this step
781
+ ```
782
+
783
+ Single-agent stdin runs accept lines with no prefix. Multi-agent runs
784
+ require `agent_id: text` (or `*: text` for broadcast); unknown or
785
+ unprefixed lines print a stderr hint and are discarded.
786
+
787
+ The stdin factory's underlying `StdinRouter` is started/stopped
788
+ automatically — the runtime detects the factory's async-context-manager
789
+ shape and wraps `dispatch_stream` / `run_stream` / `run_routed_stream`
790
+ around it. The lifecycle is ref-counted, so nested calls (`dispatch_stream → run_stream`)
791
+ don't double-start the router.
792
+
793
+ ### HITL coordination
794
+
795
+ When a `StdinRouter` is active, HITL calls `router.claim_next_line()`
796
+ **before** printing its approval banner — the next stdin line resolves
797
+ HITL's pending Future and bypasses pub/sub. After resolution, subsequent
798
+ lines route to steering subscribers normally. When no router is active,
799
+ HITL falls back to a standalone `prompt_toolkit` session, ensuring consistent
800
+ key-bindings (like Enter-submits and Alt-Enter/Ctrl-J-newline) across both paths.
801
+
802
+ ### Constraints
803
+
804
+ - Steering arrives **between steps**, never mid-tool, never mid-think.
805
+ Tools that are already running complete; the LLM stream that's
806
+ already producing completes; guidance lands at the next safe boundary.
807
+ - Guidance queued **after** the LLM emits `action: "finish"` is lost —
808
+ the agent already decided it's done.
809
+ - Crash after a drain has been followed by a checkpoint write → the drained
810
+ items are in the persisted WM. Guidance still queued, or drained but not yet
811
+ checkpointed, at crash time is lost; re-steer after `--resume`.
812
+
813
+ See `examples/complex_sysaudit_demo.py` for stdin steering across three
814
+ agents alongside HITL on the shell tool.
@@ -27,6 +27,7 @@ Token management:
27
27
  from __future__ import annotations
28
28
 
29
29
  import asyncio
30
+ import contextlib
30
31
  import json
31
32
  import logging
32
33
  import uuid
@@ -132,6 +133,7 @@ class BaseAgent:
132
133
  guard,
133
134
  llm,
134
135
  checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
136
+ steering_source_factory: Any | None = None, # (BaseAgent) -> async ctx mgr
135
137
  ) -> None:
136
138
  self.config = config
137
139
  self.role = config.role # exposed for orchestrator planner prompt
@@ -145,10 +147,60 @@ class BaseAgent:
145
147
  self._task: str = ""
146
148
  self._last_think_error: str | None = None
147
149
  self._ckp_id: str = "" # f"{run_id}:{agent_id}" — unique per agent per run
150
+ # Async steering queue — items drained at the top of each ReAct
151
+ # step (before checkpoint, before think). Created eagerly so
152
+ # callers can steer() before run_stream starts.
153
+ self._steering: asyncio.Queue[str] = asyncio.Queue()
154
+ # Optional factory: called once at run_stream entry. Must return an
155
+ # async context manager that, while active, may call agent.steer().
156
+ # The agent owns the source's lifecycle — no live-instance registry.
157
+ self._steering_source_factory = steering_source_factory
148
158
  self._resume_key: str = (
149
159
  "" # key printed in --resume banner; set by orchestrator to outer run_id
150
160
  )
151
161
 
162
+ # ── Async steering ────────────────────────────────────────────────────────
163
+
164
+ def steer(self, text: str) -> None:
165
+ """Inject human guidance to be consumed at the next ReAct step boundary.
166
+
167
+ Non-blocking and safe to call concurrently from any coroutine in the
168
+ same event loop. Drained at the top of the next iteration (before
169
+ the per-step checkpoint write and before the next think call), then
170
+ appended to WorkingMemory as a user message and emitted as a
171
+ HUMAN_GUIDANCE BusEvent.
172
+
173
+ Worst-case latency = time remaining in the current tool +
174
+ next-think duration. Guidance arriving after the LLM has already
175
+ emitted action="finish" is lost — the agent has decided it's done.
176
+ """
177
+ if not text or not text.strip():
178
+ return
179
+ self._steering.put_nowait(text.strip())
180
+
181
+ async def _drain_steering(self, step: int) -> AsyncGenerator[BusEvent, None]:
182
+ """Drain any queued guidance into WorkingMemory; yield one event each.
183
+
184
+ Called at the top of each ReAct iteration. Items are FIFO. Empty
185
+ queue is a no-op (zero overhead when no one is steering).
186
+ """
187
+ while not self._steering.empty():
188
+ try:
189
+ text = self._steering.get_nowait()
190
+ except asyncio.QueueEmpty:
191
+ break # defensive — single consumer, should never fire
192
+ await self._working_memory.append("user", f"Human guidance: {text}")
193
+ self._tracer.log(
194
+ "human_guidance",
195
+ self.config.agent_id,
196
+ {"step": step, "text": text},
197
+ )
198
+ yield BusEvent(
199
+ type=EventType.HUMAN_GUIDANCE,
200
+ agent_id=self.config.agent_id,
201
+ payload={"step": step, "text": text},
202
+ )
203
+
152
204
  # ── Streaming entry point (canonical) ─────────────────────────────────────
153
205
 
154
206
  async def run_stream(
@@ -170,17 +222,25 @@ class BaseAgent:
170
222
  await self._working_memory.append("system", system, pinned=True)
171
223
  await self._working_memory.append("user", task)
172
224
 
173
- async with _ResumeHint(
174
- self._resume_key,
175
- self._checkpoint_store,
176
- f"Agent {self.config.agent_id}",
177
- check_key=self._ckp_id,
178
- ) as hint:
179
- async for event in self._run_stream_internal(run_id):
180
- if event.type == EventType.TASK_DONE:
181
- await self._clear_checkpoint(run_id)
182
- hint.done = True
183
- yield event
225
+ # Steering source is owned by the agent for the duration of the run.
226
+ # nullcontext when no factory is configured — zero overhead.
227
+ source_cm = (
228
+ self._steering_source_factory(self)
229
+ if self._steering_source_factory is not None
230
+ else contextlib.nullcontext()
231
+ )
232
+ async with source_cm:
233
+ async with _ResumeHint(
234
+ self._resume_key,
235
+ self._checkpoint_store,
236
+ f"Agent {self.config.agent_id}",
237
+ check_key=self._ckp_id,
238
+ ) as hint:
239
+ async for event in self._run_stream_internal(run_id):
240
+ if event.type == EventType.TASK_DONE:
241
+ await self._clear_checkpoint(run_id)
242
+ hint.done = True
243
+ yield event
184
244
 
185
245
  async def _resume_stream(
186
246
  self,
@@ -203,17 +263,23 @@ class BaseAgent:
203
263
  yield event
204
264
  start_step = pending["step"] + 1
205
265
 
206
- async with _ResumeHint(
207
- self._resume_key,
208
- self._checkpoint_store,
209
- f"Agent {self.config.agent_id}",
210
- check_key=self._ckp_id,
211
- ) as hint:
212
- async for event in self._run_stream_internal(run_id, start_step=start_step):
213
- if event.type == EventType.TASK_DONE:
214
- await self._clear_checkpoint(run_id)
215
- hint.done = True
216
- yield event
266
+ source_cm = (
267
+ self._steering_source_factory(self)
268
+ if self._steering_source_factory is not None
269
+ else contextlib.nullcontext()
270
+ )
271
+ async with source_cm:
272
+ async with _ResumeHint(
273
+ self._resume_key,
274
+ self._checkpoint_store,
275
+ f"Agent {self.config.agent_id}",
276
+ check_key=self._ckp_id,
277
+ ) as hint:
278
+ async for event in self._run_stream_internal(run_id, start_step=start_step):
279
+ if event.type == EventType.TASK_DONE:
280
+ await self._clear_checkpoint(run_id)
281
+ hint.done = True
282
+ yield event
217
283
 
218
284
  async def _run_stream_internal(
219
285
  self,
@@ -295,6 +361,10 @@ class BaseAgent:
295
361
  ) -> AsyncGenerator[BusEvent, None]:
296
362
  for step in range(start_step, self.config.max_steps):
297
363
  self._guard.check()
364
+ # Drain steering queue BEFORE the checkpoint write so any
365
+ # queued guidance is captured by the persisted WM.
366
+ async for guidance_event in self._drain_steering(step):
367
+ yield guidance_event
298
368
  if (
299
369
  self._checkpoint_store is not None
300
370
  and self.config.checkpoint_every > 0
@@ -18,6 +18,7 @@ Event lifecycle within a single goal:
18
18
  Orchestrated path (run / run_stream):
19
19
  PLAN — orchestrator emitted a static DAG
20
20
  (per task in DAG)
21
+ HUMAN_GUIDANCE? — async steering drained at top of step
21
22
  THOUGHT — agent's next-step reasoning
22
23
  TOKEN* — partial LLM output (only when client streams)
23
24
  ACTION — agent chose a tool + args
@@ -46,6 +47,7 @@ class EventType(str, Enum):
46
47
  TOKEN = "token"
47
48
  ACTION = "action"
48
49
  OBSERVATION = "observation"
50
+ HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
49
51
  TASK_DONE = "task_done"
50
52
  REPLAN = "replan"
51
53
  SYNTHESIS = "synthesis"
@@ -176,14 +176,48 @@ async def request_approval(
176
176
 
177
177
  Holds stdout_lock for the duration so concurrent agent events don't
178
178
  interleave with the banner or the input prompt.
179
+
180
+ Input always goes through prompt_toolkit:
181
+ - If a steering router is active, HITL claims the next stdin read
182
+ via the router. Text submitted at the active steering prompt is
183
+ routed to HITL instead of subscribers; if the router reaches a
184
+ pending claim between steering prompt cycles, it shows HITL's
185
+ approval prompt directly.
186
+ - If no router is active, HITL spins up a one-shot PromptSession
187
+ for the approval prompt. Same UX either way.
179
188
  """
189
+ from harness.steering import get_active_router
190
+
180
191
  async with stdout_lock:
192
+ router = get_active_router()
193
+ approve_prompt = " Approve? [y/n/a/correction]: "
194
+ # If a router is active, reserve the next stdin read BEFORE printing
195
+ # the banner so the user's typed answer routes to HITL (not steering).
196
+ hitl_future: Any = (
197
+ router.claim_next_line(prompt=approve_prompt) if router is not None else None
198
+ )
199
+
181
200
  _print_banner(req)
182
201
 
183
202
  guard.suspend()
184
203
  try:
185
- loop = asyncio.get_running_loop()
186
- raw = await loop.run_in_executor(None, input, " Approve? [y/n/a/correction]: ")
204
+ if hitl_future is not None:
205
+ raw = await hitl_future
206
+ else:
207
+ # Standalone: one-shot prompt_toolkit session with the same
208
+ # Enter-submits / Ctrl+J-newline bindings as steering so
209
+ # single-token answers (y/n/a) and multi-line corrections
210
+ # both compose naturally.
211
+ from prompt_toolkit import PromptSession
212
+
213
+ from harness.steering import StdinRouter
214
+
215
+ session: PromptSession = PromptSession()
216
+ raw = await session.prompt_async(
217
+ approve_prompt,
218
+ multiline=True,
219
+ key_bindings=StdinRouter._build_key_bindings(),
220
+ )
187
221
  finally:
188
222
  guard.resume()
189
223
 
@@ -284,6 +284,7 @@ class AgentRuntime:
284
284
  enable_otel: bool = False,
285
285
  annotation_store: Any | None = None, # InMemoryAnnotationStore or compatible
286
286
  checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
287
+ steering_source_factory: Any | None = None, # passed to each spawned BaseAgent
287
288
  ) -> None:
288
289
  self._agent_registry = agent_registry
289
290
  self._tool_registry = tool_registry
@@ -292,6 +293,7 @@ class AgentRuntime:
292
293
  self._guardrail_config = guardrail_config or GuardrailConfig()
293
294
  self._enable_otel = enable_otel
294
295
  self._annotation_store = annotation_store
296
+ self._steering_source_factory = steering_source_factory
295
297
  # Auto-create a FileCheckpointStore if any agent uses hitl_tools or
296
298
  # checkpoint_every — zero-dep default, no configuration required.
297
299
  if checkpoint_store is None and any(
@@ -304,6 +306,21 @@ class AgentRuntime:
304
306
  checkpoint_store = FileCheckpointStore()
305
307
  self._checkpoint_store = checkpoint_store
306
308
 
309
+ def _steering_lifecycle(self):
310
+ """Wrap the dispatch in the steering factory's lifecycle if it has one.
311
+
312
+ Factories with shared resources (e.g. a StdinRouter) expose
313
+ `__aenter__/__aexit__`. File-based factories don't. We detect at
314
+ runtime and use nullcontext for the latter so the wrapping is
315
+ always safe.
316
+ """
317
+ import contextlib
318
+
319
+ f = self._steering_source_factory
320
+ if f is not None and hasattr(f, "__aenter__") and hasattr(f, "__aexit__"):
321
+ return f
322
+ return contextlib.nullcontext()
323
+
307
324
  def _make_tracer(self) -> Tracer:
308
325
  """Create a fresh Tracer, attaching configured hooks."""
309
326
  tracer = Tracer()
@@ -337,6 +354,7 @@ class AgentRuntime:
337
354
  guard=guard,
338
355
  llm=self._llm,
339
356
  checkpoint_store=self._checkpoint_store,
357
+ steering_source_factory=self._steering_source_factory,
340
358
  )
341
359
  async for event in agent.run_stream(task, run_id=run_id):
342
360
  yield event
@@ -386,6 +404,7 @@ class AgentRuntime:
386
404
  guard=guard,
387
405
  llm=self._llm,
388
406
  checkpoint_store=self._checkpoint_store,
407
+ steering_source_factory=self._steering_source_factory,
389
408
  )
390
409
  agent._working_memory = wm
391
410
  agent._task = checkpoint["task"]
@@ -492,6 +511,7 @@ class AgentRuntime:
492
511
  guard=guard,
493
512
  llm=self._llm,
494
513
  checkpoint_store=self._checkpoint_store,
514
+ steering_source_factory=self._steering_source_factory,
495
515
  )
496
516
  agent._working_memory = wm
497
517
  agent._task = checkpoint["task"]
@@ -551,6 +571,7 @@ class AgentRuntime:
551
571
  guard=guard,
552
572
  llm=self._llm,
553
573
  checkpoint_store=self._checkpoint_store,
574
+ steering_source_factory=self._steering_source_factory,
554
575
  )
555
576
  for agent_id in self._agent_registry.all_ids()
556
577
  }
@@ -603,50 +624,58 @@ class AgentRuntime:
603
624
  Auto-resume: when --resume <key> is in sys.argv and a checkpoint store is
604
625
  configured, the saved run is transparently restored and streamed — callers
605
626
  need no resume-specific handling.
627
+
628
+ Steering: if `steering_source_factory` exposes async context manager
629
+ methods (e.g. `stdin_steering_factory()` which owns a shared
630
+ StdinRouter), this method wraps the entire dispatch in that
631
+ lifecycle so callers don't manage the shared resource themselves.
606
632
  """
607
633
  from harness.events import BusEvent, EventType
608
634
 
609
- if self._checkpoint_store is not None:
610
- from harness.checkpoint import maybe_resume_key
635
+ async with self._steering_lifecycle():
636
+ if self._checkpoint_store is not None:
637
+ from harness.checkpoint import maybe_resume_key
611
638
 
612
- resume_key = maybe_resume_key()
613
- if resume_key:
614
- async for event in self.resume_stream(resume_key):
615
- yield event
616
- return
617
-
618
- complexity = await self._classify(goal)
619
- path = "routed" if complexity == "simple" else "orchestrated"
620
- yield BusEvent(
621
- type=EventType.DISPATCH,
622
- agent_id="orchestrator",
623
- payload={"complexity": complexity, "path": path},
624
- )
639
+ resume_key = maybe_resume_key()
640
+ if resume_key:
641
+ async for event in self.resume_stream(resume_key):
642
+ yield event
643
+ return
625
644
 
626
- if complexity == "simple":
627
- # Own the full OTEL lifecycle for the simple path so dispatch + route
628
- # events appear in the same trace as the agent work.
629
- tracer = self._make_tracer()
630
- run_id = str(uuid.uuid4())
631
- tracer.start_run(run_id, goal)
632
- try:
633
- tracer.log("dispatch", "orchestrator", {"complexity": complexity, "path": path})
634
- agent_id, rationale = await self.route(goal)
635
- logger.info("Router %s (%s)", agent_id, rationale)
636
- tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
637
- yield BusEvent(
638
- type=EventType.ROUTE,
639
- agent_id=agent_id,
640
- payload={"agent_id": agent_id, "rationale": rationale},
641
- )
642
- async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
645
+ complexity = await self._classify(goal)
646
+ path = "routed" if complexity == "simple" else "orchestrated"
647
+ yield BusEvent(
648
+ type=EventType.DISPATCH,
649
+ agent_id="orchestrator",
650
+ payload={"complexity": complexity, "path": path},
651
+ )
652
+
653
+ if complexity == "simple":
654
+ # Own the full OTEL lifecycle for the simple path so dispatch + route
655
+ # events appear in the same trace as the agent work.
656
+ tracer = self._make_tracer()
657
+ run_id = str(uuid.uuid4())
658
+ tracer.start_run(run_id, goal)
659
+ try:
660
+ tracer.log("dispatch", "orchestrator", {"complexity": complexity, "path": path})
661
+ agent_id, rationale = await self.route(goal)
662
+ logger.info("Router → %s (%s)", agent_id, rationale)
663
+ tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
664
+ yield BusEvent(
665
+ type=EventType.ROUTE,
666
+ agent_id=agent_id,
667
+ payload={"agent_id": agent_id, "rationale": rationale},
668
+ )
669
+ async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
670
+ yield event
671
+ finally:
672
+ tracer.end_run()
673
+ else:
674
+ # Orchestrated path owns its own trace via _build_orchestrator.
675
+ # run_stream re-enters _steering_lifecycle with the same factory; the
676
+ # factory's enter/exit is expected to be ref-counted, so no double-start.
677
+ async for event in self.run_stream(goal):
643
678
  yield event
644
- finally:
645
- tracer.end_run()
646
- else:
647
- # Orchestrated path owns its own trace via _build_orchestrator.
648
- async for event in self.run_stream(goal):
649
- yield event
650
679
 
651
680
  async def dispatch(self, goal: str) -> dict:
652
681
  """Blocking dispatch. Returns TASK_DONE payload for simple goals,
@@ -707,22 +736,23 @@ class AgentRuntime:
707
736
  """
708
737
  from harness.events import BusEvent, EventType
709
738
 
710
- tracer = self._make_tracer()
711
- run_id = str(uuid.uuid4())
712
- tracer.start_run(run_id, goal)
713
- try:
714
- agent_id, rationale = await self.route(goal)
715
- logger.info("Router %s (%s)", agent_id, rationale)
716
- tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
717
- yield BusEvent(
718
- type=EventType.ROUTE,
719
- agent_id=agent_id,
720
- payload={"agent_id": agent_id, "rationale": rationale},
721
- )
722
- async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
723
- yield event
724
- finally:
725
- tracer.end_run()
739
+ async with self._steering_lifecycle():
740
+ tracer = self._make_tracer()
741
+ run_id = str(uuid.uuid4())
742
+ tracer.start_run(run_id, goal)
743
+ try:
744
+ agent_id, rationale = await self.route(goal)
745
+ logger.info("Router %s (%s)", agent_id, rationale)
746
+ tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
747
+ yield BusEvent(
748
+ type=EventType.ROUTE,
749
+ agent_id=agent_id,
750
+ payload={"agent_id": agent_id, "rationale": rationale},
751
+ )
752
+ async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
753
+ yield event
754
+ finally:
755
+ tracer.end_run()
726
756
 
727
757
  async def run_routed(self, goal: str) -> dict:
728
758
  """Blocking routed run. Returns the TASK_DONE payload dict."""
@@ -793,17 +823,18 @@ class AgentRuntime:
793
823
  Auto-resume: when --resume <key> is in sys.argv and a checkpoint store is
794
824
  configured, the saved run is transparently restored and streamed.
795
825
  """
796
- if self._checkpoint_store is not None:
797
- from harness.checkpoint import maybe_resume_key
798
-
799
- resume_key = maybe_resume_key()
800
- if resume_key:
801
- async for event in self.resume_stream(resume_key):
802
- yield event
803
- return
804
- orchestrator, _tracer, _guard = self._build_orchestrator()
805
- async for event in orchestrator.run_stream(goal):
806
- yield event
826
+ async with self._steering_lifecycle():
827
+ if self._checkpoint_store is not None:
828
+ from harness.checkpoint import maybe_resume_key
829
+
830
+ resume_key = maybe_resume_key()
831
+ if resume_key:
832
+ async for event in self.resume_stream(resume_key):
833
+ yield event
834
+ return
835
+ orchestrator, _tracer, _guard = self._build_orchestrator()
836
+ async for event in orchestrator.run_stream(goal):
837
+ yield event
807
838
 
808
839
 
809
840
  # ── Helpers ───────────────────────────────────────────────────────────────────