react-agent-harness 0.0.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.0.2/react_agent_harness.egg-info → react_agent_harness-0.2.0}/PKG-INFO +2 -1
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/README.md +92 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/agents/base.py +92 -22
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/events.py +2 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/hitl.py +36 -2
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/runtime.py +96 -65
- react_agent_harness-0.2.0/harness/steering.py +674 -0
- react_agent_harness-0.2.0/memory/working.py +418 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/pyproject.toml +7 -2
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0/react_agent_harness.egg-info}/PKG-INFO +2 -1
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/SOURCES.txt +2 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/requires.txt +1 -0
- react_agent_harness-0.2.0/tests/test_steering.py +663 -0
- react_agent_harness-0.2.0/tests/test_working_memory.py +394 -0
- react_agent_harness-0.0.2/memory/working.py +0 -277
- react_agent_harness-0.0.2/tests/test_working_memory.py +0 -190
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/LICENSE +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/agents/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/annotation.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/checkpoint.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/llm/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/llm/openai.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/otel.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/harness/utils.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/memory/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/memory/manager.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/memory/redis_store.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/memory/stores.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/orchestrator/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/orchestrator/planner.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/setup.cfg +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_checkpoint_resume.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_mcp_adapter.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_memory.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_openai_llm.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_orchestrator.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_otel.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_streaming.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tests/test_vision.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/builtin/http_fetch.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/mcp/__init__.py +0 -0
- {react_agent_harness-0.0.2 → react_agent_harness-0.2.0}/tools/mcp/adapter.py +0 -0
{react_agent_harness-0.0.2/react_agent_harness.egg-info → react_agent_harness-0.2.0}/PKG-INFO
RENAMED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: react-agent-harness
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
License-File: LICENSE
|
|
7
|
+
Requires-Dist: prompt_toolkit>=3.0
|
|
7
8
|
Provides-Extra: lance
|
|
8
9
|
Requires-Dist: lancedb>=0.6; extra == "lance"
|
|
9
10
|
Requires-Dist: pyarrow>=14; extra == "lance"
|
|
@@ -38,6 +38,7 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
|
|
|
38
38
|
harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
|
|
39
39
|
harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
|
|
40
40
|
harness/hitl.py HITL approval gate — interactive CLI, session-allow list
|
|
41
|
+
harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
|
|
41
42
|
harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
|
|
42
43
|
harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
|
|
43
44
|
harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
|
|
@@ -720,3 +721,94 @@ When the human types a correction instead of y/n:
|
|
|
720
721
|
|
|
721
722
|
The `annotation_store` and `checkpoint_store` are independent — both can be
|
|
722
723
|
wired simultaneously for RLHF data collection with HITL review.
|
|
724
|
+
|
|
725
|
+
## Async steering
|
|
726
|
+
|
|
727
|
+
HITL is synchronous — it only fires when a gated tool is about to run. For
|
|
728
|
+
out-of-band course-correction (HTTP handler, supervisor agent, file watcher,
|
|
729
|
+
or a human typing in the terminal), each `BaseAgent` exposes a
|
|
730
|
+
non-blocking `steer(text)` method. Items are drained at the **top of each
|
|
731
|
+
ReAct iteration**, before the per-step checkpoint write and before the
|
|
732
|
+
next think, then appended to `WorkingMemory` as a `Human guidance: <text>`
|
|
733
|
+
user message. The LLM sees them on the next think and adjusts. One
|
|
734
|
+
`HUMAN_GUIDANCE` `BusEvent` fires per drained item.
|
|
735
|
+
|
|
736
|
+
Why a queue instead of writing straight to `WorkingMemory`: `steer()` is
|
|
737
|
+
synchronous and callable from any coroutine; `WorkingMemory.append` is
|
|
738
|
+
async (eviction can call the LLM). The queue is the producer/consumer
|
|
739
|
+
boundary, enforces step-boundary delivery, and keeps WM single-writer.
|
|
740
|
+
|
|
741
|
+
### Programmatic API (always available)
|
|
742
|
+
|
|
743
|
+
```python
|
|
744
|
+
agent.steer("skip the legal database, use academic sources only")
|
|
745
|
+
```
|
|
746
|
+
|
|
747
|
+
Fires immediately; the agent picks it up at the next step boundary.
|
|
748
|
+
Worst-case latency = remaining tool time + next-think time.
|
|
749
|
+
|
|
750
|
+
### Sources via factory (so orchestrated agents are reachable)
|
|
751
|
+
|
|
752
|
+
`BaseAgent` and `AgentRuntime` both accept `steering_source_factory` — a
|
|
753
|
+
callable `(agent) -> async context manager`. The agent enters the source on
|
|
754
|
+
`run_stream`, exits on completion. No live-agent registry; agents the
|
|
755
|
+
runtime constructs internally still get steering.
|
|
756
|
+
|
|
757
|
+
Two built-in factories:
|
|
758
|
+
|
|
759
|
+
```python
|
|
760
|
+
from harness.steering import file_steering_factory, stdin_steering_factory
|
|
761
|
+
|
|
762
|
+
# 1. File-based — one file per agent, polled for appends (no shared resource)
|
|
763
|
+
runtime = AgentRuntime(
|
|
764
|
+
...,
|
|
765
|
+
steering_source_factory=file_steering_factory(
|
|
766
|
+
"/tmp/ah-{run_id}-{agent_id}.steer"
|
|
767
|
+
),
|
|
768
|
+
)
|
|
769
|
+
# Steer from any other terminal:
|
|
770
|
+
# echo "wrap up and synthesise" >> /tmp/ah-<run_id>-researcher.steer
|
|
771
|
+
|
|
772
|
+
# 2. Stdin-based — single shared StdinRouter with prefix routing
|
|
773
|
+
runtime = AgentRuntime(
|
|
774
|
+
...,
|
|
775
|
+
steering_source_factory=stdin_steering_factory(),
|
|
776
|
+
)
|
|
777
|
+
# At the terminal:
|
|
778
|
+
# researcher: skip the legal db, focus on academic
|
|
779
|
+
# writer: keep the report under 500 words
|
|
780
|
+
# *: stop after this step
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
Single-agent stdin runs accept lines with no prefix. Multi-agent runs
|
|
784
|
+
require `agent_id: text` (or `*: text` for broadcast); unknown or
|
|
785
|
+
unprefixed lines print a stderr hint and are discarded.
|
|
786
|
+
|
|
787
|
+
The stdin factory's underlying `StdinRouter` is started/stopped
|
|
788
|
+
automatically — the runtime detects the factory's async-context-manager
|
|
789
|
+
shape and wraps `dispatch_stream` / `run_stream` / `run_routed_stream`
|
|
790
|
+
around it. Ref-counted so nested calls (`dispatch_stream → run_stream`)
|
|
791
|
+
don't double-start the router.
|
|
792
|
+
|
|
793
|
+
### HITL coordination
|
|
794
|
+
|
|
795
|
+
When a `StdinRouter` is active, HITL calls `router.claim_next_line()`
|
|
796
|
+
**before** printing its approval banner — the next stdin line resolves
|
|
797
|
+
HITL's pending Future and bypasses pub/sub. After resolution, subsequent
|
|
798
|
+
lines route to steering subscribers normally. When no router is active,
|
|
799
|
+
HITL falls back to a standalone `prompt_toolkit` session, ensuring consistent
|
|
800
|
+
key bindings (Enter submits; Alt-Enter or Ctrl-J inserts a newline) across both paths.
|
|
801
|
+
|
|
802
|
+
### Constraints
|
|
803
|
+
|
|
804
|
+
- Steering arrives **between steps**, never mid-tool, never mid-think.
|
|
805
|
+
Tools that are already running complete; the LLM stream that's
|
|
806
|
+
already producing completes; guidance lands at the next safe boundary.
|
|
807
|
+
- Guidance queued **after** the LLM emits `action: "finish"` is lost —
|
|
808
|
+
the agent already decided it's done.
|
|
809
|
+
- Guidance drained before a completed checkpoint write is in the
|
|
810
|
+
persisted WM. Guidance still queued (or drained but not yet checkpointed)
|
|
811
|
+
when the process crashes is lost; re-steer after `--resume`.
|
|
812
|
+
|
|
813
|
+
See `examples/complex_sysaudit_demo.py` for stdin steering across three
|
|
814
|
+
agents alongside HITL on the shell tool.
|
|
@@ -27,6 +27,7 @@ Token management:
|
|
|
27
27
|
from __future__ import annotations
|
|
28
28
|
|
|
29
29
|
import asyncio
|
|
30
|
+
import contextlib
|
|
30
31
|
import json
|
|
31
32
|
import logging
|
|
32
33
|
import uuid
|
|
@@ -132,6 +133,7 @@ class BaseAgent:
|
|
|
132
133
|
guard,
|
|
133
134
|
llm,
|
|
134
135
|
checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
|
|
136
|
+
steering_source_factory: Any | None = None, # (BaseAgent) -> async ctx mgr
|
|
135
137
|
) -> None:
|
|
136
138
|
self.config = config
|
|
137
139
|
self.role = config.role # exposed for orchestrator planner prompt
|
|
@@ -145,10 +147,60 @@ class BaseAgent:
|
|
|
145
147
|
self._task: str = ""
|
|
146
148
|
self._last_think_error: str | None = None
|
|
147
149
|
self._ckp_id: str = "" # f"{run_id}:{agent_id}" — unique per agent per run
|
|
150
|
+
# Async steering queue — items drained at the top of each ReAct
|
|
151
|
+
# step (before checkpoint, before think). Created eagerly so
|
|
152
|
+
# callers can steer() before run_stream starts.
|
|
153
|
+
self._steering: asyncio.Queue[str] = asyncio.Queue()
|
|
154
|
+
# Optional factory: called once at run_stream entry. Must return an
|
|
155
|
+
# async context manager that, while active, may call agent.steer().
|
|
156
|
+
# The agent owns the source's lifecycle — no live-instance registry.
|
|
157
|
+
self._steering_source_factory = steering_source_factory
|
|
148
158
|
self._resume_key: str = (
|
|
149
159
|
"" # key printed in --resume banner; set by orchestrator to outer run_id
|
|
150
160
|
)
|
|
151
161
|
|
|
162
|
+
# ── Async steering ────────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
def steer(self, text: str) -> None:
|
|
165
|
+
"""Inject human guidance to be consumed at the next ReAct step boundary.
|
|
166
|
+
|
|
167
|
+
Non-blocking and safe to call concurrently from any coroutine in the
|
|
168
|
+
same event loop. Drained at the top of the next iteration (before
|
|
169
|
+
the per-step checkpoint write and before the next think call), then
|
|
170
|
+
appended to WorkingMemory as a user message and emitted as a
|
|
171
|
+
HUMAN_GUIDANCE BusEvent.
|
|
172
|
+
|
|
173
|
+
Worst-case latency = time remaining in the current tool +
|
|
174
|
+
next-think duration. Guidance arriving after the LLM has already
|
|
175
|
+
emitted action="finish" is lost — the agent has decided it's done.
|
|
176
|
+
"""
|
|
177
|
+
if not text or not text.strip():
|
|
178
|
+
return
|
|
179
|
+
self._steering.put_nowait(text.strip())
|
|
180
|
+
|
|
181
|
+
async def _drain_steering(self, step: int) -> AsyncGenerator[BusEvent, None]:
|
|
182
|
+
"""Drain any queued guidance into WorkingMemory; yield one event each.
|
|
183
|
+
|
|
184
|
+
Called at the top of each ReAct iteration. Items are FIFO. Empty
|
|
185
|
+
queue is a no-op (zero overhead when no one is steering).
|
|
186
|
+
"""
|
|
187
|
+
while not self._steering.empty():
|
|
188
|
+
try:
|
|
189
|
+
text = self._steering.get_nowait()
|
|
190
|
+
except asyncio.QueueEmpty:
|
|
191
|
+
break # defensive — single consumer, should never fire
|
|
192
|
+
await self._working_memory.append("user", f"Human guidance: {text}")
|
|
193
|
+
self._tracer.log(
|
|
194
|
+
"human_guidance",
|
|
195
|
+
self.config.agent_id,
|
|
196
|
+
{"step": step, "text": text},
|
|
197
|
+
)
|
|
198
|
+
yield BusEvent(
|
|
199
|
+
type=EventType.HUMAN_GUIDANCE,
|
|
200
|
+
agent_id=self.config.agent_id,
|
|
201
|
+
payload={"step": step, "text": text},
|
|
202
|
+
)
|
|
203
|
+
|
|
152
204
|
# ── Streaming entry point (canonical) ─────────────────────────────────────
|
|
153
205
|
|
|
154
206
|
async def run_stream(
|
|
@@ -170,17 +222,25 @@ class BaseAgent:
|
|
|
170
222
|
await self._working_memory.append("system", system, pinned=True)
|
|
171
223
|
await self._working_memory.append("user", task)
|
|
172
224
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
225
|
+
# Steering source is owned by the agent for the duration of the run.
|
|
226
|
+
# nullcontext when no factory is configured — zero overhead.
|
|
227
|
+
source_cm = (
|
|
228
|
+
self._steering_source_factory(self)
|
|
229
|
+
if self._steering_source_factory is not None
|
|
230
|
+
else contextlib.nullcontext()
|
|
231
|
+
)
|
|
232
|
+
async with source_cm:
|
|
233
|
+
async with _ResumeHint(
|
|
234
|
+
self._resume_key,
|
|
235
|
+
self._checkpoint_store,
|
|
236
|
+
f"Agent {self.config.agent_id}",
|
|
237
|
+
check_key=self._ckp_id,
|
|
238
|
+
) as hint:
|
|
239
|
+
async for event in self._run_stream_internal(run_id):
|
|
240
|
+
if event.type == EventType.TASK_DONE:
|
|
241
|
+
await self._clear_checkpoint(run_id)
|
|
242
|
+
hint.done = True
|
|
243
|
+
yield event
|
|
184
244
|
|
|
185
245
|
async def _resume_stream(
|
|
186
246
|
self,
|
|
@@ -203,17 +263,23 @@ class BaseAgent:
|
|
|
203
263
|
yield event
|
|
204
264
|
start_step = pending["step"] + 1
|
|
205
265
|
|
|
206
|
-
|
|
207
|
-
self.
|
|
208
|
-
self.
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
async
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
266
|
+
source_cm = (
|
|
267
|
+
self._steering_source_factory(self)
|
|
268
|
+
if self._steering_source_factory is not None
|
|
269
|
+
else contextlib.nullcontext()
|
|
270
|
+
)
|
|
271
|
+
async with source_cm:
|
|
272
|
+
async with _ResumeHint(
|
|
273
|
+
self._resume_key,
|
|
274
|
+
self._checkpoint_store,
|
|
275
|
+
f"Agent {self.config.agent_id}",
|
|
276
|
+
check_key=self._ckp_id,
|
|
277
|
+
) as hint:
|
|
278
|
+
async for event in self._run_stream_internal(run_id, start_step=start_step):
|
|
279
|
+
if event.type == EventType.TASK_DONE:
|
|
280
|
+
await self._clear_checkpoint(run_id)
|
|
281
|
+
hint.done = True
|
|
282
|
+
yield event
|
|
217
283
|
|
|
218
284
|
async def _run_stream_internal(
|
|
219
285
|
self,
|
|
@@ -295,6 +361,10 @@ class BaseAgent:
|
|
|
295
361
|
) -> AsyncGenerator[BusEvent, None]:
|
|
296
362
|
for step in range(start_step, self.config.max_steps):
|
|
297
363
|
self._guard.check()
|
|
364
|
+
# Drain steering queue BEFORE the checkpoint write so any
|
|
365
|
+
# queued guidance is captured by the persisted WM.
|
|
366
|
+
async for guidance_event in self._drain_steering(step):
|
|
367
|
+
yield guidance_event
|
|
298
368
|
if (
|
|
299
369
|
self._checkpoint_store is not None
|
|
300
370
|
and self.config.checkpoint_every > 0
|
|
@@ -18,6 +18,7 @@ Event lifecycle within a single goal:
|
|
|
18
18
|
Orchestrated path (run / run_stream):
|
|
19
19
|
PLAN — orchestrator emitted a static DAG
|
|
20
20
|
(per task in DAG)
|
|
21
|
+
HUMAN_GUIDANCE? — async steering drained at top of step
|
|
21
22
|
THOUGHT — agent's next-step reasoning
|
|
22
23
|
TOKEN* — partial LLM output (only when client streams)
|
|
23
24
|
ACTION — agent chose a tool + args
|
|
@@ -46,6 +47,7 @@ class EventType(str, Enum):
|
|
|
46
47
|
TOKEN = "token"
|
|
47
48
|
ACTION = "action"
|
|
48
49
|
OBSERVATION = "observation"
|
|
50
|
+
HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
|
|
49
51
|
TASK_DONE = "task_done"
|
|
50
52
|
REPLAN = "replan"
|
|
51
53
|
SYNTHESIS = "synthesis"
|
|
@@ -176,14 +176,48 @@ async def request_approval(
|
|
|
176
176
|
|
|
177
177
|
Holds stdout_lock for the duration so concurrent agent events don't
|
|
178
178
|
interleave with the banner or the input prompt.
|
|
179
|
+
|
|
180
|
+
Input always goes through prompt_toolkit:
|
|
181
|
+
- If a steering router is active, HITL claims the next stdin read
|
|
182
|
+
via the router. Text submitted at the active steering prompt is
|
|
183
|
+
routed to HITL instead of subscribers; if the router reaches a
|
|
184
|
+
pending claim between steering prompt cycles, it shows HITL's
|
|
185
|
+
approval prompt directly.
|
|
186
|
+
- If no router is active, HITL spins up a one-shot PromptSession
|
|
187
|
+
for the approval prompt. Same UX either way.
|
|
179
188
|
"""
|
|
189
|
+
from harness.steering import get_active_router
|
|
190
|
+
|
|
180
191
|
async with stdout_lock:
|
|
192
|
+
router = get_active_router()
|
|
193
|
+
approve_prompt = " Approve? [y/n/a/correction]: "
|
|
194
|
+
# If a router is active, reserve the next stdin read BEFORE printing
|
|
195
|
+
# the banner so the user's typed answer routes to HITL (not steering).
|
|
196
|
+
hitl_future: Any = (
|
|
197
|
+
router.claim_next_line(prompt=approve_prompt) if router is not None else None
|
|
198
|
+
)
|
|
199
|
+
|
|
181
200
|
_print_banner(req)
|
|
182
201
|
|
|
183
202
|
guard.suspend()
|
|
184
203
|
try:
|
|
185
|
-
|
|
186
|
-
|
|
204
|
+
if hitl_future is not None:
|
|
205
|
+
raw = await hitl_future
|
|
206
|
+
else:
|
|
207
|
+
# Standalone: one-shot prompt_toolkit session with the same
|
|
208
|
+
# Enter-submits / Ctrl+J-newline bindings as steering so
|
|
209
|
+
# single-token answers (y/n/a) and multi-line corrections
|
|
210
|
+
# both compose naturally.
|
|
211
|
+
from prompt_toolkit import PromptSession
|
|
212
|
+
|
|
213
|
+
from harness.steering import StdinRouter
|
|
214
|
+
|
|
215
|
+
session: PromptSession = PromptSession()
|
|
216
|
+
raw = await session.prompt_async(
|
|
217
|
+
approve_prompt,
|
|
218
|
+
multiline=True,
|
|
219
|
+
key_bindings=StdinRouter._build_key_bindings(),
|
|
220
|
+
)
|
|
187
221
|
finally:
|
|
188
222
|
guard.resume()
|
|
189
223
|
|
|
@@ -284,6 +284,7 @@ class AgentRuntime:
|
|
|
284
284
|
enable_otel: bool = False,
|
|
285
285
|
annotation_store: Any | None = None, # InMemoryAnnotationStore or compatible
|
|
286
286
|
checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
|
|
287
|
+
steering_source_factory: Any | None = None, # passed to each spawned BaseAgent
|
|
287
288
|
) -> None:
|
|
288
289
|
self._agent_registry = agent_registry
|
|
289
290
|
self._tool_registry = tool_registry
|
|
@@ -292,6 +293,7 @@ class AgentRuntime:
|
|
|
292
293
|
self._guardrail_config = guardrail_config or GuardrailConfig()
|
|
293
294
|
self._enable_otel = enable_otel
|
|
294
295
|
self._annotation_store = annotation_store
|
|
296
|
+
self._steering_source_factory = steering_source_factory
|
|
295
297
|
# Auto-create a FileCheckpointStore if any agent uses hitl_tools or
|
|
296
298
|
# checkpoint_every — zero-dep default, no configuration required.
|
|
297
299
|
if checkpoint_store is None and any(
|
|
@@ -304,6 +306,21 @@ class AgentRuntime:
|
|
|
304
306
|
checkpoint_store = FileCheckpointStore()
|
|
305
307
|
self._checkpoint_store = checkpoint_store
|
|
306
308
|
|
|
309
|
+
def _steering_lifecycle(self):
|
|
310
|
+
"""Wrap the dispatch in the steering factory's lifecycle if it has one.
|
|
311
|
+
|
|
312
|
+
Factories with shared resources (e.g. a StdinRouter) expose
|
|
313
|
+
`__aenter__/__aexit__`. File-based factories don't. We detect at
|
|
314
|
+
runtime and use nullcontext for the latter so the wrapping is
|
|
315
|
+
always safe.
|
|
316
|
+
"""
|
|
317
|
+
import contextlib
|
|
318
|
+
|
|
319
|
+
f = self._steering_source_factory
|
|
320
|
+
if f is not None and hasattr(f, "__aenter__") and hasattr(f, "__aexit__"):
|
|
321
|
+
return f
|
|
322
|
+
return contextlib.nullcontext()
|
|
323
|
+
|
|
307
324
|
def _make_tracer(self) -> Tracer:
|
|
308
325
|
"""Create a fresh Tracer, attaching configured hooks."""
|
|
309
326
|
tracer = Tracer()
|
|
@@ -337,6 +354,7 @@ class AgentRuntime:
|
|
|
337
354
|
guard=guard,
|
|
338
355
|
llm=self._llm,
|
|
339
356
|
checkpoint_store=self._checkpoint_store,
|
|
357
|
+
steering_source_factory=self._steering_source_factory,
|
|
340
358
|
)
|
|
341
359
|
async for event in agent.run_stream(task, run_id=run_id):
|
|
342
360
|
yield event
|
|
@@ -386,6 +404,7 @@ class AgentRuntime:
|
|
|
386
404
|
guard=guard,
|
|
387
405
|
llm=self._llm,
|
|
388
406
|
checkpoint_store=self._checkpoint_store,
|
|
407
|
+
steering_source_factory=self._steering_source_factory,
|
|
389
408
|
)
|
|
390
409
|
agent._working_memory = wm
|
|
391
410
|
agent._task = checkpoint["task"]
|
|
@@ -492,6 +511,7 @@ class AgentRuntime:
|
|
|
492
511
|
guard=guard,
|
|
493
512
|
llm=self._llm,
|
|
494
513
|
checkpoint_store=self._checkpoint_store,
|
|
514
|
+
steering_source_factory=self._steering_source_factory,
|
|
495
515
|
)
|
|
496
516
|
agent._working_memory = wm
|
|
497
517
|
agent._task = checkpoint["task"]
|
|
@@ -551,6 +571,7 @@ class AgentRuntime:
|
|
|
551
571
|
guard=guard,
|
|
552
572
|
llm=self._llm,
|
|
553
573
|
checkpoint_store=self._checkpoint_store,
|
|
574
|
+
steering_source_factory=self._steering_source_factory,
|
|
554
575
|
)
|
|
555
576
|
for agent_id in self._agent_registry.all_ids()
|
|
556
577
|
}
|
|
@@ -603,50 +624,58 @@ class AgentRuntime:
|
|
|
603
624
|
Auto-resume: when --resume <key> is in sys.argv and a checkpoint store is
|
|
604
625
|
configured, the saved run is transparently restored and streamed — callers
|
|
605
626
|
need no resume-specific handling.
|
|
627
|
+
|
|
628
|
+
Steering: if `steering_source_factory` exposes async context manager
|
|
629
|
+
methods (e.g. `stdin_steering_factory()` which owns a shared
|
|
630
|
+
StdinRouter), this method wraps the entire dispatch in that
|
|
631
|
+
lifecycle so callers don't manage the shared resource themselves.
|
|
606
632
|
"""
|
|
607
633
|
from harness.events import BusEvent, EventType
|
|
608
634
|
|
|
609
|
-
|
|
610
|
-
|
|
635
|
+
async with self._steering_lifecycle():
|
|
636
|
+
if self._checkpoint_store is not None:
|
|
637
|
+
from harness.checkpoint import maybe_resume_key
|
|
611
638
|
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
complexity = await self._classify(goal)
|
|
619
|
-
path = "routed" if complexity == "simple" else "orchestrated"
|
|
620
|
-
yield BusEvent(
|
|
621
|
-
type=EventType.DISPATCH,
|
|
622
|
-
agent_id="orchestrator",
|
|
623
|
-
payload={"complexity": complexity, "path": path},
|
|
624
|
-
)
|
|
639
|
+
resume_key = maybe_resume_key()
|
|
640
|
+
if resume_key:
|
|
641
|
+
async for event in self.resume_stream(resume_key):
|
|
642
|
+
yield event
|
|
643
|
+
return
|
|
625
644
|
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
645
|
+
complexity = await self._classify(goal)
|
|
646
|
+
path = "routed" if complexity == "simple" else "orchestrated"
|
|
647
|
+
yield BusEvent(
|
|
648
|
+
type=EventType.DISPATCH,
|
|
649
|
+
agent_id="orchestrator",
|
|
650
|
+
payload={"complexity": complexity, "path": path},
|
|
651
|
+
)
|
|
652
|
+
|
|
653
|
+
if complexity == "simple":
|
|
654
|
+
# Own the full OTEL lifecycle for the simple path so dispatch + route
|
|
655
|
+
# events appear in the same trace as the agent work.
|
|
656
|
+
tracer = self._make_tracer()
|
|
657
|
+
run_id = str(uuid.uuid4())
|
|
658
|
+
tracer.start_run(run_id, goal)
|
|
659
|
+
try:
|
|
660
|
+
tracer.log("dispatch", "orchestrator", {"complexity": complexity, "path": path})
|
|
661
|
+
agent_id, rationale = await self.route(goal)
|
|
662
|
+
logger.info("Router → %s (%s)", agent_id, rationale)
|
|
663
|
+
tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
|
|
664
|
+
yield BusEvent(
|
|
665
|
+
type=EventType.ROUTE,
|
|
666
|
+
agent_id=agent_id,
|
|
667
|
+
payload={"agent_id": agent_id, "rationale": rationale},
|
|
668
|
+
)
|
|
669
|
+
async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
|
|
670
|
+
yield event
|
|
671
|
+
finally:
|
|
672
|
+
tracer.end_run()
|
|
673
|
+
else:
|
|
674
|
+
# Orchestrated path owns its own trace via _build_orchestrator.
|
|
675
|
+
# run_stream re-enters _steering_lifecycle with the same factory object;
|
|
676
|
+
# a ref-counted factory (e.g. stdin) means the nested enter does not double-start it.
|
|
677
|
+
async for event in self.run_stream(goal):
|
|
643
678
|
yield event
|
|
644
|
-
finally:
|
|
645
|
-
tracer.end_run()
|
|
646
|
-
else:
|
|
647
|
-
# Orchestrated path owns its own trace via _build_orchestrator.
|
|
648
|
-
async for event in self.run_stream(goal):
|
|
649
|
-
yield event
|
|
650
679
|
|
|
651
680
|
async def dispatch(self, goal: str) -> dict:
|
|
652
681
|
"""Blocking dispatch. Returns TASK_DONE payload for simple goals,
|
|
@@ -707,22 +736,23 @@ class AgentRuntime:
|
|
|
707
736
|
"""
|
|
708
737
|
from harness.events import BusEvent, EventType
|
|
709
738
|
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
739
|
+
async with self._steering_lifecycle():
|
|
740
|
+
tracer = self._make_tracer()
|
|
741
|
+
run_id = str(uuid.uuid4())
|
|
742
|
+
tracer.start_run(run_id, goal)
|
|
743
|
+
try:
|
|
744
|
+
agent_id, rationale = await self.route(goal)
|
|
745
|
+
logger.info("Router → %s (%s)", agent_id, rationale)
|
|
746
|
+
tracer.log("route", agent_id, {"agent_id": agent_id, "rationale": rationale})
|
|
747
|
+
yield BusEvent(
|
|
748
|
+
type=EventType.ROUTE,
|
|
749
|
+
agent_id=agent_id,
|
|
750
|
+
payload={"agent_id": agent_id, "rationale": rationale},
|
|
751
|
+
)
|
|
752
|
+
async for event in self._run_agent_with_tracer(agent_id, goal, tracer, run_id):
|
|
753
|
+
yield event
|
|
754
|
+
finally:
|
|
755
|
+
tracer.end_run()
|
|
726
756
|
|
|
727
757
|
async def run_routed(self, goal: str) -> dict:
|
|
728
758
|
"""Blocking routed run. Returns the TASK_DONE payload dict."""
|
|
@@ -793,17 +823,18 @@ class AgentRuntime:
|
|
|
793
823
|
Auto-resume: when --resume <key> is in sys.argv and a checkpoint store is
|
|
794
824
|
configured, the saved run is transparently restored and streamed.
|
|
795
825
|
"""
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
826
|
+
async with self._steering_lifecycle():
|
|
827
|
+
if self._checkpoint_store is not None:
|
|
828
|
+
from harness.checkpoint import maybe_resume_key
|
|
829
|
+
|
|
830
|
+
resume_key = maybe_resume_key()
|
|
831
|
+
if resume_key:
|
|
832
|
+
async for event in self.resume_stream(resume_key):
|
|
833
|
+
yield event
|
|
834
|
+
return
|
|
835
|
+
orchestrator, _tracer, _guard = self._build_orchestrator()
|
|
836
|
+
async for event in orchestrator.run_stream(goal):
|
|
837
|
+
yield event
|
|
807
838
|
|
|
808
839
|
|
|
809
840
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|