react-agent-harness 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {react_agent_harness-0.1.0/react_agent_harness.egg-info → react_agent_harness-0.3.0}/PKG-INFO +2 -1
  2. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/README.md +189 -0
  3. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/agents/base.py +92 -22
  4. react_agent_harness-0.3.0/harness/cli.py +137 -0
  5. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/events.py +2 -0
  6. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/hitl.py +36 -2
  7. react_agent_harness-0.3.0/harness/llm/__init__.py +19 -0
  8. react_agent_harness-0.3.0/harness/llm/_streaming.py +56 -0
  9. react_agent_harness-0.3.0/harness/llm/auth.py +610 -0
  10. react_agent_harness-0.3.0/harness/llm/claude_code.py +312 -0
  11. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/llm/openai.py +11 -5
  12. react_agent_harness-0.3.0/harness/llm/openai_codex.py +283 -0
  13. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/runtime.py +96 -65
  14. react_agent_harness-0.3.0/harness/steering.py +674 -0
  15. react_agent_harness-0.3.0/harness/utils.py +102 -0
  16. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/pyproject.toml +10 -2
  17. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0/react_agent_harness.egg-info}/PKG-INFO +2 -1
  18. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/SOURCES.txt +13 -0
  19. react_agent_harness-0.3.0/react_agent_harness.egg-info/entry_points.txt +2 -0
  20. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/requires.txt +1 -0
  21. react_agent_harness-0.3.0/tests/test_claude_code_llm.py +265 -0
  22. react_agent_harness-0.3.0/tests/test_cli.py +69 -0
  23. react_agent_harness-0.3.0/tests/test_llm_auth.py +297 -0
  24. react_agent_harness-0.3.0/tests/test_openai_codex_llm.py +204 -0
  25. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_openai_llm.py +3 -2
  26. react_agent_harness-0.3.0/tests/test_steering.py +663 -0
  27. react_agent_harness-0.3.0/tests/test_utils.py +96 -0
  28. react_agent_harness-0.1.0/harness/utils.py +0 -46
  29. react_agent_harness-0.1.0/tools/builtin/__init__.py +0 -0
  30. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/LICENSE +0 -0
  31. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/agents/__init__.py +0 -0
  32. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/__init__.py +0 -0
  33. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/annotation.py +0 -0
  34. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/checkpoint.py +0 -0
  35. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/executor_bridge.py +0 -0
  36. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/otel.py +0 -0
  37. {react_agent_harness-0.1.0/harness/llm → react_agent_harness-0.3.0/memory}/__init__.py +0 -0
  38. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/episodic_lance.py +0 -0
  39. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/manager.py +0 -0
  40. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/redis_store.py +0 -0
  41. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/stores.py +0 -0
  42. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/working.py +0 -0
  43. {react_agent_harness-0.1.0/memory → react_agent_harness-0.3.0/orchestrator}/__init__.py +0 -0
  44. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/orchestrator/planner.py +0 -0
  45. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  46. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  47. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/setup.cfg +0 -0
  48. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_agents_base.py +0 -0
  49. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_annotation.py +0 -0
  50. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_checkpoint_resume.py +0 -0
  51. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_executor_bridge.py +0 -0
  52. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_http_fetch.py +0 -0
  53. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_mcp_adapter.py +0 -0
  54. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_memory.py +0 -0
  55. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_orchestrator.py +0 -0
  56. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_otel.py +0 -0
  57. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_parse_action_json.py +0 -0
  58. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_redis_store.py +0 -0
  59. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_streaming.py +0 -0
  60. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_vision.py +0 -0
  61. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_working_memory.py +0 -0
  62. {react_agent_harness-0.1.0/orchestrator → react_agent_harness-0.3.0/tools}/__init__.py +0 -0
  63. {react_agent_harness-0.1.0/tools → react_agent_harness-0.3.0/tools/builtin}/__init__.py +0 -0
  64. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/builtin/fetch_image.py +0 -0
  65. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/builtin/http_fetch.py +0 -0
  66. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/mcp/__init__.py +0 -0
  67. {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/mcp/adapter.py +0 -0
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
7
+ Requires-Dist: prompt_toolkit>=3.0
7
8
  Provides-Extra: lance
8
9
  Requires-Dist: lancedb>=0.6; extra == "lance"
9
10
  Requires-Dist: pyarrow>=14; extra == "lance"
@@ -38,6 +38,7 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
38
38
  harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
39
39
  harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
40
40
  harness/hitl.py HITL approval gate — interactive CLI, session-allow list
41
+ harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
41
42
  harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
42
43
  harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
43
44
  harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
@@ -73,6 +74,7 @@ explicit control.
73
74
  | `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
74
75
  | `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
75
76
  | `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
77
+ | `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
76
78
 
77
79
  ## Adding a new domain (3 steps)
78
80
 
@@ -108,6 +110,102 @@ llm = OpenAILLM(model="gpt-4o-mini") # reads OPENAI_API_KEY from
108
110
  runtime = AgentRuntime(..., llm=llm)
109
111
  ```
110
112
 
113
+ Credential-backed adapters can also plug into the same contract. This is the
114
+ shape used for provider-specific subscription or OAuth flows without teaching
115
+ agents about auth:
116
+
117
+ ```bash
118
+ agent-harness login openai-codex
119
+ agent-harness auth status openai-codex
120
+ agent-harness login claude-code
121
+ agent-harness auth status claude-code
122
+ ```
123
+
124
+ > **⚠️ Subscription adapters are experimental — use the metered API in production.**
125
+ >
126
+ > `OpenAICodexLLM` and `ClaudeCodeLLM` bridge **ChatGPT / Claude
127
+ > subscription OAuth credentials** into the harness by talking to
128
+ > internal CLI endpoints with CLI-shaped User-Agent and billing headers.
129
+ > This route:
130
+ >
131
+ > - **May violate OpenAI's and Anthropic's Terms of Service.** Both
132
+ > providers prohibit using subscription accounts (ChatGPT Plus/Pro,
133
+ > Claude Pro/Max) for arbitrary programmatic access — subscriptions
134
+ > are priced for the official CLI's intended use only.
135
+ > - **May result in account suspension** if abuse detection classifies
136
+ > harness traffic as misuse.
137
+ > - **Depends on undocumented internal endpoints**
138
+ > (`/backend-api/codex/responses`, the Anthropic Messages API with
139
+ > `claude-code-*` beta flags) that providers can change or revoke at
140
+ > any time.
141
+ >
142
+ > **Use these adapters only for personal research on accounts you own.**
143
+ > Do not use them to serve other users. For anything else, prefer the
144
+ > metered API path:
145
+ >
146
+ > - `OpenAILLM` with `OPENAI_API_KEY` (optionally routed through a
147
+ > gateway like LiteLLM/Helicone for cost headers).
148
+ > - The standard Anthropic Messages API with an Anthropic API key.
149
+
150
+ Direct `openai-codex` OAuth follows the Codex/Pi-style ChatGPT
151
+ subscription route rather than the stable OpenAI Platform API. The
152
+ Codex OAuth client id can be overridden with
153
+ `AGENT_HARNESS_OPENAI_CODEX_CLIENT_ID`.
154
+
155
+ ```python
156
+ from harness.llm.openai_codex import OpenAICodexLLM
157
+
158
+ llm = OpenAICodexLLM(
159
+ model="gpt-5.5",
160
+ auth_file="~/.agent-harness/auth/auth.json", # Pi-shaped openai-codex OAuth entry
161
+ )
162
+ runtime = AgentRuntime(..., llm=llm)
163
+ ```
164
+
165
+ `OpenAICodexLLM` calls the Codex backend directly
166
+ (`https://chatgpt.com/backend-api/codex/responses`) with OAuth credentials.
167
+ The stable fallback remains `OpenAILLM` with `OPENAI_API_KEY`.
168
+
169
+ For Claude Code-style setups, use `ClaudeCodeLLM` with Claude Pro/Max OAuth
170
+ credentials stored in the same auth file. It calls the Anthropic Messages API
171
+ directly with Claude-Code-compatible OAuth headers:
172
+
173
+ ```bash
174
+ agent-harness login claude-code
175
+ python examples/subscription_auth_demo.py claude-code
176
+ ```
177
+
178
+ ```python
179
+ from harness.llm.claude_code import ClaudeCodeLLM
180
+
181
+ llm = ClaudeCodeLLM(
182
+ model="claude-sonnet-4-6",
183
+ auth_file="~/.agent-harness/auth/auth.json",
184
+ )
185
+ ```
186
+
187
+ `ClaudeCodeLLM` reads a `claude-code` OAuth entry, refreshes it automatically
188
+ when expired, and retries once after `401`/`403`. This mirrors Pi's Claude
189
+ Pro/Max extension approach rather than shelling out to the Claude CLI. The
190
+ default model is the current canonical Sonnet release ID, `claude-sonnet-4-6`;
191
+ set `CLAUDE_CODE_MODEL` or pass `model="claude-opus-4-7"` to choose another
192
+ model.
193
+
194
+ Both adapters stream incrementally — `stream_complete()` yields each
195
+ SSE delta token as it arrives, and `complete()` consumes the same
196
+ stream and returns the concatenated text once finished. Cost / token
197
+ usage is captured from the final stream event into `last_usage`.
198
+
199
+ The Claude billing header's `cc_version` is read from
200
+ `CLAUDE_CODE_VERSION` (env) or from `claude --version` if the CLI is
201
+ installed; falls back to `unknown` otherwise. Pinning a specific
202
+ version with `CLAUDE_CODE_VERSION=2.1.150` is recommended if you want
203
+ stable behavior across CLI upgrades.
204
+
205
+ Do not copy browser/app refresh tokens into repo files. Store OAuth auth files
206
+ under `~/.agent-harness/auth` or reuse an existing Pi auth file with private
207
+ file permissions (`0600`).
208
+
111
209
  To use Anthropic / Gemini / Ollama / a local SGLang or vLLM server / anything
112
210
  else — write a 30-line adapter implementing those two methods. See
113
211
  `harness/llm/openai.py` for the reference shape; the harness never imports a
@@ -720,3 +818,94 @@ When the human types a correction instead of y/n:
720
818
 
721
819
  The `annotation_store` and `checkpoint_store` are independent — both can be
722
820
  wired simultaneously for RLHF data collection with HITL review.
821
+
822
+ ## Async steering
823
+
824
+ HITL is synchronous — it only fires when a gated tool is about to run. For
825
+ out-of-band course-correction (HTTP handler, supervisor agent, file watcher,
826
+ or a human typing in the terminal), each `BaseAgent` exposes a
827
+ non-blocking `steer(text)` method. Items are drained at the **top of each
828
+ ReAct iteration**, before the per-step checkpoint write and before the
829
+ next think, then appended to `WorkingMemory` as a `Human guidance: <text>`
830
+ user message. The LLM sees them on the next think and adjusts. One
831
+ `HUMAN_GUIDANCE` `BusEvent` fires per drained item.
832
+
833
+ Why a queue instead of writing straight to `WorkingMemory`: `steer()` is
834
+ synchronous and callable from any coroutine; `WorkingMemory.append` is
835
+ async (eviction can call the LLM). The queue is the producer/consumer
836
+ boundary, enforces step-boundary delivery, and keeps WM single-writer.
837
+
838
+ ### Programmatic API (always available)
839
+
840
+ ```python
841
+ agent.steer("skip the legal database, use academic sources only")
842
+ ```
843
+
844
+ Fires immediately; the agent picks it up at the next step boundary.
845
+ Worst-case latency = remaining tool time + next-think time.
846
+
847
+ ### Sources via factory (so orchestrated agents are reachable)
848
+
849
+ `BaseAgent` and `AgentRuntime` both accept `steering_source_factory` — a
850
+ callable `(agent) -> async ctx mgr`. The agent enters the source on
851
+ `run_stream`, exits on completion. No live-agent registry; agents the
852
+ runtime constructs internally still get steering.
853
+
854
+ Two built-in factories:
855
+
856
+ ```python
857
+ from harness.steering import file_steering_factory, stdin_steering_factory
858
+
859
+ # 1. File-based — one file per agent, polled for appends (no shared resource)
860
+ runtime = AgentRuntime(
861
+ ...,
862
+ steering_source_factory=file_steering_factory(
863
+ "/tmp/ah-{run_id}-{agent_id}.steer"
864
+ ),
865
+ )
866
+ # Steer from any other terminal:
867
+ # echo "wrap up and synthesise" >> /tmp/ah-<run_id>-researcher.steer
868
+
869
+ # 2. Stdin-based — single shared StdinRouter with prefix routing
870
+ runtime = AgentRuntime(
871
+ ...,
872
+ steering_source_factory=stdin_steering_factory(),
873
+ )
874
+ # At the terminal:
875
+ # researcher: skip the legal db, focus on academic
876
+ # writer: keep the report under 500 words
877
+ # *: stop after this step
878
+ ```
879
+
880
+ Single-agent stdin runs accept lines with no prefix. Multi-agent runs
881
+ require `agent_id: text` (or `*: text` for broadcast); unknown or
882
+ unprefixed lines print a stderr hint and are discarded.
883
+
884
+ The stdin factory's underlying `StdinRouter` is started/stopped
885
+ automatically — the runtime detects the factory's async-context-manager
886
+ shape and wraps `dispatch_stream` / `run_stream` / `run_routed_stream`
887
+ around it. Ref-counted so nested calls (`dispatch_stream → run_stream`)
888
+ don't double-start the router.
889
+
890
+ ### HITL coordination
891
+
892
+ When a `StdinRouter` is active, HITL calls `router.claim_next_line()`
893
+ **before** printing its approval banner — the next stdin line resolves
894
+ HITL's pending Future and bypasses pub/sub. After resolution, subsequent
895
+ lines route to steering subscribers normally. When no router is active,
896
+ HITL falls back to a standalone `prompt_toolkit` session, ensuring consistent
897
+ key-bindings (like Enter-submits and Alt-Enter/Ctrl-J-newline) across both paths.
898
+
899
+ ### Constraints
900
+
901
+ - Steering arrives **between steps**, never mid-tool, never mid-think.
902
+ Tools that are already running complete; the LLM stream that's
903
+ already producing completes; guidance lands at the next safe boundary.
904
+ - Guidance queued **after** the LLM emits `action: "finish"` is lost —
905
+ the agent already decided it's done.
906
+ - Guidance is drained into WM before the checkpoint write, so a crash
907
+ after that write keeps it in the persisted WM. Guidance still queued
908
+ (not yet drained) at crash time is lost; re-steer after `--resume`.
909
+
910
+ See `examples/complex_sysaudit_demo.py` for stdin steering across three
911
+ agents alongside HITL on the shell tool.
@@ -27,6 +27,7 @@ Token management:
27
27
  from __future__ import annotations
28
28
 
29
29
  import asyncio
30
+ import contextlib
30
31
  import json
31
32
  import logging
32
33
  import uuid
@@ -132,6 +133,7 @@ class BaseAgent:
132
133
  guard,
133
134
  llm,
134
135
  checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
136
+ steering_source_factory: Any | None = None, # (BaseAgent) -> async ctx mgr
135
137
  ) -> None:
136
138
  self.config = config
137
139
  self.role = config.role # exposed for orchestrator planner prompt
@@ -145,10 +147,60 @@ class BaseAgent:
145
147
  self._task: str = ""
146
148
  self._last_think_error: str | None = None
147
149
  self._ckp_id: str = "" # f"{run_id}:{agent_id}" — unique per agent per run
150
+ # Async steering queue — items drained at the top of each ReAct
151
+ # step (before checkpoint, before think). Created eagerly so
152
+ # callers can steer() before run_stream starts.
153
+ self._steering: asyncio.Queue[str] = asyncio.Queue()
154
+ # Optional factory: called once at run_stream entry. Must return an
155
+ # async context manager that, while active, may call agent.steer().
156
+ # The agent owns the source's lifecycle — no live-instance registry.
157
+ self._steering_source_factory = steering_source_factory
148
158
  self._resume_key: str = (
149
159
  "" # key printed in --resume banner; set by orchestrator to outer run_id
150
160
  )
151
161
 
162
+ # ── Async steering ────────────────────────────────────────────────────────
163
+
164
def steer(self, text: str) -> None:
    """Queue human guidance for delivery at the next ReAct step boundary.

    The call never blocks: the trimmed text is placed on the agent's
    steering queue with ``put_nowait`` and picked up later by the step
    loop, which drains the queue at the top of each iteration (before the
    per-step checkpoint write and before the next think call), records the
    text in WorkingMemory as a user message, and emits a HUMAN_GUIDANCE
    BusEvent.

    Empty or whitespace-only input is ignored. Guidance that arrives after
    the LLM has already emitted action="finish" is lost, because no further
    step boundary will drain it. Worst-case latency is the time left in
    the current tool plus the duration of the next think.
    """
    # Normalise once: falsy input (None / "") collapses to the empty string.
    guidance = text.strip() if text else ""
    if guidance:
        self._steering.put_nowait(guidance)
180
+
181
async def _drain_steering(self, step: int) -> AsyncGenerator[BusEvent, None]:
    """Move every queued guidance item into WorkingMemory, oldest first.

    Invoked at the top of each ReAct iteration. For each item taken off
    the steering queue this appends a ``Human guidance: <text>`` user
    message to WorkingMemory, logs a ``human_guidance`` trace entry, and
    yields one HUMAN_GUIDANCE BusEvent carrying the step number and text.
    With nothing queued the generator finishes without yielding.
    """
    while True:
        try:
            guidance = self._steering.get_nowait()
        except asyncio.QueueEmpty:
            # Queue exhausted — nothing (more) to deliver this step.
            return
        await self._working_memory.append("user", f"Human guidance: {guidance}")
        self._tracer.log(
            "human_guidance",
            self.config.agent_id,
            {"step": step, "text": guidance},
        )
        yield BusEvent(
            type=EventType.HUMAN_GUIDANCE,
            agent_id=self.config.agent_id,
            payload={"step": step, "text": guidance},
        )
203
+
152
204
  # ── Streaming entry point (canonical) ─────────────────────────────────────
153
205
 
154
206
  async def run_stream(
@@ -170,17 +222,25 @@ class BaseAgent:
170
222
  await self._working_memory.append("system", system, pinned=True)
171
223
  await self._working_memory.append("user", task)
172
224
 
173
- async with _ResumeHint(
174
- self._resume_key,
175
- self._checkpoint_store,
176
- f"Agent {self.config.agent_id}",
177
- check_key=self._ckp_id,
178
- ) as hint:
179
- async for event in self._run_stream_internal(run_id):
180
- if event.type == EventType.TASK_DONE:
181
- await self._clear_checkpoint(run_id)
182
- hint.done = True
183
- yield event
225
+ # Steering source is owned by the agent for the duration of the run.
226
+ # nullcontext when no factory is configured — zero overhead.
227
+ source_cm = (
228
+ self._steering_source_factory(self)
229
+ if self._steering_source_factory is not None
230
+ else contextlib.nullcontext()
231
+ )
232
+ async with source_cm:
233
+ async with _ResumeHint(
234
+ self._resume_key,
235
+ self._checkpoint_store,
236
+ f"Agent {self.config.agent_id}",
237
+ check_key=self._ckp_id,
238
+ ) as hint:
239
+ async for event in self._run_stream_internal(run_id):
240
+ if event.type == EventType.TASK_DONE:
241
+ await self._clear_checkpoint(run_id)
242
+ hint.done = True
243
+ yield event
184
244
 
185
245
  async def _resume_stream(
186
246
  self,
@@ -203,17 +263,23 @@ class BaseAgent:
203
263
  yield event
204
264
  start_step = pending["step"] + 1
205
265
 
206
- async with _ResumeHint(
207
- self._resume_key,
208
- self._checkpoint_store,
209
- f"Agent {self.config.agent_id}",
210
- check_key=self._ckp_id,
211
- ) as hint:
212
- async for event in self._run_stream_internal(run_id, start_step=start_step):
213
- if event.type == EventType.TASK_DONE:
214
- await self._clear_checkpoint(run_id)
215
- hint.done = True
216
- yield event
266
+ source_cm = (
267
+ self._steering_source_factory(self)
268
+ if self._steering_source_factory is not None
269
+ else contextlib.nullcontext()
270
+ )
271
+ async with source_cm:
272
+ async with _ResumeHint(
273
+ self._resume_key,
274
+ self._checkpoint_store,
275
+ f"Agent {self.config.agent_id}",
276
+ check_key=self._ckp_id,
277
+ ) as hint:
278
+ async for event in self._run_stream_internal(run_id, start_step=start_step):
279
+ if event.type == EventType.TASK_DONE:
280
+ await self._clear_checkpoint(run_id)
281
+ hint.done = True
282
+ yield event
217
283
 
218
284
  async def _run_stream_internal(
219
285
  self,
@@ -295,6 +361,10 @@ class BaseAgent:
295
361
  ) -> AsyncGenerator[BusEvent, None]:
296
362
  for step in range(start_step, self.config.max_steps):
297
363
  self._guard.check()
364
+ # Drain steering queue BEFORE the checkpoint write so any
365
+ # queued guidance is captured by the persisted WM.
366
+ async for guidance_event in self._drain_steering(step):
367
+ yield guidance_event
298
368
  if (
299
369
  self._checkpoint_store is not None
300
370
  and self.config.checkpoint_every > 0
@@ -0,0 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import asyncio
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from harness.llm.auth import (
11
+ AnthropicClaudeCodeOAuthClient,
12
+ AuthFileOAuthProvider,
13
+ OAuthCredential,
14
+ OpenAICodexOAuthClient,
15
+ default_auth_file,
16
+ )
17
+
18
+ PROVIDERS = ["openai-codex", "claude-code"]
19
+
20
+
21
def main() -> int:
    """Parse ``agent-harness`` command-line arguments and run the command.

    Supported invocations (``--auth-file`` is optional on each)::

        agent-harness login <provider>
        agent-harness auth status <provider>
        agent-harness auth logout <provider>

    Returns:
        The process exit code: the selected handler's return value, ``1``
        when the handler raises, or ``130`` when interrupted with Ctrl-C.
        Argument errors exit with status 2 through ``argparse`` and never
        return.
    """
    parser = argparse.ArgumentParser(prog="agent-harness", description="agent-harness utilities")
    sub = parser.add_subparsers(dest="command", required=True)

    def add_provider_args(cmd: argparse.ArgumentParser) -> None:
        # Every leaf command takes the same provider + auth-file arguments.
        cmd.add_argument("provider", choices=PROVIDERS)
        cmd.add_argument("--auth-file", default=str(default_auth_file()))

    add_provider_args(sub.add_parser("login", help="log in to a provider"))
    auth = sub.add_parser("auth", help="inspect or clear provider auth")
    auth_sub = auth.add_subparsers(dest="auth_command", required=True)
    add_provider_args(auth_sub.add_parser("status", help="show auth status"))
    add_provider_args(auth_sub.add_parser("logout", help="remove auth credentials"))

    args = parser.parse_args()
    try:
        auth_path = Path(args.auth_file).expanduser()
        if args.command == "login":
            login_flows = {
                "openai-codex": _login_openai_codex,
                "claude-code": _login_claude_code,
            }
            flow = login_flows.get(args.provider)
            if flow is not None:
                return asyncio.run(flow(auth_path))
        elif args.command == "auth":
            auth_handlers = {
                "status": _status_oauth_provider,
                "logout": _logout_oauth_provider,
            }
            handler = auth_handlers.get(args.auth_command)
            if handler is not None:
                return handler(auth_path, args.provider)
    except KeyboardInterrupt:
        # Ctrl-C while waiting for authorization: exit quietly with the
        # conventional 128 + SIGINT status instead of dumping a traceback.
        print("agent-harness: interrupted", file=sys.stderr)
        return 130
    except Exception as e:
        # Top-level boundary: report the failure on stderr, no traceback.
        print(f"agent-harness: {e}", file=sys.stderr)
        return 1
    # Only reachable if a command/provider was registered above without a
    # matching handler; parser.error() prints usage and exits with status 2.
    parser.error("unsupported command")
60
+
61
+
62
async def _login_openai_codex(path: Path) -> int:
    """Run the OpenAI Codex device-code login and store the credential.

    Requests a device code, prints the verification URL and user code,
    waits for ``poll_device_code`` to return a credential, then writes it
    to ``path``. The OAuth client is closed whether or not login succeeds.

    Returns:
        ``0`` once the credential has been saved.
    """
    oauth = OpenAICodexOAuthClient()
    try:
        grant = await oauth.request_device_code()
        print(
            "OpenAI Codex login",
            f"Open: {grant.verification_uri}",
            f"Code: {grant.user_code}",
            "Waiting for authorization...",
            sep="\n",
        )
        credential = await oauth.poll_device_code(grant)
    finally:
        await oauth.aclose()
    _write_oauth_credential(path, credential)
    print(f"Logged in to openai-codex. Credentials saved to {path}")
    return 0
76
+
77
+
78
async def _login_claude_code(path: Path) -> int:
    """Run the Claude Code paste-back login and store the credential.

    Starts a login, prints the URL to open, reads the callback URL (or the
    ``code#state`` value) from standard input, exchanges it for a
    credential via ``finish_login``, then writes it to ``path``. The OAuth
    client is closed whether or not login succeeds.

    Returns:
        ``0`` once the credential has been saved.
    """
    oauth = AnthropicClaudeCodeOAuthClient()
    try:
        pending = oauth.begin_login()
        print(
            "Claude Code login",
            f"Open: {pending.url}",
            "Paste the final callback URL, or the code#state value.",
            sep="\n",
        )
        pasted = input("Callback: ")
        credential = await oauth.finish_login(pending, pasted)
    finally:
        await oauth.aclose()
    _write_oauth_credential(path, credential)
    print(f"Logged in to claude-code. Credentials saved to {path}")
    return 0
92
+
93
+
94
def _status_oauth_provider(path: Path, provider_name: str) -> int:
    """Print the stored credential status for ``provider_name`` as JSON.

    Args:
        path: Auth file to read.
        provider_name: Provider entry to look up in that file.

    Returns:
        ``0`` after printing the status document (provider, auth file,
        account id, expiry timestamp, expired flag); ``1`` after printing
        a ``Not logged in: ...`` line when the credential cannot be read.
    """
    store = AuthFileOAuthProvider(path, provider=provider_name)
    try:
        credential = store._read_credential()
    except Exception as exc:
        # A missing file gets a friendlier reason than the raw exception text.
        reason = f"{path} does not exist" if isinstance(exc, FileNotFoundError) else exc
        print(f"Not logged in: {reason}")
        return 1

    expiry = credential.expires_at
    report = {
        "provider": provider_name,
        "auth_file": str(path),
        "account_id": credential.account_id,
        "expires_at": expiry.isoformat() if expiry else None,
        "expired": credential.is_expired(),
    }
    print(json.dumps(report, indent=2))
    return 0
113
+
114
+
115
def _logout_oauth_provider(path: Path, provider_name: str) -> int:
    """Clear the stored credential for ``provider_name`` and confirm it.

    The provider is opened with the private-permissions requirement turned
    off, so logout is attempted regardless of the auth file's current mode.

    Returns:
        ``0`` after the confirmation line has been printed.
    """
    AuthFileOAuthProvider(
        path,
        provider=provider_name,
        require_private_permissions=False,
    ).clear()
    print(f"Removed {provider_name} credentials from {path}")
    return 0
122
+
123
+
124
def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
    """Persist ``cred`` into the auth file at ``path`` with private permissions.

    Creates the parent directory and, when the file is missing, an empty
    JSON object for the provider to write into. The file mode is forced to
    ``0600`` on non-Windows platforms before the credential is written —
    including when the file already existed, so tokens are never added to
    a group- or world-readable file.

    Args:
        path: Destination auth file.
        cred: Credential to store; ``cred.provider`` selects the entry.

    Raises:
        OSError: If the directory or file cannot be created or its mode
            cannot be changed.
    """
    provider = AuthFileOAuthProvider(
        path, provider=cred.provider, require_private_permissions=False
    )
    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        # O_EXCL makes "create only if missing" atomic and applies the private
        # mode at creation time instead of after a default-umask write.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        pass
    else:
        with os.fdopen(fd, "w") as fh:
            fh.write("{}")
    if os.name != "nt":
        # Also tightens a pre-existing file before any token is written to it.
        path.chmod(0o600)
    provider._write_credential(cred)
134
+
135
+
136
+ if __name__ == "__main__":
137
+ raise SystemExit(main())
@@ -18,6 +18,7 @@ Event lifecycle within a single goal:
18
18
  Orchestrated path (run / run_stream):
19
19
  PLAN — orchestrator emitted a static DAG
20
20
  (per task in DAG)
21
+ HUMAN_GUIDANCE? — async steering drained at top of step
21
22
  THOUGHT — agent's next-step reasoning
22
23
  TOKEN* — partial LLM output (only when client streams)
23
24
  ACTION — agent chose a tool + args
@@ -46,6 +47,7 @@ class EventType(str, Enum):
46
47
  TOKEN = "token"
47
48
  ACTION = "action"
48
49
  OBSERVATION = "observation"
50
+ HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
49
51
  TASK_DONE = "task_done"
50
52
  REPLAN = "replan"
51
53
  SYNTHESIS = "synthesis"
@@ -176,14 +176,48 @@ async def request_approval(
176
176
 
177
177
  Holds stdout_lock for the duration so concurrent agent events don't
178
178
  interleave with the banner or the input prompt.
179
+
180
+ Input always goes through prompt_toolkit:
181
+ - If a steering router is active, HITL claims the next stdin read
182
+ via the router. Text submitted at the active steering prompt is
183
+ routed to HITL instead of subscribers; if the router reaches a
184
+ pending claim between steering prompt cycles, it shows HITL's
185
+ approval prompt directly.
186
+ - If no router is active, HITL spins up a one-shot PromptSession
187
+ for the approval prompt. Same UX either way.
179
188
  """
189
+ from harness.steering import get_active_router
190
+
180
191
  async with stdout_lock:
192
+ router = get_active_router()
193
+ approve_prompt = " Approve? [y/n/a/correction]: "
194
+ # If a router is active, reserve the next stdin read BEFORE printing
195
+ # the banner so the user's typed answer routes to HITL (not steering).
196
+ hitl_future: Any = (
197
+ router.claim_next_line(prompt=approve_prompt) if router is not None else None
198
+ )
199
+
181
200
  _print_banner(req)
182
201
 
183
202
  guard.suspend()
184
203
  try:
185
- loop = asyncio.get_running_loop()
186
- raw = await loop.run_in_executor(None, input, " Approve? [y/n/a/correction]: ")
204
+ if hitl_future is not None:
205
+ raw = await hitl_future
206
+ else:
207
+ # Standalone: one-shot prompt_toolkit session with the same
208
+ # Enter-submits / Ctrl+J-newline bindings as steering so
209
+ # single-token answers (y/n/a) and multi-line corrections
210
+ # both compose naturally.
211
+ from prompt_toolkit import PromptSession
212
+
213
+ from harness.steering import StdinRouter
214
+
215
+ session: PromptSession = PromptSession()
216
+ raw = await session.prompt_async(
217
+ approve_prompt,
218
+ multiline=True,
219
+ key_bindings=StdinRouter._build_key_bindings(),
220
+ )
187
221
  finally:
188
222
  guard.resume()
189
223
 
@@ -0,0 +1,19 @@
1
+ """LLM adapter helpers."""
2
+
3
+ from harness.llm.auth import (
4
+ AnthropicClaudeCodeOAuthClient,
5
+ AuthFileOAuthProvider,
6
+ OAuthCredential,
7
+ OpenAICodexOAuthClient,
8
+ )
9
+ from harness.llm.claude_code import ClaudeCodeLLM
10
+ from harness.llm.openai_codex import OpenAICodexLLM
11
+
12
+ __all__ = [
13
+ "AnthropicClaudeCodeOAuthClient",
14
+ "AuthFileOAuthProvider",
15
+ "ClaudeCodeLLM",
16
+ "OAuthCredential",
17
+ "OpenAICodexLLM",
18
+ "OpenAICodexOAuthClient",
19
+ ]
@@ -0,0 +1,56 @@
1
+ """Shared SSE helpers for streaming-capable LLM adapters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import AsyncGenerator
6
+ from typing import Any
7
+
8
+
9
async def aiter_sse_events(response: Any) -> AsyncGenerator[tuple[str, str], None]:
    """Yield ``(event_type, data)`` pairs parsed from an SSE response.

    Reads lines from ``response.aiter_lines()`` and interprets the
    ``event:`` / ``data:`` fields; all other lines (comments, ``id:``,
    ``retry:``) are ignored. A blank line terminates the current event.

    Parsing rules:
      * The event type defaults to ``"message"`` and is reset at every
        blank line, even when no data was buffered, so a data-less
        ``event:`` line cannot leak its type into the next event.
      * Only the single optional space after ``data:`` is removed; any
        other whitespace in the payload is preserved.
      * Multiple ``data:`` lines in one event are joined with ``"\\n"``.
      * Data still buffered when the stream ends (no terminating blank
        line) is flushed as a final event.

    Args:
        response: Object exposing an async ``aiter_lines()`` iterator of
            ``str`` lines.

    Yields:
        ``(event_type, data)`` for each event that carried at least one
        ``data:`` line.
    """
    default_type = "message"
    event_type = default_type
    data_lines: list[str] = []
    async for raw_line in response.aiter_lines():
        line = raw_line.rstrip("\r")
        if not line:
            if data_lines:
                yield event_type, "\n".join(data_lines)
                data_lines = []
            # Reset unconditionally: the type applies to one event only.
            event_type = default_type
            continue
        if line.startswith("event:"):
            # Event names are identifiers; an empty name means the default.
            event_type = line[len("event:"):].strip() or default_type
        elif line.startswith("data:"):
            value = line[len("data:"):]
            if value.startswith(" "):
                value = value[1:]
            data_lines.append(value)
    if data_lines:
        yield event_type, "\n".join(data_lines)
33
+
34
+
35
async def read_error_body(response: Any) -> bytes:
    """Read an error response body, capped at 4 KiB.

    Chunks from ``response.aiter_bytes()`` are accumulated until at least
    4096 bytes have been collected; reading stops when the next chunk
    arrives after that point. The result is cut to exactly 4096 bytes if
    the last accepted chunk overshot the cap.
    """
    limit = 4096
    collected = bytearray()
    async for piece in response.aiter_bytes():
        if len(collected) >= limit:
            break
        collected += piece
    return bytes(collected[:limit])
45
+
46
+
47
def format_streaming_error(status_code: int, body: bytes, *, provider: str) -> str:
    """Turn an error response into a short user-facing message.

    The body is decoded (undecodable bytes are replaced), trimmed of
    surrounding whitespace, and limited to its first 500 characters. The
    cap is deliberately tight: error bodies sometimes echo the request, and
    bearer tokens or full prompts should not end up in tracebacks. An empty
    body produces a message with the HTTP status only.
    """
    detail = body.decode(errors="replace").strip()
    if detail:
        return f"{provider} backend returned {status_code}: {detail[:500]}"
    return f"{provider} backend returned HTTP {status_code}"