react-agent-harness 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.1.0/react_agent_harness.egg-info → react_agent_harness-0.3.0}/PKG-INFO +2 -1
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/README.md +189 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/agents/base.py +92 -22
- react_agent_harness-0.3.0/harness/cli.py +137 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/events.py +2 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/hitl.py +36 -2
- react_agent_harness-0.3.0/harness/llm/__init__.py +19 -0
- react_agent_harness-0.3.0/harness/llm/_streaming.py +56 -0
- react_agent_harness-0.3.0/harness/llm/auth.py +610 -0
- react_agent_harness-0.3.0/harness/llm/claude_code.py +312 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/llm/openai.py +11 -5
- react_agent_harness-0.3.0/harness/llm/openai_codex.py +283 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/runtime.py +96 -65
- react_agent_harness-0.3.0/harness/steering.py +674 -0
- react_agent_harness-0.3.0/harness/utils.py +102 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/pyproject.toml +10 -2
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0/react_agent_harness.egg-info}/PKG-INFO +2 -1
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/SOURCES.txt +13 -0
- react_agent_harness-0.3.0/react_agent_harness.egg-info/entry_points.txt +2 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/requires.txt +1 -0
- react_agent_harness-0.3.0/tests/test_claude_code_llm.py +265 -0
- react_agent_harness-0.3.0/tests/test_cli.py +69 -0
- react_agent_harness-0.3.0/tests/test_llm_auth.py +297 -0
- react_agent_harness-0.3.0/tests/test_openai_codex_llm.py +204 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_openai_llm.py +3 -2
- react_agent_harness-0.3.0/tests/test_steering.py +663 -0
- react_agent_harness-0.3.0/tests/test_utils.py +96 -0
- react_agent_harness-0.1.0/harness/utils.py +0 -46
- react_agent_harness-0.1.0/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/LICENSE +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/agents/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/annotation.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/checkpoint.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/harness/otel.py +0 -0
- {react_agent_harness-0.1.0/harness/llm → react_agent_harness-0.3.0/memory}/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/manager.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/redis_store.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/stores.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/memory/working.py +0 -0
- {react_agent_harness-0.1.0/memory → react_agent_harness-0.3.0/orchestrator}/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/orchestrator/planner.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/setup.cfg +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_checkpoint_resume.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_mcp_adapter.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_memory.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_orchestrator.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_otel.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_streaming.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_vision.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tests/test_working_memory.py +0 -0
- {react_agent_harness-0.1.0/orchestrator → react_agent_harness-0.3.0/tools}/__init__.py +0 -0
- {react_agent_harness-0.1.0/tools → react_agent_harness-0.3.0/tools/builtin}/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/builtin/http_fetch.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/mcp/__init__.py +0 -0
- {react_agent_harness-0.1.0 → react_agent_harness-0.3.0}/tools/mcp/adapter.py +0 -0
{react_agent_harness-0.1.0/react_agent_harness.egg-info → react_agent_harness-0.3.0}/PKG-INFO
RENAMED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: react-agent-harness
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
License-File: LICENSE
|
|
7
|
+
Requires-Dist: prompt_toolkit>=3.0
|
|
7
8
|
Provides-Extra: lance
|
|
8
9
|
Requires-Dist: lancedb>=0.6; extra == "lance"
|
|
9
10
|
Requires-Dist: pyarrow>=14; extra == "lance"
|
|
@@ -38,6 +38,7 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
|
|
|
38
38
|
harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
|
|
39
39
|
harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
|
|
40
40
|
harness/hitl.py HITL approval gate — interactive CLI, session-allow list
|
|
41
|
+
harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
|
|
41
42
|
harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
|
|
42
43
|
harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
|
|
43
44
|
harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
|
|
@@ -73,6 +74,7 @@ explicit control.
|
|
|
73
74
|
| `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
|
|
74
75
|
| `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
|
|
75
76
|
| `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
|
|
77
|
+
| `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
|
|
76
78
|
|
|
77
79
|
## Adding a new domain (3 steps)
|
|
78
80
|
|
|
@@ -108,6 +110,102 @@ llm = OpenAILLM(model="gpt-4o-mini") # reads OPENAI_API_KEY from
|
|
|
108
110
|
runtime = AgentRuntime(..., llm=llm)
|
|
109
111
|
```
|
|
110
112
|
|
|
113
|
+
Credential-backed adapters can also plug into the same contract. This is the
|
|
114
|
+
shape used for provider-specific subscription or OAuth flows without teaching
|
|
115
|
+
agents about auth:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
agent-harness login openai-codex
|
|
119
|
+
agent-harness auth status openai-codex
|
|
120
|
+
agent-harness login claude-code
|
|
121
|
+
agent-harness auth status claude-code
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
> **⚠️ Subscription adapters are experimental — use the metered API in production.**
|
|
125
|
+
>
|
|
126
|
+
> `OpenAICodexLLM` and `ClaudeCodeLLM` bridge **ChatGPT / Claude
|
|
127
|
+
> subscription OAuth credentials** into the harness by talking to
|
|
128
|
+
> internal CLI endpoints with CLI-shaped User-Agent and billing headers.
|
|
129
|
+
> This route:
|
|
130
|
+
>
|
|
131
|
+
> - **May violate OpenAI's and Anthropic's Terms of Service.** Both
|
|
132
|
+
> providers prohibit using subscription accounts (ChatGPT Plus/Pro,
|
|
133
|
+
> Claude Pro/Max) for arbitrary programmatic access — subscriptions
|
|
134
|
+
> are priced for the official CLI's intended use only.
|
|
135
|
+
> - **May result in account suspension** if abuse detection classifies
|
|
136
|
+
> harness traffic as misuse.
|
|
137
|
+
> - **Depends on undocumented internal endpoints**
|
|
138
|
+
> (`/backend-api/codex/responses`, the Anthropic Messages API with
|
|
139
|
+
> `claude-code-*` beta flags) that providers can change or revoke at
|
|
140
|
+
> any time.
|
|
141
|
+
>
|
|
142
|
+
> **Use these adapters only for personal research on accounts you own.**
|
|
143
|
+
> Do not use them to serve other users. For anything else, prefer the
|
|
144
|
+
> metered API path:
|
|
145
|
+
>
|
|
146
|
+
> - `OpenAILLM` with `OPENAI_API_KEY` (optionally routed through a
|
|
147
|
+
> gateway like LiteLLM/Helicone for cost headers).
|
|
148
|
+
> - The standard Anthropic Messages API with an Anthropic API key.
|
|
149
|
+
|
|
150
|
+
Direct `openai-codex` OAuth follows the Codex/Pi-style ChatGPT
|
|
151
|
+
subscription route rather than the stable OpenAI Platform API. The
|
|
152
|
+
Codex OAuth client id can be overridden with
|
|
153
|
+
`AGENT_HARNESS_OPENAI_CODEX_CLIENT_ID`.
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
from harness.llm.openai_codex import OpenAICodexLLM
|
|
157
|
+
|
|
158
|
+
llm = OpenAICodexLLM(
|
|
159
|
+
model="gpt-5.5",
|
|
160
|
+
auth_file="~/.agent-harness/auth/auth.json", # Pi-shaped openai-codex OAuth entry
|
|
161
|
+
)
|
|
162
|
+
runtime = AgentRuntime(..., llm=llm)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
`OpenAICodexLLM` calls the Codex backend directly
|
|
166
|
+
(`https://chatgpt.com/backend-api/codex/responses`) with OAuth credentials.
|
|
167
|
+
The stable fallback remains `OpenAILLM` with `OPENAI_API_KEY`.
|
|
168
|
+
|
|
169
|
+
For Claude Code-style setups, use `ClaudeCodeLLM` with Claude Pro/Max OAuth
|
|
170
|
+
credentials stored in the same auth file. It calls the Anthropic Messages API
|
|
171
|
+
directly with Claude-Code-compatible OAuth headers:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
agent-harness login claude-code
|
|
175
|
+
python examples/subscription_auth_demo.py claude-code
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from harness.llm.claude_code import ClaudeCodeLLM
|
|
180
|
+
|
|
181
|
+
llm = ClaudeCodeLLM(
|
|
182
|
+
model="claude-sonnet-4-6",
|
|
183
|
+
auth_file="~/.agent-harness/auth/auth.json",
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
`ClaudeCodeLLM` reads a `claude-code` OAuth entry, refreshes it automatically
|
|
188
|
+
when expired, and retries once after `401`/`403`. This mirrors Pi's Claude
|
|
189
|
+
Pro/Max extension approach rather than shelling out to the Claude CLI. The
|
|
190
|
+
default model is the current canonical Sonnet release ID, `claude-sonnet-4-6`;
|
|
191
|
+
set `CLAUDE_CODE_MODEL` or pass `model="claude-opus-4-7"` to choose another
|
|
192
|
+
model.
|
|
193
|
+
|
|
194
|
+
Both adapters stream incrementally — `stream_complete()` yields each
|
|
195
|
+
SSE delta token as it arrives, and `complete()` consumes the same
|
|
196
|
+
stream and returns the concatenated text once finished. Cost / token
|
|
197
|
+
usage is captured from the final stream event into `last_usage`.
|
|
198
|
+
|
|
199
|
+
The Claude billing header's `cc_version` is read from
|
|
200
|
+
`CLAUDE_CODE_VERSION` (env) or from `claude --version` if the CLI is
|
|
201
|
+
installed; falls back to `unknown` otherwise. Pinning a specific
|
|
202
|
+
version with `CLAUDE_CODE_VERSION=2.1.150` is recommended if you want
|
|
203
|
+
stable behavior across CLI upgrades.
|
|
204
|
+
|
|
205
|
+
Do not copy browser/app refresh tokens into repo files. Store OAuth auth files
|
|
206
|
+
under `~/.agent-harness/auth` or reuse an existing Pi auth file with private
|
|
207
|
+
file permissions (`0600`).
|
|
208
|
+
|
|
111
209
|
To use Anthropic / Gemini / Ollama / a local SGLang or vLLM server / anything
|
|
112
210
|
else — write a 30-line adapter implementing those two methods. See
|
|
113
211
|
`harness/llm/openai.py` for the reference shape; the harness never imports a
|
|
@@ -720,3 +818,94 @@ When the human types a correction instead of y/n:
|
|
|
720
818
|
|
|
721
819
|
The `annotation_store` and `checkpoint_store` are independent — both can be
|
|
722
820
|
wired simultaneously for RLHF data collection with HITL review.
|
|
821
|
+
|
|
822
|
+
## Async steering
|
|
823
|
+
|
|
824
|
+
HITL is synchronous — it only fires when a gated tool is about to run. For
|
|
825
|
+
out-of-band course-correction (HTTP handler, supervisor agent, file watcher,
|
|
826
|
+
or a human typing in the terminal), each `BaseAgent` exposes a
|
|
827
|
+
non-blocking `steer(text)` method. Items are drained at the **top of each
|
|
828
|
+
ReAct iteration**, before the per-step checkpoint write and before the
|
|
829
|
+
next think, then appended to `WorkingMemory` as a `Human guidance: <text>`
|
|
830
|
+
user message. The LLM sees them on the next think and adjusts. One
|
|
831
|
+
`HUMAN_GUIDANCE` `BusEvent` fires per drained item.
|
|
832
|
+
|
|
833
|
+
Why a queue instead of writing straight to `WorkingMemory`: `steer()` is
|
|
834
|
+
synchronous and callable from any coroutine; `WorkingMemory.append` is
|
|
835
|
+
async (eviction can call the LLM). The queue is the producer/consumer
|
|
836
|
+
boundary, enforces step-boundary delivery, and keeps WM single-writer.
|
|
837
|
+
|
|
838
|
+
### Programmatic API (always available)
|
|
839
|
+
|
|
840
|
+
```python
|
|
841
|
+
agent.steer("skip the legal database, use academic sources only")
|
|
842
|
+
```
|
|
843
|
+
|
|
844
|
+
Fires immediately; the agent picks it up at the next step boundary.
|
|
845
|
+
Worst-case latency = remaining tool time + next-think time.
|
|
846
|
+
|
|
847
|
+
### Sources via factory (so orchestrated agents are reachable)
|
|
848
|
+
|
|
849
|
+
`BaseAgent` and `AgentRuntime` both accept `steering_source_factory` — a
|
|
850
|
+
callable `(agent) -> async ctx mgr`. The agent enters the source on
|
|
851
|
+
`run_stream`, exits on completion. No live-agent registry; agents the
|
|
852
|
+
runtime constructs internally still get steering.
|
|
853
|
+
|
|
854
|
+
Two built-in factories:
|
|
855
|
+
|
|
856
|
+
```python
|
|
857
|
+
from harness.steering import file_steering_factory, stdin_steering_factory
|
|
858
|
+
|
|
859
|
+
# 1. File-based — one file per agent, polled for appends (no shared resource)
|
|
860
|
+
runtime = AgentRuntime(
|
|
861
|
+
...,
|
|
862
|
+
steering_source_factory=file_steering_factory(
|
|
863
|
+
"/tmp/ah-{run_id}-{agent_id}.steer"
|
|
864
|
+
),
|
|
865
|
+
)
|
|
866
|
+
# Steer from any other terminal:
|
|
867
|
+
# echo "wrap up and synthesise" >> /tmp/ah-<run_id>-researcher.steer
|
|
868
|
+
|
|
869
|
+
# 2. Stdin-based — single shared StdinRouter with prefix routing
|
|
870
|
+
runtime = AgentRuntime(
|
|
871
|
+
...,
|
|
872
|
+
steering_source_factory=stdin_steering_factory(),
|
|
873
|
+
)
|
|
874
|
+
# At the terminal:
|
|
875
|
+
# researcher: skip the legal db, focus on academic
|
|
876
|
+
# writer: keep the report under 500 words
|
|
877
|
+
# *: stop after this step
|
|
878
|
+
```
|
|
879
|
+
|
|
880
|
+
Single-agent stdin runs accept lines with no prefix. Multi-agent runs
|
|
881
|
+
require `agent_id: text` (or `*: text` for broadcast); unknown or
|
|
882
|
+
unprefixed lines print a stderr hint and are discarded.
|
|
883
|
+
|
|
884
|
+
The stdin factory's underlying `StdinRouter` is started/stopped
|
|
885
|
+
automatically — the runtime detects the factory's async-context-manager
|
|
886
|
+
shape and wraps `dispatch_stream` / `run_stream` / `run_routed_stream`
|
|
887
|
+
around it. Ref-counted so nested calls (`dispatch_stream → run_stream`)
|
|
888
|
+
don't double-start the router.
|
|
889
|
+
|
|
890
|
+
### HITL coordination
|
|
891
|
+
|
|
892
|
+
When a `StdinRouter` is active, HITL calls `router.claim_next_line()`
|
|
893
|
+
**before** printing its approval banner — the next stdin line resolves
|
|
894
|
+
HITL's pending Future and bypasses pub/sub. After resolution, subsequent
|
|
895
|
+
lines route to steering subscribers normally. When no router is active,
|
|
896
|
+
HITL falls back to a standalone `prompt_toolkit` session, ensuring consistent
|
|
897
|
+
key-bindings (like Enter-submits and Alt-Enter/Ctrl-J-newline) across both paths.
|
|
898
|
+
|
|
899
|
+
### Constraints
|
|
900
|
+
|
|
901
|
+
- Steering arrives **between steps**, never mid-tool, never mid-think.
|
|
902
|
+
Tools that are already running complete; the LLM stream that's
|
|
903
|
+
already producing completes; guidance lands at the next safe boundary.
|
|
904
|
+
- Guidance queued **after** the LLM emits `action: "finish"` is lost —
|
|
905
|
+
the agent already decided it's done.
|
|
906
|
+
- Crash between drain and next checkpoint write → the queued items are
|
|
907
|
+
in the persisted WM. Crash between checkpoint write and next drain →
|
|
908
|
+
lost; re-steer after `--resume`.
|
|
909
|
+
|
|
910
|
+
See `examples/complex_sysaudit_demo.py` for stdin steering across three
|
|
911
|
+
agents alongside HITL on the shell tool.
|
|
@@ -27,6 +27,7 @@ Token management:
|
|
|
27
27
|
from __future__ import annotations
|
|
28
28
|
|
|
29
29
|
import asyncio
|
|
30
|
+
import contextlib
|
|
30
31
|
import json
|
|
31
32
|
import logging
|
|
32
33
|
import uuid
|
|
@@ -132,6 +133,7 @@ class BaseAgent:
|
|
|
132
133
|
guard,
|
|
133
134
|
llm,
|
|
134
135
|
checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
|
|
136
|
+
steering_source_factory: Any | None = None, # (BaseAgent) -> async ctx mgr
|
|
135
137
|
) -> None:
|
|
136
138
|
self.config = config
|
|
137
139
|
self.role = config.role # exposed for orchestrator planner prompt
|
|
@@ -145,10 +147,60 @@ class BaseAgent:
|
|
|
145
147
|
self._task: str = ""
|
|
146
148
|
self._last_think_error: str | None = None
|
|
147
149
|
self._ckp_id: str = "" # f"{run_id}:{agent_id}" — unique per agent per run
|
|
150
|
+
# Async steering queue — items drained at the top of each ReAct
|
|
151
|
+
# step (before checkpoint, before think). Created eagerly so
|
|
152
|
+
# callers can steer() before run_stream starts.
|
|
153
|
+
self._steering: asyncio.Queue[str] = asyncio.Queue()
|
|
154
|
+
# Optional factory: called once at run_stream entry. Must return an
|
|
155
|
+
# async context manager that, while active, may call agent.steer().
|
|
156
|
+
# The agent owns the source's lifecycle — no live-instance registry.
|
|
157
|
+
self._steering_source_factory = steering_source_factory
|
|
148
158
|
self._resume_key: str = (
|
|
149
159
|
"" # key printed in --resume banner; set by orchestrator to outer run_id
|
|
150
160
|
)
|
|
151
161
|
|
|
162
|
+
# ── Async steering ────────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
def steer(self, text: str) -> None:
    """Queue human guidance for pickup at the next ReAct step boundary.

    The call never blocks: the stripped text is placed on ``self._steering``
    with ``put_nowait``. Falsy or whitespace-only input is silently ignored.

    Queued items are consumed by ``_drain_steering`` at the top of the next
    iteration (ahead of the per-step checkpoint write and the next think
    call), where each one is appended to WorkingMemory as a user message
    and reported as a HUMAN_GUIDANCE BusEvent.

    Worst-case latency is the time left in the current tool plus the
    duration of the next think. Guidance that arrives after the LLM has
    emitted action="finish" is lost, because the agent has already decided
    it is done.
    """
    # Strip once; a falsy argument is treated the same as a blank string.
    cleaned = text.strip() if text else ""
    if cleaned:
        self._steering.put_nowait(cleaned)
|
|
180
|
+
|
|
181
|
+
async def _drain_steering(self, step: int) -> AsyncGenerator[BusEvent, None]:
    """Move all queued guidance into WorkingMemory, yielding one event per item.

    Intended to run at the top of every ReAct iteration. Items leave the
    queue in FIFO order; when the queue is empty the generator finishes
    without yielding anything.

    Args:
        step: Index of the ReAct step being started; recorded in both the
            tracer entry and the event payload.

    Yields:
        One ``EventType.HUMAN_GUIDANCE`` ``BusEvent`` per drained item.
    """
    while True:
        try:
            guidance = self._steering.get_nowait()
        except asyncio.QueueEmpty:
            # Nothing (left) to deliver for this step.
            return
        # Guidance enters the conversation as a user-role message.
        await self._working_memory.append("user", f"Human guidance: {guidance}")
        self._tracer.log(
            "human_guidance",
            self.config.agent_id,
            {"step": step, "text": guidance},
        )
        yield BusEvent(
            type=EventType.HUMAN_GUIDANCE,
            agent_id=self.config.agent_id,
            payload={"step": step, "text": guidance},
        )
|
|
203
|
+
|
|
152
204
|
# ── Streaming entry point (canonical) ─────────────────────────────────────
|
|
153
205
|
|
|
154
206
|
async def run_stream(
|
|
@@ -170,17 +222,25 @@ class BaseAgent:
|
|
|
170
222
|
await self._working_memory.append("system", system, pinned=True)
|
|
171
223
|
await self._working_memory.append("user", task)
|
|
172
224
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
225
|
+
# Steering source is owned by the agent for the duration of the run.
|
|
226
|
+
# nullcontext when no factory is configured — zero overhead.
|
|
227
|
+
source_cm = (
|
|
228
|
+
self._steering_source_factory(self)
|
|
229
|
+
if self._steering_source_factory is not None
|
|
230
|
+
else contextlib.nullcontext()
|
|
231
|
+
)
|
|
232
|
+
async with source_cm:
|
|
233
|
+
async with _ResumeHint(
|
|
234
|
+
self._resume_key,
|
|
235
|
+
self._checkpoint_store,
|
|
236
|
+
f"Agent {self.config.agent_id}",
|
|
237
|
+
check_key=self._ckp_id,
|
|
238
|
+
) as hint:
|
|
239
|
+
async for event in self._run_stream_internal(run_id):
|
|
240
|
+
if event.type == EventType.TASK_DONE:
|
|
241
|
+
await self._clear_checkpoint(run_id)
|
|
242
|
+
hint.done = True
|
|
243
|
+
yield event
|
|
184
244
|
|
|
185
245
|
async def _resume_stream(
|
|
186
246
|
self,
|
|
@@ -203,17 +263,23 @@ class BaseAgent:
|
|
|
203
263
|
yield event
|
|
204
264
|
start_step = pending["step"] + 1
|
|
205
265
|
|
|
206
|
-
|
|
207
|
-
self.
|
|
208
|
-
self.
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
async
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
266
|
+
source_cm = (
|
|
267
|
+
self._steering_source_factory(self)
|
|
268
|
+
if self._steering_source_factory is not None
|
|
269
|
+
else contextlib.nullcontext()
|
|
270
|
+
)
|
|
271
|
+
async with source_cm:
|
|
272
|
+
async with _ResumeHint(
|
|
273
|
+
self._resume_key,
|
|
274
|
+
self._checkpoint_store,
|
|
275
|
+
f"Agent {self.config.agent_id}",
|
|
276
|
+
check_key=self._ckp_id,
|
|
277
|
+
) as hint:
|
|
278
|
+
async for event in self._run_stream_internal(run_id, start_step=start_step):
|
|
279
|
+
if event.type == EventType.TASK_DONE:
|
|
280
|
+
await self._clear_checkpoint(run_id)
|
|
281
|
+
hint.done = True
|
|
282
|
+
yield event
|
|
217
283
|
|
|
218
284
|
async def _run_stream_internal(
|
|
219
285
|
self,
|
|
@@ -295,6 +361,10 @@ class BaseAgent:
|
|
|
295
361
|
) -> AsyncGenerator[BusEvent, None]:
|
|
296
362
|
for step in range(start_step, self.config.max_steps):
|
|
297
363
|
self._guard.check()
|
|
364
|
+
# Drain steering queue BEFORE the checkpoint write so any
|
|
365
|
+
# queued guidance is captured by the persisted WM.
|
|
366
|
+
async for guidance_event in self._drain_steering(step):
|
|
367
|
+
yield guidance_event
|
|
298
368
|
if (
|
|
299
369
|
self._checkpoint_store is not None
|
|
300
370
|
and self.config.checkpoint_every > 0
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from harness.llm.auth import (
|
|
11
|
+
AnthropicClaudeCodeOAuthClient,
|
|
12
|
+
AuthFileOAuthProvider,
|
|
13
|
+
OAuthCredential,
|
|
14
|
+
OpenAICodexOAuthClient,
|
|
15
|
+
default_auth_file,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
PROVIDERS = ["openai-codex", "claude-code"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def main() -> int:
    """Entry point for the ``agent-harness`` command-line tool.

    Recognised invocations::

        agent-harness login <provider> [--auth-file PATH]
        agent-harness auth status <provider> [--auth-file PATH]
        agent-harness auth logout <provider> [--auth-file PATH]

    ``<provider>`` must be one of ``PROVIDERS``. ``--auth-file`` defaults to
    ``default_auth_file()`` and has ``~`` expanded before use.

    Returns:
        The exit code of the selected handler, or ``1`` when the handler
        raises (the exception text is printed to stderr).
    """
    parser = argparse.ArgumentParser(prog="agent-harness", description="agent-harness utilities")
    commands = parser.add_subparsers(dest="command", required=True)

    def _add_provider_args(cmd_parser: argparse.ArgumentParser) -> None:
        # Every leaf command takes the same positional and option.
        cmd_parser.add_argument("provider", choices=PROVIDERS)
        cmd_parser.add_argument("--auth-file", default=str(default_auth_file()))

    _add_provider_args(commands.add_parser("login", help="log in to a provider"))

    auth_parser = commands.add_parser("auth", help="inspect or clear provider auth")
    auth_commands = auth_parser.add_subparsers(dest="auth_command", required=True)
    _add_provider_args(auth_commands.add_parser("status", help="show auth status"))
    _add_provider_args(auth_commands.add_parser("logout", help="remove auth credentials"))

    args = parser.parse_args()
    try:
        if args.command == "login":
            # Login flows are coroutines and differ per provider.
            login_flows = {
                "openai-codex": _login_openai_codex,
                "claude-code": _login_claude_code,
            }
            if args.provider in login_flows:
                auth_path = Path(args.auth_file).expanduser()
                return asyncio.run(login_flows[args.provider](auth_path))
        if args.command == "auth":
            # status/logout share one synchronous handler per action; the
            # provider name is passed through unchanged.
            auth_handlers = {
                "status": _status_oauth_provider,
                "logout": _logout_oauth_provider,
            }
            handler = auth_handlers.get(args.auth_command)
            if handler is not None and args.provider in ("openai-codex", "claude-code"):
                return handler(Path(args.auth_file).expanduser(), args.provider)
    except Exception as e:
        print(f"agent-harness: {e}", file=sys.stderr)
        return 1
    # Reached only when no branch above matched the parsed arguments.
    parser.error("unsupported command")
    return 2
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def _login_openai_codex(path: Path) -> int:
    """Run the OpenAI Codex device-code login and save the credential.

    Requests a device code, prints the verification URI and user code, then
    polls until a credential is returned. The OAuth client is closed whether
    or not the flow succeeds; the credential is written only on success.

    Args:
        path: Auth file that receives the credential.

    Returns:
        ``0`` once the credential has been written.
    """
    oauth = OpenAICodexOAuthClient()
    try:
        device = await oauth.request_device_code()
        print("OpenAI Codex login")
        print(f"Open: {device.verification_uri}")
        print(f"Code: {device.user_code}")
        print("Waiting for authorization...")
        credential = await oauth.poll_device_code(device)
    finally:
        # Always release the client, including on failure or cancellation.
        await oauth.aclose()

    _write_oauth_credential(path, credential)
    print(f"Logged in to openai-codex. Credentials saved to {path}")
    return 0
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
async def _login_claude_code(path: Path) -> int:
    """Run the Claude Code login flow and save the credential.

    Prints the login URL, reads the pasted callback (the final callback URL
    or the ``code#state`` value) from stdin, and exchanges it for a
    credential. The OAuth client is closed whether or not the flow succeeds.

    Args:
        path: Auth file that receives the credential.

    Returns:
        ``0`` once the credential has been written.
    """
    oauth = AnthropicClaudeCodeOAuthClient()
    try:
        pending = oauth.begin_login()
        print("Claude Code login")
        print(f"Open: {pending.url}")
        print("Paste the final callback URL, or the code#state value.")
        pasted = input("Callback: ")
        credential = await oauth.finish_login(pending, pasted)
    finally:
        # Always release the client, including on failure or cancellation.
        await oauth.aclose()

    _write_oauth_credential(path, credential)
    print(f"Logged in to claude-code. Credentials saved to {path}")
    return 0
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _status_oauth_provider(path: Path, provider_name: str) -> int:
    """Print the stored credential status for one provider as JSON.

    Args:
        path: Auth file to inspect.
        provider_name: Provider entry to look up in the auth file.

    Returns:
        ``0`` when a credential was read and reported; ``1`` when reading it
        failed, in which case a "Not logged in: ..." line is printed instead.
    """
    store = AuthFileOAuthProvider(path, provider=provider_name)
    try:
        credential = store._read_credential()
    except Exception as e:
        # A missing auth file gets a path-specific message; any other
        # failure is reported using the exception's own text.
        reason = f"{path} does not exist" if isinstance(e, FileNotFoundError) else f"{e}"
        print(f"Not logged in: {reason}")
        return 1

    report = {
        "provider": provider_name,
        "auth_file": str(path),
        "account_id": credential.account_id,
        "expires_at": None if not credential.expires_at else credential.expires_at.isoformat(),
        "expired": credential.is_expired(),
    }
    print(json.dumps(report, indent=2))
    return 0
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _logout_oauth_provider(path: Path, provider_name: str) -> int:
    """Clear the stored credentials for one provider.

    The provider is opened with ``require_private_permissions=False`` and
    its ``clear()`` method is called, then a confirmation line is printed.

    Args:
        path: Auth file to modify.
        provider_name: Provider entry to clear.

    Returns:
        Always ``0``.
    """
    store = AuthFileOAuthProvider(
        path,
        provider=provider_name,
        require_private_permissions=False,
    )
    store.clear()
    print(f"Removed {provider_name} credentials from {path}")
    return 0
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
    """Persist ``cred`` into the auth file at ``path``.

    Parent directories are created as needed. If the auth file does not
    exist yet it is seeded with an empty JSON object and, on non-Windows
    systems, restricted to mode ``0600`` before the credential is written.
    An existing file is left with whatever permissions it already has.

    Args:
        path: Destination auth file.
        cred: Credential to store; its ``provider`` attribute selects the
            entry in the auth file.
    """
    store = AuthFileOAuthProvider(
        path,
        provider=cred.provider,
        require_private_permissions=False,
    )
    path.parent.mkdir(parents=True, exist_ok=True)

    is_new_file = not path.exists()
    if is_new_file:
        # Seed an empty JSON document before the credential write.
        path.write_text("{}")
        if os.name != "nt":
            path.chmod(0o600)

    store._write_credential(cred)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
if __name__ == "__main__":
|
|
137
|
+
raise SystemExit(main())
|
|
@@ -18,6 +18,7 @@ Event lifecycle within a single goal:
|
|
|
18
18
|
Orchestrated path (run / run_stream):
|
|
19
19
|
PLAN — orchestrator emitted a static DAG
|
|
20
20
|
(per task in DAG)
|
|
21
|
+
HUMAN_GUIDANCE? — async steering drained at top of step
|
|
21
22
|
THOUGHT — agent's next-step reasoning
|
|
22
23
|
TOKEN* — partial LLM output (only when client streams)
|
|
23
24
|
ACTION — agent chose a tool + args
|
|
@@ -46,6 +47,7 @@ class EventType(str, Enum):
|
|
|
46
47
|
TOKEN = "token"
|
|
47
48
|
ACTION = "action"
|
|
48
49
|
OBSERVATION = "observation"
|
|
50
|
+
HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
|
|
49
51
|
TASK_DONE = "task_done"
|
|
50
52
|
REPLAN = "replan"
|
|
51
53
|
SYNTHESIS = "synthesis"
|
|
@@ -176,14 +176,48 @@ async def request_approval(
|
|
|
176
176
|
|
|
177
177
|
Holds stdout_lock for the duration so concurrent agent events don't
|
|
178
178
|
interleave with the banner or the input prompt.
|
|
179
|
+
|
|
180
|
+
Input always goes through prompt_toolkit:
|
|
181
|
+
- If a steering router is active, HITL claims the next stdin read
|
|
182
|
+
via the router. Text submitted at the active steering prompt is
|
|
183
|
+
routed to HITL instead of subscribers; if the router reaches a
|
|
184
|
+
pending claim between steering prompt cycles, it shows HITL's
|
|
185
|
+
approval prompt directly.
|
|
186
|
+
- If no router is active, HITL spins up a one-shot PromptSession
|
|
187
|
+
for the approval prompt. Same UX either way.
|
|
179
188
|
"""
|
|
189
|
+
from harness.steering import get_active_router
|
|
190
|
+
|
|
180
191
|
async with stdout_lock:
|
|
192
|
+
router = get_active_router()
|
|
193
|
+
approve_prompt = " Approve? [y/n/a/correction]: "
|
|
194
|
+
# If a router is active, reserve the next stdin read BEFORE printing
|
|
195
|
+
# the banner so the user's typed answer routes to HITL (not steering).
|
|
196
|
+
hitl_future: Any = (
|
|
197
|
+
router.claim_next_line(prompt=approve_prompt) if router is not None else None
|
|
198
|
+
)
|
|
199
|
+
|
|
181
200
|
_print_banner(req)
|
|
182
201
|
|
|
183
202
|
guard.suspend()
|
|
184
203
|
try:
|
|
185
|
-
|
|
186
|
-
|
|
204
|
+
if hitl_future is not None:
|
|
205
|
+
raw = await hitl_future
|
|
206
|
+
else:
|
|
207
|
+
# Standalone: one-shot prompt_toolkit session with the same
|
|
208
|
+
# Enter-submits / Ctrl+J-newline bindings as steering so
|
|
209
|
+
# single-token answers (y/n/a) and multi-line corrections
|
|
210
|
+
# both compose naturally.
|
|
211
|
+
from prompt_toolkit import PromptSession
|
|
212
|
+
|
|
213
|
+
from harness.steering import StdinRouter
|
|
214
|
+
|
|
215
|
+
session: PromptSession = PromptSession()
|
|
216
|
+
raw = await session.prompt_async(
|
|
217
|
+
approve_prompt,
|
|
218
|
+
multiline=True,
|
|
219
|
+
key_bindings=StdinRouter._build_key_bindings(),
|
|
220
|
+
)
|
|
187
221
|
finally:
|
|
188
222
|
guard.resume()
|
|
189
223
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""LLM adapter helpers."""
|
|
2
|
+
|
|
3
|
+
from harness.llm.auth import (
|
|
4
|
+
AnthropicClaudeCodeOAuthClient,
|
|
5
|
+
AuthFileOAuthProvider,
|
|
6
|
+
OAuthCredential,
|
|
7
|
+
OpenAICodexOAuthClient,
|
|
8
|
+
)
|
|
9
|
+
from harness.llm.claude_code import ClaudeCodeLLM
|
|
10
|
+
from harness.llm.openai_codex import OpenAICodexLLM
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AnthropicClaudeCodeOAuthClient",
|
|
14
|
+
"AuthFileOAuthProvider",
|
|
15
|
+
"ClaudeCodeLLM",
|
|
16
|
+
"OAuthCredential",
|
|
17
|
+
"OpenAICodexLLM",
|
|
18
|
+
"OpenAICodexOAuthClient",
|
|
19
|
+
]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Shared SSE helpers for streaming-capable LLM adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import AsyncGenerator
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def aiter_sse_events(response: Any) -> AsyncGenerator[tuple[str, str], None]:
    """Yield ``(event_type, data)`` pairs parsed from an SSE response.

    Reads lines from ``response.aiter_lines()`` and interprets the standard
    ``event:`` / ``data:`` line format. A blank line terminates the current
    event; several ``data:`` lines inside one event are joined with
    newlines. Events without an ``event:`` label are reported with the
    default type ``"message"``. Lines that are neither ``event:`` nor
    ``data:`` fields are ignored. Data still buffered when the stream ends
    (no terminating blank line) is flushed as a final event.

    Args:
        response: Object exposing an async ``aiter_lines()`` iterator that
            produces text lines.

    Yields:
        ``(event_type, data)`` tuples, one per event that carried data.
    """
    current_event = "message"
    data_lines: list[str] = []
    async for raw_line in response.aiter_lines():
        line = raw_line.rstrip("\r")
        if not line:
            if data_lines:
                yield current_event, "\n".join(data_lines)
            # Reset on EVERY blank line, not only after a dispatch: an
            # `event:` label followed by a blank line with no data must not
            # leak onto the next, unlabelled event.
            current_event = "message"
            data_lines = []
            continue
        if line.startswith("event:"):
            current_event = line[len("event:") :].strip()
        elif line.startswith("data:"):
            data_lines.append(line[len("data:") :].strip())
    if data_lines:
        yield current_event, "\n".join(data_lines)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
async def read_error_body(response: Any) -> bytes:
    """Read an error response's body, capped at 4 KiB.

    Chunks are pulled from ``response.aiter_bytes()`` until at least 4096
    bytes have been collected or the stream ends; the result is then cut to
    exactly 4096 bytes at most.
    """
    limit = 4096
    collected = bytearray()
    async for piece in response.aiter_bytes():
        if len(collected) >= limit:
            break
        collected += piece
    return bytes(collected[:limit])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def format_streaming_error(status_code: int, body: bytes, *, provider: str) -> str:
    """Turn an error response into a short, user-facing message.

    The body is decoded leniently (undecodable bytes are replaced) and
    stripped of surrounding whitespace. Only its first 500 characters are
    included: error bodies sometimes echo request payloads, and bearer
    tokens or full prompts should not end up in tracebacks. An empty body
    produces a message carrying just the HTTP status.
    """
    detail = body.decode(errors="replace").strip()
    if detail:
        return f"{provider} backend returned {status_code}: {detail[:500]}"
    return f"{provider} backend returned HTTP {status_code}"
|