react-agent-harness 0.3.1__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.3.1/react_agent_harness.egg-info → react_agent_harness-0.4.0}/PKG-INFO +1 -1
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/README.md +157 -14
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/agents/base.py +27 -23
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/cli.py +40 -0
- react_agent_harness-0.4.0/harness/console.py +166 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/hitl.py +38 -6
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/runtime.py +37 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/steering.py +29 -16
- react_agent_harness-0.4.0/harness/tool_policy.py +183 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/orchestrator/planner.py +147 -9
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/pyproject.toml +1 -1
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/SOURCES.txt +6 -1
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_cli.py +29 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_mcp_adapter.py +45 -26
- react_agent_harness-0.4.0/tests/test_mcp_auth.py +104 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_orchestrator.py +250 -4
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_steering.py +6 -21
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_streaming.py +64 -23
- react_agent_harness-0.4.0/tests/test_tool_policy.py +89 -0
- react_agent_harness-0.4.0/tools/mcp/__init__.py +14 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/mcp/adapter.py +36 -31
- react_agent_harness-0.4.0/tools/mcp/auth.py +129 -0
- react_agent_harness-0.3.1/tools/mcp/__init__.py +0 -4
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/LICENSE +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/agents/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/annotation.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/checkpoint.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/events.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/_streaming.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/auth.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/claude_code.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/openai.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/openai_codex.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/otel.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/utils.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/manager.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/redis_store.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/stores.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/working.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/orchestrator/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/requires.txt +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/setup.cfg +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_checkpoint_resume.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_claude_code_llm.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_llm_auth.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_memory.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_openai_codex_llm.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_openai_llm.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_otel.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_utils.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_vision.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_working_memory.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/http_fetch.py +0 -0
|
@@ -38,6 +38,8 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
|
|
|
38
38
|
harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
|
|
39
39
|
harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
|
|
40
40
|
harness/hitl.py HITL approval gate — interactive CLI, session-allow list
|
|
41
|
+
harness/tool_policy.py Persistent tool policy — user-scoped allow rules, CLI management
|
|
42
|
+
harness/console.py ConsoleRenderer — centralised BusEvent formatting for CLI apps
|
|
41
43
|
harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
|
|
42
44
|
harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
|
|
43
45
|
harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
|
|
@@ -74,6 +76,7 @@ explicit control.
|
|
|
74
76
|
| `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
|
|
75
77
|
| `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
|
|
76
78
|
| `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
|
|
79
|
+
| `examples/mcp_auth_demo.py` | Connects to an authenticated remote MCP server using bearer or auth-file credentials. | `OPENAI_API_KEY`, `[openai,mcp]`, `MCP_URL`, `MCP_BEARER_TOKEN` or `MCP_AUTH_PROVIDER` |
|
|
77
80
|
| `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
|
|
78
81
|
|
|
79
82
|
## Adding a new domain (3 steps)
|
|
@@ -337,22 +340,96 @@ async for event in runtime.run_stream("investigate GPU spike on worker-07"):
|
|
|
337
340
|
print(event.payload["answer"])
|
|
338
341
|
```
|
|
339
342
|
|
|
343
|
+
### 4. Pre-built — `run_with_plan` / `run_with_plan_stream`
|
|
344
|
+
|
|
345
|
+
Supply a hand-written `Plan` and bypass the LLM planner entirely. Use
|
|
346
|
+
this for deterministic, repeatable workflows where the decomposition is
|
|
347
|
+
known upfront — CI pipelines, ETL jobs, scheduled tasks. The plan is
|
|
348
|
+
validated against registered agents before execution; everything
|
|
349
|
+
downstream (parallel batches, replan-on-failure, synthesis, memory
|
|
350
|
+
writes, steering) is identical to `run_stream`.
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
from orchestrator.planner import Plan, Task
|
|
354
|
+
|
|
355
|
+
plan = Plan([
|
|
356
|
+
Task("t1", "analyst", "Analyse error logs from the last hour"),
|
|
357
|
+
Task("t2", "reporter", "Write an incident summary", depends_on=["t1"]),
|
|
358
|
+
])
|
|
359
|
+
|
|
360
|
+
# streaming
|
|
361
|
+
async for event in runtime.run_with_plan_stream(plan, goal="Incident report"):
|
|
362
|
+
if event.type == EventType.DONE:
|
|
363
|
+
print(event.payload["answer"])
|
|
364
|
+
|
|
365
|
+
# blocking
|
|
366
|
+
result = await runtime.run_with_plan(plan, goal="Incident report")
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
The `goal` string is passed to the synthesiser and used for memory
|
|
370
|
+
context injection into agents — even though the plan shape is fixed, the
|
|
371
|
+
agents themselves still read from memory.
|
|
372
|
+
|
|
373
|
+
If a task fails mid-run and `on_failure="replan"`, the replan call does
|
|
374
|
+
go to the LLM — the bypass is for the *initial* plan only.
|
|
375
|
+
|
|
376
|
+
---
|
|
377
|
+
|
|
340
378
|
Event types by path:
|
|
341
379
|
|
|
342
|
-
| Event | Dispatch | Routed | Direct | Orchestrated |
|
|
343
|
-
|
|
344
|
-
| `DISPATCH` | ✓ | — | — | — |
|
|
345
|
-
| `ROUTE` | ✓ (simple) | ✓ | — | — |
|
|
346
|
-
| `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ |
|
|
347
|
-
| `TASK_DONE` | ✓ | ✓ | ✓ | ✓ |
|
|
348
|
-
| `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ |
|
|
349
|
-
| `ERROR` | ✓ | ✓ | ✓ | ✓ |
|
|
380
|
+
| Event | Dispatch | Routed | Direct | Orchestrated | Pre-built |
|
|
381
|
+
|---|---|---|---|---|---|
|
|
382
|
+
| `DISPATCH` | ✓ | — | — | — | — |
|
|
383
|
+
| `ROUTE` | ✓ (simple) | ✓ | — | — | — |
|
|
384
|
+
| `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
385
|
+
| `TASK_DONE` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
386
|
+
| `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ | ✓ |
|
|
387
|
+
| `ERROR` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
350
388
|
|
|
351
389
|
`TOKEN` events fire only when your LLM client exposes
|
|
352
390
|
`async def stream_complete(system, messages) -> AsyncGenerator[str, None]`.
|
|
353
391
|
Non-streaming clients still work — they emit the full response in one
|
|
354
392
|
`THOUGHT` event per step.
|
|
355
393
|
|
|
394
|
+
## Console rendering
|
|
395
|
+
|
|
396
|
+
`ConsoleRenderer` handles all `BusEvent` types with consistent label
|
|
397
|
+
and truncation formatting so event-loop boilerplate stays out of your
|
|
398
|
+
scripts.
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
from harness.console import ConsoleRenderer, trunc
|
|
402
|
+
|
|
403
|
+
renderer = ConsoleRenderer(
|
|
404
|
+
truncate=140, # max chars for long text fields
|
|
405
|
+
sep_char="─", # separator character
|
|
406
|
+
sep_width=72, # separator width
|
|
407
|
+
agent_label_width=16, # width of [agent_id] column
|
|
408
|
+
show_tokens=False, # True to print TOKEN events inline
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
async for event in runtime.dispatch_stream(goal):
|
|
412
|
+
renderer.render(event) # handles every EventType
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
For events with custom section headers (e.g. a "PROJECT HEALTH REPORT"
|
|
416
|
+
block), handle that event yourself and skip `render` for it — the
|
|
417
|
+
renderer is additive:
|
|
418
|
+
|
|
419
|
+
```python
|
|
420
|
+
async for event in runtime.run_stream(goal):
|
|
421
|
+
if event.type == EventType.DONE:
|
|
422
|
+
renderer.sep("═")
|
|
423
|
+
print("MY CUSTOM HEADER")
|
|
424
|
+
renderer.sep("═")
|
|
425
|
+
print(event.payload["answer"])
|
|
426
|
+
else:
|
|
427
|
+
renderer.render(event)
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
`trunc(s, n)` is exported for standalone use when you need to truncate
|
|
431
|
+
a string to `n` characters with a trailing `…`.
|
|
432
|
+
|
|
356
433
|
## Working memory budget
|
|
357
434
|
|
|
358
435
|
`AgentConfig.working_memory_max_tokens` controls per-agent eviction (default
|
|
@@ -518,7 +595,48 @@ async with MCPServerConnection(params, server_name="filesystem") as conn:
|
|
|
518
595
|
Supports **stdio** and **SSE** transports. The `MCPServerConnection` context
|
|
519
596
|
manager handles the full lifecycle — connect, discover, and cleanup.
|
|
520
597
|
|
|
521
|
-
|
|
598
|
+
Remote MCP servers can receive static headers or bearer tokens through an auth
|
|
599
|
+
provider:
|
|
600
|
+
|
|
601
|
+
```python
|
|
602
|
+
import os
|
|
603
|
+
from tools.mcp import MCPServerConnection, StaticMCPAuth
|
|
604
|
+
|
|
605
|
+
auth = StaticMCPAuth(
|
|
606
|
+
headers={
|
|
607
|
+
"DD_API_KEY": os.environ["DD_API_KEY"],
|
|
608
|
+
"DD_APPLICATION_KEY": os.environ["DD_APPLICATION_KEY"],
|
|
609
|
+
}
|
|
610
|
+
)
|
|
611
|
+
|
|
612
|
+
async with MCPServerConnection(
|
|
613
|
+
{"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
|
|
614
|
+
server_name="datadog",
|
|
615
|
+
auth=auth,
|
|
616
|
+
) as conn:
|
|
617
|
+
conn.register_tools(tool_registry)
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
OAuth-style auth files can be reused for MCP bearer auth:
|
|
621
|
+
|
|
622
|
+
```python
|
|
623
|
+
from tools.mcp import MCPServerConnection, OAuthMCPAuth
|
|
624
|
+
|
|
625
|
+
auth = OAuthMCPAuth.from_auth_file(
|
|
626
|
+
"~/.agent-harness/auth/auth.json",
|
|
627
|
+
provider="datadog-mcp",
|
|
628
|
+
)
|
|
629
|
+
|
|
630
|
+
async with MCPServerConnection(
|
|
631
|
+
{"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
|
|
632
|
+
server_name="datadog",
|
|
633
|
+
auth=auth,
|
|
634
|
+
) as conn:
|
|
635
|
+
conn.register_tools(tool_registry)
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
See `examples/mcp_demo.py` for local stdio MCP and `examples/mcp_auth_demo.py`
|
|
639
|
+
for authenticated remote MCP.
|
|
522
640
|
|
|
523
641
|
## OpenTelemetry Tracing
|
|
524
642
|
|
|
@@ -699,10 +817,10 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
|
699
817
|
Run: 3f7a1b2c-...:file_agent
|
|
700
818
|
ID: a1b2-c3d4
|
|
701
819
|
────────────────────────────────────────────────────────────
|
|
702
|
-
y = approve once | a = allow 'delete_file' for session | n = reject | <text> = steer
|
|
820
|
+
y = approve once | a = allow 'delete_file' for session | A = always allow 'delete_file' | n = reject | <text> = steer
|
|
703
821
|
Ctrl-C to pause. Resume: python my_script.py --resume 3f7a1b2c-...:file_agent
|
|
704
822
|
────────────────────────────────────────────────────────────
|
|
705
|
-
Approve? [y/n/a/correction]:
|
|
823
|
+
Approve? [y/n/a/A/correction]:
|
|
706
824
|
```
|
|
707
825
|
|
|
708
826
|
**Prompt semantics:**
|
|
@@ -712,11 +830,19 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
|
712
830
|
| `y` / `yes` | Tool runs once |
|
|
713
831
|
| `n` / `no` | Tool skipped; agent sees a rejection observation |
|
|
714
832
|
| `a` / `allow` | Tool runs **and** added to session allow-list; no further prompts for this tool (or command prefix for shell-like tools) |
|
|
833
|
+
| `A` / `always` | Tool runs **and** a user-scoped allow rule is stored in `~/.agent-harness/policies/tool_policy.json` |
|
|
715
834
|
| any other text | Correction: tool skipped, text injected into `WorkingMemory` as a user message; LLM self-corrects on the next step |
|
|
716
835
|
|
|
717
|
-
For shell-like tools (`shell`, `bash`, `run`, `exec`), `a`
|
|
718
|
-
word** of the command — e.g.
|
|
719
|
-
|
|
836
|
+
For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` and `A` allow the
|
|
837
|
+
**first word** of the command — e.g. approving `shell git commit ...` allows
|
|
838
|
+
all `git` commands in that scope but still prompts for `shell rm ...`.
|
|
839
|
+
Persistent rules are user-local, not repo files. Manage them with:
|
|
840
|
+
|
|
841
|
+
```bash
|
|
842
|
+
agent-harness policy list
|
|
843
|
+
agent-harness policy revoke <rule-id>
|
|
844
|
+
agent-harness policy clear
|
|
845
|
+
```
|
|
720
846
|
|
|
721
847
|
**Wall-time budget** is suspended while waiting for input — human think-time
|
|
722
848
|
does not count against `max_wall_time_seconds`.
|
|
@@ -909,3 +1035,20 @@ key-bindings (like Enter-submits and Alt-Enter/Ctrl-J-newline) across both paths
|
|
|
909
1035
|
|
|
910
1036
|
See `examples/complex_sysaudit_demo.py` for stdin steering across three
|
|
911
1037
|
agents alongside HITL on the shell tool.
|
|
1038
|
+
|
|
1039
|
+
## AgentConfig reference
|
|
1040
|
+
|
|
1041
|
+
| Field | Default | Description |
|
|
1042
|
+
|---|---|---|
|
|
1043
|
+
| `agent_id` | required | Unique identifier for the agent |
|
|
1044
|
+
| `role` | required | Plain-English description used by the planner for agent selection |
|
|
1045
|
+
| `system_prompt` | required | Base system prompt for the agent |
|
|
1046
|
+
| `allowed_tools` | required | Tool names the agent may call |
|
|
1047
|
+
| `max_steps` | `10` | Maximum ReAct iterations before the run is terminated |
|
|
1048
|
+
| `max_wall_time_seconds` | (guardrail) | See `GuardrailConfig` |
|
|
1049
|
+
| `memory_context_enabled` | `True` | Prepend relevant long-term memory to the system prompt |
|
|
1050
|
+
| `confidence_from_llm` | `True` | Use the `confidence` field from the LLM response; set `False` to always return `1.0` |
|
|
1051
|
+
| `working_memory_max_tokens` | `8000` | Token budget for in-context working memory before rolling summarisation kicks in |
|
|
1052
|
+
| `hitl_tools` | `[]` | Tool names that require human approval before execution |
|
|
1053
|
+
| `checkpoint_every` | `0` | Write a crash-resumable checkpoint every N steps; `0` disables periodic checkpoints |
|
|
1054
|
+
| `stream_tokens` | `False` | Emit `TOKEN` events as the LLM streams. Disabled by default — enable if you want to render partial output in real time: `AgentConfig(..., stream_tokens=True)` |
|
|
@@ -61,6 +61,7 @@ class AgentConfig:
|
|
|
61
61
|
max_steps: int = 10
|
|
62
62
|
memory_context_enabled: bool = True
|
|
63
63
|
confidence_from_llm: bool = True # if False, confidence=1.0 on success
|
|
64
|
+
stream_tokens: bool = False # if True, TOKEN events are emitted as the LLM streams
|
|
64
65
|
working_memory_max_tokens: int = 8000 # WorkingMemory eviction threshold; tune per agent
|
|
65
66
|
hitl_tools: list[str] = None # tools requiring human approval; None = no HITL
|
|
66
67
|
checkpoint_every: int = 0 # write a resumable checkpoint every N steps; 0 = disabled
|
|
@@ -649,11 +650,12 @@ class BaseAgent:
|
|
|
649
650
|
messages=messages,
|
|
650
651
|
):
|
|
651
652
|
accumulated += token
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
653
|
+
if self.config.stream_tokens:
|
|
654
|
+
yield BusEvent(
|
|
655
|
+
type=EventType.TOKEN,
|
|
656
|
+
agent_id=self.config.agent_id,
|
|
657
|
+
token=token,
|
|
658
|
+
)
|
|
657
659
|
response = _parse_action_json(accumulated)
|
|
658
660
|
if response is None:
|
|
659
661
|
logger.warning(
|
|
@@ -736,10 +738,10 @@ class BaseAgent:
|
|
|
736
738
|
if not (self._checkpoint_store and tool_name in self.config.hitl_tools):
|
|
737
739
|
return None
|
|
738
740
|
|
|
739
|
-
from harness.hitl import ApprovalRequest, is_session_allowed, request_approval
|
|
741
|
+
from harness.hitl import ApprovalRequest, is_allowed, request_approval
|
|
740
742
|
|
|
741
|
-
if is_session_allowed(tool_name, tool_args):
|
|
742
|
-
return None # fast-path: human already allowed this tool/prefix
|
|
743
|
+
if is_allowed(tool_name, tool_args):
|
|
744
|
+
return None # fast-path: human already allowed this tool/prefix
|
|
743
745
|
|
|
744
746
|
approval_id = str(uuid.uuid4())
|
|
745
747
|
await self._checkpoint_store.write(
|
|
@@ -840,33 +842,35 @@ class BaseAgent:
|
|
|
840
842
|
pending: dict,
|
|
841
843
|
) -> AsyncGenerator[BusEvent, None]:
|
|
842
844
|
"""Re-prompt approval for a step interrupted by a crash, then complete it."""
|
|
843
|
-
from harness.hitl import ApprovalRequest, request_approval
|
|
845
|
+
from harness.hitl import ApprovalRequest, is_allowed, request_approval
|
|
844
846
|
|
|
845
847
|
tool_name = pending["tool"]
|
|
846
848
|
tool_args = pending["args"]
|
|
847
849
|
step = pending["step"]
|
|
848
850
|
llm_response = pending["llm_response"]
|
|
849
851
|
|
|
850
|
-
approval = await request_approval(
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
852
|
+
approval = None
|
|
853
|
+
if not is_allowed(tool_name, tool_args):
|
|
854
|
+
approval = await request_approval(
|
|
855
|
+
ApprovalRequest(
|
|
856
|
+
approval_id=pending["approval_id"],
|
|
857
|
+
run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
|
|
858
|
+
agent_id=self.config.agent_id,
|
|
859
|
+
tool=tool_name,
|
|
860
|
+
args=tool_args,
|
|
861
|
+
step=step,
|
|
862
|
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
863
|
+
),
|
|
864
|
+
self._guard,
|
|
865
|
+
)
|
|
862
866
|
|
|
863
|
-
if approval.correction:
|
|
867
|
+
if approval is not None and approval.correction:
|
|
864
868
|
await self._inject_human_guidance(llm_response, approval.correction, run_id, step)
|
|
865
869
|
return
|
|
866
870
|
|
|
867
871
|
observation = (
|
|
868
872
|
await self._execute_tool(tool_name, tool_args)
|
|
869
|
-
if approval.approved
|
|
873
|
+
if approval is None or approval.approved
|
|
870
874
|
else f"Tool rejected by human: {approval.correction or 'no reason given'}"
|
|
871
875
|
)
|
|
872
876
|
obs_display = "[image]" if _is_image_block(observation) else str(observation)[:500]
|
|
@@ -14,6 +14,7 @@ from harness.llm.auth import (
|
|
|
14
14
|
OpenAICodexOAuthClient,
|
|
15
15
|
default_auth_file,
|
|
16
16
|
)
|
|
17
|
+
from harness.tool_policy import ToolPolicyStore, default_policy_file
|
|
17
18
|
|
|
18
19
|
PROVIDERS = ["openai-codex", "claude-code"]
|
|
19
20
|
|
|
@@ -35,6 +36,16 @@ def main() -> int:
|
|
|
35
36
|
logout_cmd.add_argument("provider", choices=PROVIDERS)
|
|
36
37
|
logout_cmd.add_argument("--auth-file", default=str(default_auth_file()))
|
|
37
38
|
|
|
39
|
+
policy = sub.add_parser("policy", help="manage persistent tool policy")
|
|
40
|
+
policy_sub = policy.add_subparsers(dest="policy_command", required=True)
|
|
41
|
+
policy_list = policy_sub.add_parser("list", help="list persistent policy rules")
|
|
42
|
+
policy_list.add_argument("--policy-file", default=str(default_policy_file()))
|
|
43
|
+
policy_revoke = policy_sub.add_parser("revoke", help="remove one policy rule")
|
|
44
|
+
policy_revoke.add_argument("rule_id")
|
|
45
|
+
policy_revoke.add_argument("--policy-file", default=str(default_policy_file()))
|
|
46
|
+
policy_clear = policy_sub.add_parser("clear", help="remove all policy rules")
|
|
47
|
+
policy_clear.add_argument("--policy-file", default=str(default_policy_file()))
|
|
48
|
+
|
|
38
49
|
args = parser.parse_args()
|
|
39
50
|
try:
|
|
40
51
|
if args.command == "login":
|
|
@@ -52,6 +63,14 @@ def main() -> int:
|
|
|
52
63
|
return _logout_oauth_provider(Path(args.auth_file).expanduser(), "openai-codex")
|
|
53
64
|
if args.provider == "claude-code":
|
|
54
65
|
return _logout_oauth_provider(Path(args.auth_file).expanduser(), "claude-code")
|
|
66
|
+
if args.command == "policy":
|
|
67
|
+
path = Path(args.policy_file).expanduser()
|
|
68
|
+
if args.policy_command == "list":
|
|
69
|
+
return _policy_list(path)
|
|
70
|
+
if args.policy_command == "revoke":
|
|
71
|
+
return _policy_revoke(path, args.rule_id)
|
|
72
|
+
if args.policy_command == "clear":
|
|
73
|
+
return _policy_clear(path)
|
|
55
74
|
except Exception as e:
|
|
56
75
|
print(f"agent-harness: {e}", file=sys.stderr)
|
|
57
76
|
return 1
|
|
@@ -133,5 +152,26 @@ def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
|
|
|
133
152
|
provider._write_credential(cred)
|
|
134
153
|
|
|
135
154
|
|
|
155
|
+
def _policy_list(path: Path) -> int:
|
|
156
|
+
store = ToolPolicyStore(path)
|
|
157
|
+
rules = [rule.to_dict() for rule in store.list_rules()]
|
|
158
|
+
print(json.dumps({"policy_file": str(path), "rules": rules}, indent=2))
|
|
159
|
+
return 0
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _policy_revoke(path: Path, rule_id: str) -> int:
|
|
163
|
+
if not ToolPolicyStore(path).revoke(rule_id):
|
|
164
|
+
print(f"Policy rule not found: {rule_id}", file=sys.stderr)
|
|
165
|
+
return 1
|
|
166
|
+
print(f"Removed policy rule: {rule_id}")
|
|
167
|
+
return 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _policy_clear(path: Path) -> int:
|
|
171
|
+
count = ToolPolicyStore(path).clear()
|
|
172
|
+
print(f"Removed {count} policy rule(s)")
|
|
173
|
+
return 0
|
|
174
|
+
|
|
175
|
+
|
|
136
176
|
if __name__ == "__main__":
|
|
137
177
|
raise SystemExit(main())
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Standard console renderer for BusEvent streams."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from typing import TextIO
|
|
8
|
+
|
|
9
|
+
from harness.events import BusEvent, EventType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def trunc(s: str, n: int) -> str:
|
|
13
|
+
"""Truncate *s* to *n* characters, appending '…' when clipped."""
|
|
14
|
+
return s if len(s) <= n else s[:n] + "…"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConsoleRenderer:
|
|
18
|
+
"""Renders BusEvent objects to a text stream.
|
|
19
|
+
|
|
20
|
+
Centralises all event-type formatting so callers don't duplicate
|
|
21
|
+
THOUGHT/ACTION/OBSERVATION/... blocks and separator/truncation helpers.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
truncate: Max characters for long text fields.
|
|
25
|
+
sep_char: Character used for separator lines.
|
|
26
|
+
sep_width: Width of separator lines.
|
|
27
|
+
agent_label_width: Width of the ``[agent_id]`` label column.
|
|
28
|
+
show_tokens: If True, TOKEN events are printed inline.
|
|
29
|
+
out: Output stream (defaults to ``sys.stdout``).
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
*,
|
|
35
|
+
truncate: int = 140,
|
|
36
|
+
sep_char: str = "─",
|
|
37
|
+
sep_width: int = 72,
|
|
38
|
+
agent_label_width: int = 16,
|
|
39
|
+
show_tokens: bool = False,
|
|
40
|
+
out: TextIO | None = None,
|
|
41
|
+
) -> None:
|
|
42
|
+
self._truncate = truncate
|
|
43
|
+
self._sep_char = sep_char
|
|
44
|
+
self._sep_width = sep_width
|
|
45
|
+
self._label_w = agent_label_width
|
|
46
|
+
self._show_tokens = show_tokens
|
|
47
|
+
self._out = out or sys.stdout
|
|
48
|
+
self._in_token_stream = False
|
|
49
|
+
|
|
50
|
+
# ── public helpers ────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
def sep(self, char: str | None = None, w: int | None = None) -> None:
|
|
53
|
+
"""Print a separator line."""
|
|
54
|
+
print((char or self._sep_char) * (w or self._sep_width), file=self._out)
|
|
55
|
+
|
|
56
|
+
def render(self, event: BusEvent) -> None:
|
|
57
|
+
"""Print formatted output for one BusEvent."""
|
|
58
|
+
if event.type == EventType.TOKEN:
|
|
59
|
+
if self._show_tokens:
|
|
60
|
+
if not self._in_token_stream:
|
|
61
|
+
self._in_token_stream = True
|
|
62
|
+
self._out.write(event.token)
|
|
63
|
+
self._out.flush()
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
# Close any in-progress token stream before the next event line.
|
|
67
|
+
if self._in_token_stream:
|
|
68
|
+
self._out.write("\n")
|
|
69
|
+
self._out.flush()
|
|
70
|
+
self._in_token_stream = False
|
|
71
|
+
|
|
72
|
+
t = event.type
|
|
73
|
+
p = event.payload
|
|
74
|
+
|
|
75
|
+
if t == EventType.DISPATCH:
|
|
76
|
+
print(
|
|
77
|
+
f"\n[dispatch] complexity={p.get('complexity')} path={p.get('path')}",
|
|
78
|
+
file=self._out,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
elif t == EventType.ROUTE:
|
|
82
|
+
print(
|
|
83
|
+
f"[route] → {p.get('agent_id')}: {trunc(p.get('rationale', ''), 90)}",
|
|
84
|
+
file=self._out,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
elif t == EventType.PLAN:
|
|
88
|
+
tasks = p.get("plan", {}).get("tasks", [])
|
|
89
|
+
print(f"\n[plan] {len(tasks)} tasks", file=self._out)
|
|
90
|
+
for task in tasks:
|
|
91
|
+
deps = f" ← {task['depends_on']}" if task.get("depends_on") else ""
|
|
92
|
+
print(
|
|
93
|
+
f" {task['id']}@{task['agent_id']}: "
|
|
94
|
+
f"{trunc(task.get('instruction', ''), 70)}{deps}",
|
|
95
|
+
file=self._out,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
elif t == EventType.THOUGHT:
|
|
99
|
+
thought = p.get("thought", "")
|
|
100
|
+
if thought:
|
|
101
|
+
print(
|
|
102
|
+
f"{self._label(event)} think {trunc(thought, 110)}",
|
|
103
|
+
file=self._out,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
elif t == EventType.ACTION:
|
|
107
|
+
args = json.dumps(p.get("args", {}), default=str)
|
|
108
|
+
print(
|
|
109
|
+
f"{self._label(event)} action {p.get('tool')}({trunc(args, 90)})",
|
|
110
|
+
file=self._out,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
elif t == EventType.OBSERVATION:
|
|
114
|
+
obs = p.get("observation", "")
|
|
115
|
+
print(
|
|
116
|
+
f"{self._label(event)} obs {trunc(obs, 110)}",
|
|
117
|
+
file=self._out,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
elif t == EventType.HUMAN_GUIDANCE:
|
|
121
|
+
print(
|
|
122
|
+
f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
|
|
123
|
+
file=self._out,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
elif t == EventType.TASK_DONE:
|
|
127
|
+
print(
|
|
128
|
+
f"{self._label(event)} ✓ done "
|
|
129
|
+
f"confidence={p.get('confidence', 0):.2f} steps={p.get('steps', '?')}",
|
|
130
|
+
file=self._out,
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
elif t == EventType.REPLAN:
|
|
134
|
+
print(
|
|
135
|
+
f"\n[replan] #{p.get('replan_count')} — trigger={p.get('trigger_task', '?')}",
|
|
136
|
+
file=self._out,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
elif t == EventType.SYNTHESIS:
|
|
140
|
+
print(
|
|
141
|
+
f"\n[synthesis] confidence={p.get('confidence', 0):.2f}",
|
|
142
|
+
file=self._out,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
elif t == EventType.DONE:
|
|
146
|
+
print(file=self._out)
|
|
147
|
+
self.sep("═")
|
|
148
|
+
print(p.get("answer", "(no answer)"), file=self._out)
|
|
149
|
+
self.sep()
|
|
150
|
+
print(
|
|
151
|
+
f"Confidence: {p.get('confidence', 0):.2f} | "
|
|
152
|
+
f"Replans: {p.get('replan_count', 0)} | "
|
|
153
|
+
f"Cost: ${p.get('cost_usd', 0):.4f} | "
|
|
154
|
+
f"Time: {p.get('elapsed_seconds', 0):.1f}s",
|
|
155
|
+
file=self._out,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
elif t == EventType.ERROR:
|
|
159
|
+
print(f"\n[error] {event.error}", file=sys.stderr)
|
|
160
|
+
|
|
161
|
+
# ── private helpers ───────────────────────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
def _label(self, event: BusEvent) -> str:
|
|
164
|
+
if event.agent_id:
|
|
165
|
+
return f"[{event.agent_id:<{self._label_w}}]"
|
|
166
|
+
return f"[{event.type.value:<{self._label_w}}]"
|
|
@@ -8,7 +8,7 @@ Same-session flow:
|
|
|
8
8
|
2. A checkpoint is written to the CheckpointStore (step + WorkingMemory +
|
|
9
9
|
pending tool). BudgetGuard clock suspends.
|
|
10
10
|
3. Approval banner is printed to the terminal.
|
|
11
|
-
4. Human types y / n / a / <correction> in the terminal.
|
|
11
|
+
4. Human types y / n / a / A / <correction> in the terminal.
|
|
12
12
|
5. Guard resumes; agent continues (or injects correction and skips the tool).
|
|
13
13
|
|
|
14
14
|
Crash / Ctrl-C / kill flow:
|
|
@@ -22,7 +22,7 @@ Crash / Ctrl-C / kill flow:
|
|
|
22
22
|
The UUID printed at the prompt is an audit reference only.
|
|
23
23
|
|
|
24
24
|
Correction steering:
|
|
25
|
-
Any text that isn't y/yes/a/allow/n/no is treated as a correction.
|
|
25
|
+
Any text that isn't y/yes/a/allow/A/always/allow always/n/no is treated as a correction.
|
|
26
26
|
The gated tool is skipped and the text is injected into WorkingMemory
|
|
27
27
|
as a user message, so the LLM sees it on the next think step.
|
|
28
28
|
|
|
@@ -32,6 +32,11 @@ Session allow:
|
|
|
32
32
|
first word of the command arg (e.g. 'git'), so allowing 'git' doesn't also
|
|
33
33
|
allow 'rm'. Subsequent calls matching the key skip checkpoint + banner.
|
|
34
34
|
Use is_session_allowed(tool, args) to query the list from outside.
|
|
35
|
+
|
|
36
|
+
Persistent allow:
|
|
37
|
+
Typing A or always approves the current call and writes a user-scoped
|
|
38
|
+
allow rule to ~/.agent-harness/policies/tool_policy.json. Rules are narrow:
|
|
39
|
+
shell-like tools are scoped by first command word, other tools by tool name.
|
|
35
40
|
"""
|
|
36
41
|
|
|
37
42
|
from __future__ import annotations
|
|
@@ -73,6 +78,18 @@ def is_session_allowed(tool: str, args: dict) -> bool:
|
|
|
73
78
|
return _session_key(tool, args) in _session_allowed
|
|
74
79
|
|
|
75
80
|
|
|
81
|
+
def is_persistently_allowed(tool: str, args: dict) -> bool:
    """Report whether the user policy store allows this tool+args call.

    A new ``ToolPolicyStore`` is constructed on every call and its
    ``is_allowed(tool, args)`` answer is returned unchanged.
    """
    # Imported at call time rather than at module scope.
    from harness.tool_policy import ToolPolicyStore

    store = ToolPolicyStore()
    return store.is_allowed(tool, args)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def is_allowed(tool: str, args: dict) -> bool:
    """Report whether this tool+args call may skip the approval prompt.

    The in-memory session allow-list is consulted first; the user policy
    is only queried when the session list does not already allow the call.
    """
    if is_session_allowed(tool, args):
        return True
    return is_persistently_allowed(tool, args)
|
|
91
|
+
|
|
92
|
+
|
|
76
93
|
def _session_label(tool: str, args: dict) -> str:
|
|
77
94
|
"""Human-readable description of what 'a' would allow."""
|
|
78
95
|
_, prefix = _session_key(tool, args)
|
|
@@ -122,6 +139,7 @@ class ApprovalResponse:
|
|
|
122
139
|
approved: bool
|
|
123
140
|
correction: str | None = None # non-None → steering; tool is skipped
|
|
124
141
|
session_allow: bool = False # True → add (tool, prefix) to _session_allowed
|
|
142
|
+
persistent_allow: bool = False # True → write a user-scoped allow rule
|
|
125
143
|
|
|
126
144
|
|
|
127
145
|
# ── CLI gate ──────────────────────────────────────────────────────────────────
|
|
@@ -140,21 +158,29 @@ def _print_banner(req: ApprovalRequest) -> None:
|
|
|
140
158
|
print(f" ID: {req.approval_id}")
|
|
141
159
|
print(_SEP)
|
|
142
160
|
print(
|
|
143
|
-
|
|
161
|
+
" y = approve once | "
|
|
162
|
+
f"a = allow '{label}' for session | "
|
|
163
|
+
f"A = always allow '{label}' | "
|
|
164
|
+
"n = reject | <text> = steer"
|
|
144
165
|
)
|
|
145
166
|
print(f" Ctrl-C to pause. Resume: python {script} --resume {req.run_id}")
|
|
146
167
|
print(_SEP)
|
|
147
168
|
|
|
148
169
|
|
|
149
170
|
def _parse_stdin(approval_id: str, raw: str) -> ApprovalResponse:
    """Translate one line of operator input into an ``ApprovalResponse``.

    Recognised replies (surrounding whitespace is ignored):

    * ``A`` (exact case), ``always``, ``allow always`` -> approved, ``persistent_allow=True``
    * ``y`` / ``yes``   -> approved once
    * ``a`` / ``allow`` -> approved, ``session_allow=True``
    * ``n`` / ``no``    -> rejected
    * blank input       -> rejected
    * anything else     -> returned as ``correction`` (steering text)

    Apart from the exact-case ``A`` check, keywords are matched
    case-insensitively.

    Args:
        approval_id: Identifier copied into the returned response.
        raw: The text the operator typed, unprocessed.

    Returns:
        The ``ApprovalResponse`` describing the operator's decision.
    """
    stripped = raw.strip()

    # Fail closed: a blank line must never approve a gated tool. Previously
    # it produced approved=True with correction=None, i.e. a plain approval.
    if not stripped:
        return ApprovalResponse(approval_id=approval_id, approved=False)

    # Capital "A" is checked before lower-casing so it stays distinct from
    # the session-scoped lowercase "a".
    if stripped == "A":
        return ApprovalResponse(approval_id=approval_id, approved=True, persistent_allow=True)

    lo = stripped.lower()
    if lo in ("y", "yes"):
        return ApprovalResponse(approval_id=approval_id, approved=True)
    if lo in ("a", "allow"):
        return ApprovalResponse(approval_id=approval_id, approved=True, session_allow=True)
    if lo in ("always", "allow always"):
        return ApprovalResponse(approval_id=approval_id, approved=True, persistent_allow=True)
    if lo in ("n", "no"):
        return ApprovalResponse(approval_id=approval_id, approved=False)

    # Any other non-empty text is passed back as a correction.
    return ApprovalResponse(approval_id=approval_id, approved=True, correction=stripped)
|
|
158
184
|
|
|
159
185
|
|
|
160
186
|
async def request_approval(
|
|
@@ -172,6 +198,7 @@ async def request_approval(
|
|
|
172
198
|
y / yes → approved, tool runs
|
|
173
199
|
n / no → rejected, tool skipped (error observation returned)
|
|
174
200
|
a / allow → approved + session-allow registered; tool runs
|
|
201
|
+
A / always → approved + user policy allow registered; tool runs
|
|
175
202
|
<any text> → correction injected into WorkingMemory; tool skipped
|
|
176
203
|
|
|
177
204
|
Holds stdout_lock for the duration so concurrent agent events don't
|
|
@@ -190,7 +217,7 @@ async def request_approval(
|
|
|
190
217
|
|
|
191
218
|
async with stdout_lock:
|
|
192
219
|
router = get_active_router()
|
|
193
|
-
approve_prompt = " Approve? [y/n/a/correction]: "
|
|
220
|
+
approve_prompt = " Approve? [y/n/a/A/correction]: "
|
|
194
221
|
# If a router is active, reserve the next stdin read BEFORE printing
|
|
195
222
|
# the banner so the user's typed answer routes to HITL (not steering).
|
|
196
223
|
hitl_future: Any = (
|
|
@@ -226,4 +253,9 @@ async def request_approval(
|
|
|
226
253
|
if resp.session_allow:
|
|
227
254
|
_session_allowed.add(_session_key(req.tool, req.args))
|
|
228
255
|
print(f" ✓ '{_session_label(req.tool, req.args)}' allowed for this session\n")
|
|
256
|
+
if resp.persistent_allow:
|
|
257
|
+
from harness.tool_policy import ToolPolicyStore
|
|
258
|
+
|
|
259
|
+
rule = ToolPolicyStore().add_allow_rule(tool=req.tool, args=req.args)
|
|
260
|
+
print(f" ✓ '{_session_label(req.tool, req.args)}' always allowed ({rule.id})\n")
|
|
229
261
|
return resp
|