react-agent-harness 0.3.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.3.2/react_agent_harness.egg-info → react_agent_harness-0.5.0}/PKG-INFO +1 -1
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/README.md +140 -14
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/agents/base.py +56 -18
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/cli.py +40 -0
- react_agent_harness-0.5.0/harness/console.py +197 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/events.py +4 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/hitl.py +38 -6
- react_agent_harness-0.5.0/harness/llm/anthropic.py +242 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/claude_code.py +42 -10
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/runtime.py +37 -0
- react_agent_harness-0.5.0/harness/tool_policy.py +183 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/working.py +17 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/orchestrator/planner.py +147 -9
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/pyproject.toml +1 -1
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt +9 -1
- react_agent_harness-0.5.0/tests/test_anthropic_llm.py +401 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_claude_code_llm.py +115 -2
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_cli.py +29 -0
- react_agent_harness-0.5.0/tests/test_console_renderer.py +52 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_mcp_adapter.py +45 -26
- react_agent_harness-0.5.0/tests/test_mcp_auth.py +104 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_orchestrator.py +250 -4
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_streaming.py +1 -0
- react_agent_harness-0.5.0/tests/test_tool_policy.py +89 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_working_memory.py +15 -0
- react_agent_harness-0.5.0/tools/mcp/__init__.py +14 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/mcp/adapter.py +36 -31
- react_agent_harness-0.5.0/tools/mcp/auth.py +129 -0
- react_agent_harness-0.3.2/tools/mcp/__init__.py +0 -4
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/LICENSE +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/agents/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/annotation.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/checkpoint.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/_streaming.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/auth.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/openai.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/openai_codex.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/otel.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/steering.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/utils.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/manager.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/redis_store.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/stores.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/orchestrator/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/requires.txt +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/setup.cfg +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_checkpoint_resume.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_llm_auth.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_memory.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_openai_codex_llm.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_openai_llm.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_otel.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_steering.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_utils.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_vision.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/http_fetch.py +0 -0
|
@@ -38,6 +38,8 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
|
|
|
38
38
|
harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
|
|
39
39
|
harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
|
|
40
40
|
harness/hitl.py HITL approval gate — interactive CLI, session-allow list
|
|
41
|
+
harness/tool_policy.py Persistent tool policy — user-scoped allow rules, CLI management
|
|
42
|
+
harness/console.py ConsoleRenderer — centralised BusEvent formatting for CLI apps
|
|
41
43
|
harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
|
|
42
44
|
harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
|
|
43
45
|
harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
|
|
@@ -74,6 +76,7 @@ explicit control.
|
|
|
74
76
|
| `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
|
|
75
77
|
| `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
|
|
76
78
|
| `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
|
|
79
|
+
| `examples/mcp_auth_demo.py` | Connects to an authenticated remote MCP server using bearer or auth-file credentials. | `OPENAI_API_KEY`, `[openai,mcp]`, `MCP_URL`, `MCP_BEARER_TOKEN` or `MCP_AUTH_PROVIDER` |
|
|
77
80
|
| `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
|
|
78
81
|
|
|
79
82
|
## Adding a new domain (3 steps)
|
|
@@ -337,22 +340,96 @@ async for event in runtime.run_stream("investigate GPU spike on worker-07"):
|
|
|
337
340
|
print(event.payload["answer"])
|
|
338
341
|
```
|
|
339
342
|
|
|
343
|
+
### 4. Pre-built — `run_with_plan` / `run_with_plan_stream`
|
|
344
|
+
|
|
345
|
+
Supply a hand-written `Plan` and bypass the LLM planner entirely. Use
|
|
346
|
+
this for deterministic, repeatable workflows where the decomposition is
|
|
347
|
+
known upfront — CI pipelines, ETL jobs, scheduled tasks. The plan is
|
|
348
|
+
validated against registered agents before execution; everything
|
|
349
|
+
downstream (parallel batches, replan-on-failure, synthesis, memory
|
|
350
|
+
writes, steering) is identical to `run_stream`.
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
from orchestrator.planner import Plan, Task
|
|
354
|
+
|
|
355
|
+
plan = Plan([
|
|
356
|
+
Task("t1", "analyst", "Analyse error logs from the last hour"),
|
|
357
|
+
Task("t2", "reporter", "Write an incident summary", depends_on=["t1"]),
|
|
358
|
+
])
|
|
359
|
+
|
|
360
|
+
# streaming
|
|
361
|
+
async for event in runtime.run_with_plan_stream(plan, goal="Incident report"):
|
|
362
|
+
if event.type == EventType.DONE:
|
|
363
|
+
print(event.payload["answer"])
|
|
364
|
+
|
|
365
|
+
# blocking
|
|
366
|
+
result = await runtime.run_with_plan(plan, goal="Incident report")
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
The `goal` string is passed to the synthesiser and used for memory
|
|
370
|
+
context injection into agents — even though the plan shape is fixed, the
|
|
371
|
+
agents themselves still read from memory.
|
|
372
|
+
|
|
373
|
+
If a task fails mid-run and `on_failure="replan"`, the replan call does
|
|
374
|
+
go to the LLM — the bypass is for the *initial* plan only.
|
|
375
|
+
|
|
376
|
+
---
|
|
377
|
+
|
|
340
378
|
Event types by path:
|
|
341
379
|
|
|
342
|
-
| Event | Dispatch | Routed | Direct | Orchestrated |
|
|
343
|
-
|
|
344
|
-
| `DISPATCH` | ✓ | — | — | — |
|
|
345
|
-
| `ROUTE` | ✓ (simple) | ✓ | — | — |
|
|
346
|
-
| `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ |
|
|
347
|
-
| `TASK_DONE` | ✓ | ✓ | ✓ | ✓ |
|
|
348
|
-
| `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ |
|
|
349
|
-
| `ERROR` | ✓ | ✓ | ✓ | ✓ |
|
|
380
|
+
| Event | Dispatch | Routed | Direct | Orchestrated | Pre-built |
|
|
381
|
+
|---|---|---|---|---|---|
|
|
382
|
+
| `DISPATCH` | ✓ | — | — | — | — |
|
|
383
|
+
| `ROUTE` | ✓ (simple) | ✓ | — | — | — |
|
|
384
|
+
| `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
385
|
+
| `TASK_DONE` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
386
|
+
| `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ | ✓ |
|
|
387
|
+
| `ERROR` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
350
388
|
|
|
351
389
|
`TOKEN` events fire only when your LLM client exposes
|
|
352
390
|
`async def stream_complete(system, messages) -> AsyncGenerator[str, None]`.
|
|
353
391
|
Non-streaming clients still work — they emit the full response in one
|
|
354
392
|
`THOUGHT` event per step.
|
|
355
393
|
|
|
394
|
+
## Console rendering
|
|
395
|
+
|
|
396
|
+
`ConsoleRenderer` handles all `BusEvent` types with consistent label
|
|
397
|
+
and truncation formatting so event-loop boilerplate stays out of your
|
|
398
|
+
scripts.
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
from harness.console import ConsoleRenderer, trunc
|
|
402
|
+
|
|
403
|
+
renderer = ConsoleRenderer(
|
|
404
|
+
truncate=140, # max chars for long text fields
|
|
405
|
+
sep_char="─", # separator character
|
|
406
|
+
sep_width=72, # separator width
|
|
407
|
+
agent_label_width=16, # width of [agent_id] column
|
|
408
|
+
show_tokens=False, # True to print TOKEN events inline
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
async for event in runtime.dispatch_stream(goal):
|
|
412
|
+
renderer.render(event) # handles every EventType
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
For events with custom section headers (e.g. a "PROJECT HEALTH REPORT"
|
|
416
|
+
block), handle that event yourself and skip `render` for it — the
|
|
417
|
+
renderer is additive:
|
|
418
|
+
|
|
419
|
+
```python
|
|
420
|
+
async for event in runtime.run_stream(goal):
|
|
421
|
+
if event.type == EventType.DONE:
|
|
422
|
+
renderer.sep("═")
|
|
423
|
+
print("MY CUSTOM HEADER")
|
|
424
|
+
renderer.sep("═")
|
|
425
|
+
print(event.payload["answer"])
|
|
426
|
+
else:
|
|
427
|
+
renderer.render(event)
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
`trunc(s, n)` is exported for standalone use when you need to clip a
|
|
431
|
+
string to its first `n` characters, with `…` appended if it was cut.
|
|
432
|
+
|
|
356
433
|
## Working memory budget
|
|
357
434
|
|
|
358
435
|
`AgentConfig.working_memory_max_tokens` controls per-agent eviction (default
|
|
@@ -518,7 +595,48 @@ async with MCPServerConnection(params, server_name="filesystem") as conn:
|
|
|
518
595
|
Supports **stdio** and **SSE** transports. The `MCPServerConnection` context
|
|
519
596
|
manager handles the full lifecycle — connect, discover, and cleanup.
|
|
520
597
|
|
|
521
|
-
|
|
598
|
+
Remote MCP servers can receive static headers or bearer tokens through an auth
|
|
599
|
+
provider:
|
|
600
|
+
|
|
601
|
+
```python
|
|
602
|
+
import os
|
|
603
|
+
from tools.mcp import MCPServerConnection, StaticMCPAuth
|
|
604
|
+
|
|
605
|
+
auth = StaticMCPAuth(
|
|
606
|
+
headers={
|
|
607
|
+
"DD_API_KEY": os.environ["DD_API_KEY"],
|
|
608
|
+
"DD_APPLICATION_KEY": os.environ["DD_APPLICATION_KEY"],
|
|
609
|
+
}
|
|
610
|
+
)
|
|
611
|
+
|
|
612
|
+
async with MCPServerConnection(
|
|
613
|
+
{"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
|
|
614
|
+
server_name="datadog",
|
|
615
|
+
auth=auth,
|
|
616
|
+
) as conn:
|
|
617
|
+
conn.register_tools(tool_registry)
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
OAuth-style auth files can be reused for MCP bearer auth:
|
|
621
|
+
|
|
622
|
+
```python
|
|
623
|
+
from tools.mcp import MCPServerConnection, OAuthMCPAuth
|
|
624
|
+
|
|
625
|
+
auth = OAuthMCPAuth.from_auth_file(
|
|
626
|
+
"~/.agent-harness/auth/auth.json",
|
|
627
|
+
provider="datadog-mcp",
|
|
628
|
+
)
|
|
629
|
+
|
|
630
|
+
async with MCPServerConnection(
|
|
631
|
+
{"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
|
|
632
|
+
server_name="datadog",
|
|
633
|
+
auth=auth,
|
|
634
|
+
) as conn:
|
|
635
|
+
conn.register_tools(tool_registry)
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
See `examples/mcp_demo.py` for local stdio MCP and `examples/mcp_auth_demo.py`
|
|
639
|
+
for authenticated remote MCP.
|
|
522
640
|
|
|
523
641
|
## OpenTelemetry Tracing
|
|
524
642
|
|
|
@@ -699,10 +817,10 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
|
699
817
|
Run: 3f7a1b2c-...:file_agent
|
|
700
818
|
ID: a1b2-c3d4
|
|
701
819
|
────────────────────────────────────────────────────────────
|
|
702
|
-
y = approve once | a = allow 'delete_file' for session | n = reject | <text> = steer
|
|
820
|
+
y = approve once | a = allow 'delete_file' for session | A = always allow 'delete_file' | n = reject | <text> = steer
|
|
703
821
|
Ctrl-C to pause. Resume: python my_script.py --resume 3f7a1b2c-...:file_agent
|
|
704
822
|
────────────────────────────────────────────────────────────
|
|
705
|
-
Approve? [y/n/a/correction]:
|
|
823
|
+
Approve? [y/n/a/A/correction]:
|
|
706
824
|
```
|
|
707
825
|
|
|
708
826
|
**Prompt semantics:**
|
|
@@ -712,11 +830,19 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
|
712
830
|
| `y` / `yes` | Tool runs once |
|
|
713
831
|
| `n` / `no` | Tool skipped; agent sees a rejection observation |
|
|
714
832
|
| `a` / `allow` | Tool runs **and** added to session allow-list; no further prompts for this tool (or command prefix for shell-like tools) |
|
|
833
|
+
| `A` / `always` | Tool runs **and** a user-scoped allow rule is stored in `~/.agent-harness/policies/tool_policy.json` |
|
|
715
834
|
| any other text | Correction: tool skipped, text injected into `WorkingMemory` as a user message; LLM self-corrects on the next step |
|
|
716
835
|
|
|
717
|
-
For shell-like tools (`shell`, `bash`, `run`, `exec`), `a`
|
|
718
|
-
word** of the command — e.g.
|
|
719
|
-
|
|
836
|
+
For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` and `A` allow the
|
|
837
|
+
**first word** of the command — e.g. approving `shell git commit ...` allows
|
|
838
|
+
all `git` commands in that scope but still prompts for `shell rm ...`.
|
|
839
|
+
Persistent rules are user-local, not repo files. Manage them with:
|
|
840
|
+
|
|
841
|
+
```bash
|
|
842
|
+
agent-harness policy list
|
|
843
|
+
agent-harness policy revoke <rule-id>
|
|
844
|
+
agent-harness policy clear
|
|
845
|
+
```
|
|
720
846
|
|
|
721
847
|
**Wall-time budget** is suspended while waiting for input — human think-time
|
|
722
848
|
does not count against `max_wall_time_seconds`.
|
|
@@ -381,6 +381,8 @@ class BaseAgent:
|
|
|
381
381
|
elif thought_event.type == EventType.THOUGHT:
|
|
382
382
|
response = thought_event.payload.get("response")
|
|
383
383
|
yield thought_event
|
|
384
|
+
else:
|
|
385
|
+
yield thought_event
|
|
384
386
|
|
|
385
387
|
if response is None:
|
|
386
388
|
reason = self._last_think_error or "LLM returned unparseable response"
|
|
@@ -642,6 +644,14 @@ class BaseAgent:
|
|
|
642
644
|
"""
|
|
643
645
|
messages = self._working_memory.get_messages()
|
|
644
646
|
accumulated = ""
|
|
647
|
+
before_usage = self._working_memory.context_usage()
|
|
648
|
+
before_summarizations = self._working_memory.summarization_count
|
|
649
|
+
|
|
650
|
+
yield BusEvent(
|
|
651
|
+
type=EventType.CONTEXT,
|
|
652
|
+
agent_id=self.config.agent_id,
|
|
653
|
+
payload=before_usage,
|
|
654
|
+
)
|
|
645
655
|
|
|
646
656
|
try:
|
|
647
657
|
if hasattr(self._llm, "stream_complete"):
|
|
@@ -686,6 +696,32 @@ class BaseAgent:
|
|
|
686
696
|
if response is not None:
|
|
687
697
|
self._last_think_error = None
|
|
688
698
|
|
|
699
|
+
after_usage = self._working_memory.context_usage()
|
|
700
|
+
if self._working_memory.summarization_count > before_summarizations:
|
|
701
|
+
yield BusEvent(
|
|
702
|
+
type=EventType.MEMORY,
|
|
703
|
+
agent_id=self.config.agent_id,
|
|
704
|
+
payload={
|
|
705
|
+
"event": "summarized",
|
|
706
|
+
"before": before_usage,
|
|
707
|
+
"after": after_usage,
|
|
708
|
+
"summarizations": self._working_memory.summarization_count,
|
|
709
|
+
},
|
|
710
|
+
)
|
|
711
|
+
llm_usage = getattr(self._llm, "last_usage", None) or {}
|
|
712
|
+
if llm_usage or after_usage != before_usage:
|
|
713
|
+
yield BusEvent(
|
|
714
|
+
type=EventType.CONTEXT,
|
|
715
|
+
agent_id=self.config.agent_id,
|
|
716
|
+
payload={
|
|
717
|
+
**after_usage,
|
|
718
|
+
"tokens_in": llm_usage.get("tokens_in"),
|
|
719
|
+
"tokens_out": llm_usage.get("tokens_out"),
|
|
720
|
+
"cache_read_tokens": llm_usage.get("cache_read_tokens"),
|
|
721
|
+
"cache_creation_tokens": llm_usage.get("cache_creation_tokens"),
|
|
722
|
+
},
|
|
723
|
+
)
|
|
724
|
+
|
|
689
725
|
yield BusEvent(
|
|
690
726
|
type=EventType.THOUGHT,
|
|
691
727
|
agent_id=self.config.agent_id,
|
|
@@ -738,10 +774,10 @@ class BaseAgent:
|
|
|
738
774
|
if not (self._checkpoint_store and tool_name in self.config.hitl_tools):
|
|
739
775
|
return None
|
|
740
776
|
|
|
741
|
-
from harness.hitl import ApprovalRequest,
|
|
777
|
+
from harness.hitl import ApprovalRequest, is_allowed, request_approval
|
|
742
778
|
|
|
743
|
-
if
|
|
744
|
-
return None # fast-path: human already allowed this tool/prefix
|
|
779
|
+
if is_allowed(tool_name, tool_args):
|
|
780
|
+
return None # fast-path: human already allowed this tool/prefix
|
|
745
781
|
|
|
746
782
|
approval_id = str(uuid.uuid4())
|
|
747
783
|
await self._checkpoint_store.write(
|
|
@@ -842,33 +878,35 @@ class BaseAgent:
|
|
|
842
878
|
pending: dict,
|
|
843
879
|
) -> AsyncGenerator[BusEvent, None]:
|
|
844
880
|
"""Re-prompt approval for a step interrupted by a crash, then complete it."""
|
|
845
|
-
from harness.hitl import ApprovalRequest, request_approval
|
|
881
|
+
from harness.hitl import ApprovalRequest, is_allowed, request_approval
|
|
846
882
|
|
|
847
883
|
tool_name = pending["tool"]
|
|
848
884
|
tool_args = pending["args"]
|
|
849
885
|
step = pending["step"]
|
|
850
886
|
llm_response = pending["llm_response"]
|
|
851
887
|
|
|
852
|
-
approval =
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
888
|
+
approval = None
|
|
889
|
+
if not is_allowed(tool_name, tool_args):
|
|
890
|
+
approval = await request_approval(
|
|
891
|
+
ApprovalRequest(
|
|
892
|
+
approval_id=pending["approval_id"],
|
|
893
|
+
run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
|
|
894
|
+
agent_id=self.config.agent_id,
|
|
895
|
+
tool=tool_name,
|
|
896
|
+
args=tool_args,
|
|
897
|
+
step=step,
|
|
898
|
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
899
|
+
),
|
|
900
|
+
self._guard,
|
|
901
|
+
)
|
|
864
902
|
|
|
865
|
-
if approval.correction:
|
|
903
|
+
if approval is not None and approval.correction:
|
|
866
904
|
await self._inject_human_guidance(llm_response, approval.correction, run_id, step)
|
|
867
905
|
return
|
|
868
906
|
|
|
869
907
|
observation = (
|
|
870
908
|
await self._execute_tool(tool_name, tool_args)
|
|
871
|
-
if approval.approved
|
|
909
|
+
if approval is None or approval.approved
|
|
872
910
|
else f"Tool rejected by human: {approval.correction or 'no reason given'}"
|
|
873
911
|
)
|
|
874
912
|
obs_display = "[image]" if _is_image_block(observation) else str(observation)[:500]
|
|
@@ -14,6 +14,7 @@ from harness.llm.auth import (
|
|
|
14
14
|
OpenAICodexOAuthClient,
|
|
15
15
|
default_auth_file,
|
|
16
16
|
)
|
|
17
|
+
from harness.tool_policy import ToolPolicyStore, default_policy_file
|
|
17
18
|
|
|
18
19
|
PROVIDERS = ["openai-codex", "claude-code"]
|
|
19
20
|
|
|
@@ -35,6 +36,16 @@ def main() -> int:
|
|
|
35
36
|
logout_cmd.add_argument("provider", choices=PROVIDERS)
|
|
36
37
|
logout_cmd.add_argument("--auth-file", default=str(default_auth_file()))
|
|
37
38
|
|
|
39
|
+
policy = sub.add_parser("policy", help="manage persistent tool policy")
|
|
40
|
+
policy_sub = policy.add_subparsers(dest="policy_command", required=True)
|
|
41
|
+
policy_list = policy_sub.add_parser("list", help="list persistent policy rules")
|
|
42
|
+
policy_list.add_argument("--policy-file", default=str(default_policy_file()))
|
|
43
|
+
policy_revoke = policy_sub.add_parser("revoke", help="remove one policy rule")
|
|
44
|
+
policy_revoke.add_argument("rule_id")
|
|
45
|
+
policy_revoke.add_argument("--policy-file", default=str(default_policy_file()))
|
|
46
|
+
policy_clear = policy_sub.add_parser("clear", help="remove all policy rules")
|
|
47
|
+
policy_clear.add_argument("--policy-file", default=str(default_policy_file()))
|
|
48
|
+
|
|
38
49
|
args = parser.parse_args()
|
|
39
50
|
try:
|
|
40
51
|
if args.command == "login":
|
|
@@ -52,6 +63,14 @@ def main() -> int:
|
|
|
52
63
|
return _logout_oauth_provider(Path(args.auth_file).expanduser(), "openai-codex")
|
|
53
64
|
if args.provider == "claude-code":
|
|
54
65
|
return _logout_oauth_provider(Path(args.auth_file).expanduser(), "claude-code")
|
|
66
|
+
if args.command == "policy":
|
|
67
|
+
path = Path(args.policy_file).expanduser()
|
|
68
|
+
if args.policy_command == "list":
|
|
69
|
+
return _policy_list(path)
|
|
70
|
+
if args.policy_command == "revoke":
|
|
71
|
+
return _policy_revoke(path, args.rule_id)
|
|
72
|
+
if args.policy_command == "clear":
|
|
73
|
+
return _policy_clear(path)
|
|
55
74
|
except Exception as e:
|
|
56
75
|
print(f"agent-harness: {e}", file=sys.stderr)
|
|
57
76
|
return 1
|
|
@@ -133,5 +152,26 @@ def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
|
|
|
133
152
|
provider._write_credential(cred)
|
|
134
153
|
|
|
135
154
|
|
|
155
|
+
def _policy_list(path: Path) -> int:
    """Print the persistent policy rules stored at *path* as JSON.

    The report is a JSON object holding the policy file location and the
    ``to_dict()`` form of every rule returned by the store, written to
    stdout with two-space indentation.

    Returns:
        Always 0 (process exit code).
    """
    serialised = []
    for entry in ToolPolicyStore(path).list_rules():
        serialised.append(entry.to_dict())
    report = {"policy_file": str(path), "rules": serialised}
    print(json.dumps(report, indent=2))
    return 0
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _policy_revoke(path: Path, rule_id: str) -> int:
    """Remove the rule identified by *rule_id* from the policy file at *path*.

    Returns:
        0 after the store reports a successful revoke; 1, with a message
        on stderr, when the store reports that nothing was revoked.
    """
    removed = ToolPolicyStore(path).revoke(rule_id)
    if removed:
        print(f"Removed policy rule: {rule_id}")
        return 0
    print(f"Policy rule not found: {rule_id}", file=sys.stderr)
    return 1
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _policy_clear(path: Path) -> int:
    """Clear the policy store at *path* and report what the store returned.

    The value returned by ``ToolPolicyStore.clear()`` is printed as the
    number of removed rules.

    Returns:
        Always 0 (process exit code).
    """
    removed_total = ToolPolicyStore(path).clear()
    print(f"Removed {removed_total} policy rule(s)")
    return 0
|
|
174
|
+
|
|
175
|
+
|
|
136
176
|
if __name__ == "__main__":
|
|
137
177
|
raise SystemExit(main())
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Standard console renderer for BusEvent streams."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from typing import TextIO
|
|
8
|
+
|
|
9
|
+
from harness.events import BusEvent, EventType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def trunc(s: str, n: int) -> str:
    """Keep at most the first *n* characters of *s*, appending '…' when clipped.

    The ellipsis is added on top of the kept characters, so a clipped
    result is ``n + 1`` characters long.

    Args:
        s: Text to shorten. ``None`` is treated as an empty string and any
            other non-string value is rendered with ``str()`` first, so
            loosely-typed payload fields can be passed straight through.
        n: Number of characters of *s* to keep. Negative values are
            clamped to zero.

    Returns:
        *s* unchanged when it already fits, otherwise its first *n*
        characters followed by '…'.
    """
    if s is None:
        return ""
    text = s if isinstance(s, str) else str(s)
    # A negative n would make text[:n] drop characters from the END of the
    # string instead of limiting its length, so clamp it.
    limit = max(n, 0)
    return text if len(text) <= limit else text[:limit] + "…"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConsoleRenderer:
    """Renders BusEvent objects to a text stream.

    Centralises all event-type formatting so callers don't duplicate
    THOUGHT/ACTION/OBSERVATION/... blocks and separator/truncation helpers.

    Args:
        truncate: Max characters for long text fields.
        sep_char: Character used for separator lines.
        sep_width: Width of separator lines.
        agent_label_width: Width of the ``[agent_id]`` label column.
        show_tokens: If True, TOKEN events are printed inline.
        out: Output stream (defaults to ``sys.stdout``).

    Notes:
        * ``truncate`` is stored but ``render`` currently clips each field
          with its own fixed width; the value is kept for API stability.
        * ERROR events are always written to ``sys.stderr``, not ``out``.
    """

    def __init__(
        self,
        *,
        truncate: int = 140,
        sep_char: str = "─",
        sep_width: int = 72,
        agent_label_width: int = 16,
        show_tokens: bool = False,
        out: TextIO | None = None,
    ) -> None:
        self._truncate = truncate
        self._sep_char = sep_char
        self._sep_width = sep_width
        self._label_w = agent_label_width
        self._show_tokens = show_tokens
        # Explicit None check: a caller-supplied stream is used as given,
        # whatever its truthiness.
        self._out = sys.stdout if out is None else out
        # True while TOKEN fragments have been written without a newline.
        self._in_token_stream = False

    # ── public helpers ────────────────────────────────────────────────────────

    def sep(self, char: str | None = None, w: int | None = None) -> None:
        """Print a separator line.

        Args:
            char: Character to repeat; ``None`` selects the configured
                ``sep_char``.
            w: Line width; ``None`` selects the configured ``sep_width``.
                An explicit ``0`` prints an empty line.
        """
        glyph = self._sep_char if char is None else char
        width = self._sep_width if w is None else w
        print(glyph * width, file=self._out)

    def render(self, event: BusEvent) -> None:
        """Print formatted output for one BusEvent.

        TOKEN events are written inline (no newline) when ``show_tokens``
        is enabled and ignored otherwise. Any other event first terminates
        an in-progress token line, then prints its own formatted block.
        Event types without a branch below produce no output. Payload
        values that are ``None`` are rendered as empty text / zero rather
        than raising.
        """
        if event.type == EventType.TOKEN:
            if self._show_tokens:
                self._in_token_stream = True
                self._out.write(event.token)
                self._out.flush()
            return

        # Close any in-progress token stream before the next event line.
        if self._in_token_stream:
            self._out.write("\n")
            self._out.flush()
            self._in_token_stream = False

        t = event.type
        p = event.payload or {}

        if t == EventType.DISPATCH:
            print(
                f"\n[dispatch] complexity={p.get('complexity')} path={p.get('path')}",
                file=self._out,
            )

        elif t == EventType.ROUTE:
            rationale = self._text(p.get("rationale"))
            print(
                f"[route] → {p.get('agent_id')}: {trunc(rationale, 90)}",
                file=self._out,
            )

        elif t == EventType.PLAN:
            tasks = (p.get("plan") or {}).get("tasks") or []
            print(f"\n[plan] {len(tasks)} tasks", file=self._out)
            for task in tasks:
                deps = f" ← {task['depends_on']}" if task.get("depends_on") else ""
                instruction = self._text(task.get("instruction"))
                print(
                    f" {task['id']}@{task['agent_id']}: "
                    f"{trunc(instruction, 70)}{deps}",
                    file=self._out,
                )

        elif t == EventType.THOUGHT:
            thought = p.get("thought", "")
            if thought:
                print(
                    f"{self._label(event)} think {trunc(self._text(thought), 110)}",
                    file=self._out,
                )

        elif t == EventType.ACTION:
            args = json.dumps(p.get("args", {}), default=str)
            print(
                f"{self._label(event)} action {p.get('tool')}({trunc(args, 90)})",
                file=self._out,
            )

        elif t == EventType.OBSERVATION:
            obs = self._text(p.get("observation"))
            print(
                f"{self._label(event)} obs {trunc(obs, 110)}",
                file=self._out,
            )

        elif t == EventType.CONTEXT:
            tokens = int(p.get("tokens") or 0)
            max_tokens = int(p.get("max_tokens") or 0)
            pct = float(p.get("percent") or 0.0) * 100
            level = p.get("level") or "normal"
            suffix = "" if level == "normal" else f" {level}"
            llm_parts: list[str] = []
            # tokens_in / tokens_out are shown even when zero; the cache
            # counters only when non-zero.
            if p.get("tokens_in") is not None:
                llm_parts.append(f"in={int(p['tokens_in']):,}")
            if p.get("tokens_out") is not None:
                llm_parts.append(f"out={int(p['tokens_out']):,}")
            if p.get("cache_read_tokens"):
                llm_parts.append(f"cache_hit={int(p['cache_read_tokens']):,}")
            if p.get("cache_creation_tokens"):
                llm_parts.append(f"cache_new={int(p['cache_creation_tokens']):,}")
            llm_suffix = f" [{' '.join(llm_parts)}]" if llm_parts else ""
            print(
                f"{self._label(event)} ctx {tokens:,} / {max_tokens:,} tokens "
                f"{pct:.0f}%{suffix}{llm_suffix}",
                file=self._out,
            )

        elif t == EventType.MEMORY:
            before = p.get("before") if isinstance(p.get("before"), dict) else {}
            after = p.get("after") if isinstance(p.get("after"), dict) else {}
            print(
                f"{self._label(event)} memory summarized "
                f"{int(before.get('tokens') or 0):,} -> {int(after.get('tokens') or 0):,} tokens",
                file=self._out,
            )

        elif t == EventType.HUMAN_GUIDANCE:
            print(
                f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
                file=self._out,
            )

        elif t == EventType.TASK_DONE:
            confidence = self._num(p.get("confidence"))
            print(
                f"{self._label(event)} ✓ done "
                f"confidence={confidence:.2f} steps={p.get('steps', '?')}",
                file=self._out,
            )

        elif t == EventType.REPLAN:
            print(
                f"\n[replan] #{p.get('replan_count')} — trigger={p.get('trigger_task', '?')}",
                file=self._out,
            )

        elif t == EventType.SYNTHESIS:
            confidence = self._num(p.get("confidence"))
            print(
                f"\n[synthesis] confidence={confidence:.2f}",
                file=self._out,
            )

        elif t == EventType.DONE:
            confidence = self._num(p.get("confidence"))
            cost = self._num(p.get("cost_usd"))
            elapsed = self._num(p.get("elapsed_seconds"))
            print(file=self._out)
            self.sep("═")
            print(p.get("answer", "(no answer)"), file=self._out)
            self.sep()
            print(
                f"Confidence: {confidence:.2f} | "
                f"Replans: {p.get('replan_count', 0)} | "
                f"Cost: ${cost:.4f} | "
                f"Time: {elapsed:.1f}s",
                file=self._out,
            )

        elif t == EventType.ERROR:
            # Errors go to stderr regardless of the configured ``out``.
            print(f"\n[error] {event.error}", file=sys.stderr)

    # ── private helpers ───────────────────────────────────────────────────────

    def _label(self, event: BusEvent) -> str:
        """Return the left-aligned ``[...]`` column: agent id, else event type."""
        if event.agent_id:
            return f"[{event.agent_id:<{self._label_w}}]"
        return f"[{event.type.value:<{self._label_w}}]"

    @staticmethod
    def _text(value: object) -> str:
        """Coerce a payload field to text; ``None`` becomes an empty string."""
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _num(value: object) -> float:
        """Coerce a payload field to float; ``None``/missing becomes 0.0."""
        return float(value or 0)
|
|
@@ -19,6 +19,8 @@ Event lifecycle within a single goal:
|
|
|
19
19
|
PLAN — orchestrator emitted a static DAG
|
|
20
20
|
(per task in DAG)
|
|
21
21
|
HUMAN_GUIDANCE? — async steering drained at top of step
|
|
22
|
+
CONTEXT — working-memory context budget estimate
|
|
23
|
+
MEMORY — working-memory compaction/summarization marker
|
|
22
24
|
THOUGHT — agent's next-step reasoning
|
|
23
25
|
TOKEN* — partial LLM output (only when client streams)
|
|
24
26
|
ACTION — agent chose a tool + args
|
|
@@ -47,6 +49,8 @@ class EventType(str, Enum):
|
|
|
47
49
|
TOKEN = "token"
|
|
48
50
|
ACTION = "action"
|
|
49
51
|
OBSERVATION = "observation"
|
|
52
|
+
CONTEXT = "context"
|
|
53
|
+
MEMORY = "memory"
|
|
50
54
|
HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
|
|
51
55
|
TASK_DONE = "task_done"
|
|
52
56
|
REPLAN = "replan"
|