react-agent-harness 0.3.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {react_agent_harness-0.3.1/react_agent_harness.egg-info → react_agent_harness-0.4.0}/PKG-INFO +1 -1
  2. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/README.md +157 -14
  3. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/agents/base.py +27 -23
  4. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/cli.py +40 -0
  5. react_agent_harness-0.4.0/harness/console.py +166 -0
  6. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/hitl.py +38 -6
  7. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/runtime.py +37 -0
  8. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/steering.py +29 -16
  9. react_agent_harness-0.4.0/harness/tool_policy.py +183 -0
  10. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/orchestrator/planner.py +147 -9
  11. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/pyproject.toml +1 -1
  12. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
  13. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/SOURCES.txt +6 -1
  14. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_cli.py +29 -0
  15. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_mcp_adapter.py +45 -26
  16. react_agent_harness-0.4.0/tests/test_mcp_auth.py +104 -0
  17. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_orchestrator.py +250 -4
  18. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_steering.py +6 -21
  19. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_streaming.py +64 -23
  20. react_agent_harness-0.4.0/tests/test_tool_policy.py +89 -0
  21. react_agent_harness-0.4.0/tools/mcp/__init__.py +14 -0
  22. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/mcp/adapter.py +36 -31
  23. react_agent_harness-0.4.0/tools/mcp/auth.py +129 -0
  24. react_agent_harness-0.3.1/tools/mcp/__init__.py +0 -4
  25. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/LICENSE +0 -0
  26. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/agents/__init__.py +0 -0
  27. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/__init__.py +0 -0
  28. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/annotation.py +0 -0
  29. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/checkpoint.py +0 -0
  30. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/events.py +0 -0
  31. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/executor_bridge.py +0 -0
  32. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/__init__.py +0 -0
  33. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/_streaming.py +0 -0
  34. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/auth.py +0 -0
  35. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/claude_code.py +0 -0
  36. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/openai.py +0 -0
  37. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/llm/openai_codex.py +0 -0
  38. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/otel.py +0 -0
  39. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/harness/utils.py +0 -0
  40. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/__init__.py +0 -0
  41. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/episodic_lance.py +0 -0
  42. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/manager.py +0 -0
  43. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/redis_store.py +0 -0
  44. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/stores.py +0 -0
  45. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/memory/working.py +0 -0
  46. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/orchestrator/__init__.py +0 -0
  47. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  48. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
  49. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/requires.txt +0 -0
  50. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  51. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/setup.cfg +0 -0
  52. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_agents_base.py +0 -0
  53. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_annotation.py +0 -0
  54. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_checkpoint_resume.py +0 -0
  55. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_claude_code_llm.py +0 -0
  56. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_executor_bridge.py +0 -0
  57. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_http_fetch.py +0 -0
  58. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_llm_auth.py +0 -0
  59. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_memory.py +0 -0
  60. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_openai_codex_llm.py +0 -0
  61. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_openai_llm.py +0 -0
  62. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_otel.py +0 -0
  63. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_parse_action_json.py +0 -0
  64. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_redis_store.py +0 -0
  65. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_utils.py +0 -0
  66. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_vision.py +0 -0
  67. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tests/test_working_memory.py +0 -0
  68. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/__init__.py +0 -0
  69. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/__init__.py +0 -0
  70. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/fetch_image.py +0 -0
  71. {react_agent_harness-0.3.1 → react_agent_harness-0.4.0}/tools/builtin/http_fetch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
@@ -38,6 +38,8 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
38
38
  harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
39
39
  harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
40
40
  harness/hitl.py HITL approval gate — interactive CLI, session-allow list
41
+ harness/tool_policy.py Persistent tool policy — user-scoped allow rules, CLI management
42
+ harness/console.py ConsoleRenderer — centralised BusEvent formatting for CLI apps
41
43
  harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
42
44
  harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
43
45
  harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
@@ -74,6 +76,7 @@ explicit control.
74
76
  | `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
75
77
  | `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
76
78
  | `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
79
+ | `examples/mcp_auth_demo.py` | Connects to an authenticated remote MCP server using bearer or auth-file credentials. | `OPENAI_API_KEY`, `[openai,mcp]`, `MCP_URL`, `MCP_BEARER_TOKEN` or `MCP_AUTH_PROVIDER` |
77
80
  | `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
78
81
 
79
82
  ## Adding a new domain (3 steps)
@@ -337,22 +340,96 @@ async for event in runtime.run_stream("investigate GPU spike on worker-07"):
337
340
  print(event.payload["answer"])
338
341
  ```
339
342
 
343
+ ### 4. Pre-built — `run_with_plan` / `run_with_plan_stream`
344
+
345
+ Supply a hand-written `Plan` and bypass the LLM planner entirely. Use
346
+ this for deterministic, repeatable workflows where the decomposition is
347
+ known upfront — CI pipelines, ETL jobs, scheduled tasks. The plan is
348
+ validated against registered agents before execution; everything
349
+ downstream (parallel batches, replan-on-failure, synthesis, memory
350
+ writes, steering) is identical to `run_stream`.
351
+
352
+ ```python
353
+ from orchestrator.planner import Plan, Task
354
+
355
+ plan = Plan([
356
+ Task("t1", "analyst", "Analyse error logs from the last hour"),
357
+ Task("t2", "reporter", "Write an incident summary", depends_on=["t1"]),
358
+ ])
359
+
360
+ # streaming
361
+ async for event in runtime.run_with_plan_stream(plan, goal="Incident report"):
362
+ if event.type == EventType.DONE:
363
+ print(event.payload["answer"])
364
+
365
+ # blocking
366
+ result = await runtime.run_with_plan(plan, goal="Incident report")
367
+ ```
368
+
369
+ The `goal` string is passed to the synthesiser and used for memory
370
+ context injection into agents — even though the plan shape is fixed, the
371
+ agents themselves still read from memory.
372
+
373
+ If a task fails mid-run and `on_failure="replan"`, the replan call does
374
+ go to the LLM — the bypass is for the *initial* plan only.
375
+
376
+ ---
377
+
340
378
  Event types by path:
341
379
 
342
- | Event | Dispatch | Routed | Direct | Orchestrated |
343
- |---|---|---|---|---|
344
- | `DISPATCH` | ✓ | — | — | — |
345
- | `ROUTE` | ✓ (simple) | ✓ | — | — |
346
- | `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ |
347
- | `TASK_DONE` | ✓ | ✓ | ✓ | ✓ |
348
- | `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ |
349
- | `ERROR` | ✓ | ✓ | ✓ | ✓ |
380
+ | Event | Dispatch | Routed | Direct | Orchestrated | Pre-built |
381
+ |---|---|---|---|---|---|
382
+ | `DISPATCH` | ✓ | — | — | — | — |
383
+ | `ROUTE` | ✓ (simple) | ✓ | — | — | — |
384
+ | `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ | ✓ |
385
+ | `TASK_DONE` | ✓ | ✓ | ✓ | ✓ | ✓ |
386
+ | `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ | ✓ |
387
+ | `ERROR` | ✓ | ✓ | ✓ | ✓ | ✓ |
350
388
 
351
389
  `TOKEN` events fire only when your LLM client exposes
352
390
  `async def stream_complete(system, messages) -> AsyncGenerator[str, None]`.
353
391
  Non-streaming clients still work — they emit the full response in one
354
392
  `THOUGHT` event per step.
355
393
 
394
+ ## Console rendering
395
+
396
+ `ConsoleRenderer` handles all `BusEvent` types with consistent label
397
+ and truncation formatting so event-loop boilerplate stays out of your
398
+ scripts.
399
+
400
+ ```python
401
+ from harness.console import ConsoleRenderer, trunc
402
+
403
+ renderer = ConsoleRenderer(
404
+ truncate=140, # max chars for long text fields
405
+ sep_char="─", # separator character
406
+ sep_width=72, # separator width
407
+ agent_label_width=16, # width of [agent_id] column
408
+ show_tokens=False, # True to print TOKEN events inline
409
+ )
410
+
411
+ async for event in runtime.dispatch_stream(goal):
412
+ renderer.render(event) # handles every EventType
413
+ ```
414
+
415
+ For events with custom section headers (e.g. a "PROJECT HEALTH REPORT"
416
+ block), handle that event yourself and skip `render` for it — `render` only
417
+ formats the events you pass to it, so it mixes freely with your own handling:
418
+
419
+ ```python
420
+ async for event in runtime.run_stream(goal):
421
+ if event.type == EventType.DONE:
422
+ renderer.sep("═")
423
+ print("MY CUSTOM HEADER")
424
+ renderer.sep("═")
425
+ print(event.payload["answer"])
426
+ else:
427
+ renderer.render(event)
428
+ ```
429
+
430
+ `trunc(s, n)` is exported for standalone use: it returns `s` unchanged when
430
+ it has at most `n` characters, otherwise the first `n` characters followed by `…`.
432
+
356
433
  ## Working memory budget
357
434
 
358
435
  `AgentConfig.working_memory_max_tokens` controls per-agent eviction (default
@@ -518,7 +595,48 @@ async with MCPServerConnection(params, server_name="filesystem") as conn:
518
595
  Supports **stdio** and **SSE** transports. The `MCPServerConnection` context
519
596
  manager handles the full lifecycle — connect, discover, and cleanup.
520
597
 
521
- See `examples/mcp_demo.py` for a runnable example.
598
+ Remote MCP servers can receive static headers or bearer tokens through an auth
599
+ provider:
600
+
601
+ ```python
602
+ import os
603
+ from tools.mcp import MCPServerConnection, StaticMCPAuth
604
+
605
+ auth = StaticMCPAuth(
606
+ headers={
607
+ "DD_API_KEY": os.environ["DD_API_KEY"],
608
+ "DD_APPLICATION_KEY": os.environ["DD_APPLICATION_KEY"],
609
+ }
610
+ )
611
+
612
+ async with MCPServerConnection(
613
+ {"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
614
+ server_name="datadog",
615
+ auth=auth,
616
+ ) as conn:
617
+ conn.register_tools(tool_registry)
618
+ ```
619
+
620
+ OAuth-style auth files can be reused for MCP bearer auth:
621
+
622
+ ```python
623
+ from tools.mcp import MCPServerConnection, OAuthMCPAuth
624
+
625
+ auth = OAuthMCPAuth.from_auth_file(
626
+ "~/.agent-harness/auth/auth.json",
627
+ provider="datadog-mcp",
628
+ )
629
+
630
+ async with MCPServerConnection(
631
+ {"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
632
+ server_name="datadog",
633
+ auth=auth,
634
+ ) as conn:
635
+ conn.register_tools(tool_registry)
636
+ ```
637
+
638
+ See `examples/mcp_demo.py` for local stdio MCP and `examples/mcp_auth_demo.py`
639
+ for authenticated remote MCP.
522
640
 
523
641
  ## OpenTelemetry Tracing
524
642
 
@@ -699,10 +817,10 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
699
817
  Run: 3f7a1b2c-...:file_agent
700
818
  ID: a1b2-c3d4
701
819
  ────────────────────────────────────────────────────────────
702
- y = approve once | a = allow 'delete_file' for session | n = reject | <text> = steer
820
+ y = approve once | a = allow 'delete_file' for session | A = always allow 'delete_file' | n = reject | <text> = steer
703
821
  Ctrl-C to pause. Resume: python my_script.py --resume 3f7a1b2c-...:file_agent
704
822
  ────────────────────────────────────────────────────────────
705
- Approve? [y/n/a/correction]:
823
+ Approve? [y/n/a/A/correction]:
706
824
  ```
707
825
 
708
826
  **Prompt semantics:**
@@ -712,11 +830,19 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
712
830
  | `y` / `yes` | Tool runs once |
713
831
  | `n` / `no` | Tool skipped; agent sees a rejection observation |
714
832
  | `a` / `allow` | Tool runs **and** added to session allow-list; no further prompts for this tool (or command prefix for shell-like tools) |
833
+ | `A` / `always` | Tool runs **and** a user-scoped allow rule is stored in `~/.agent-harness/policies/tool_policy.json` |
715
834
  | any other text | Correction: tool skipped, text injected into `WorkingMemory` as a user message; LLM self-corrects on the next step |
716
835
 
717
- For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` allows the **first
718
- word** of the command — e.g. typing `a` when approving `shell git commit ...`
719
- allows all `git` commands for the session but still prompts for `shell rm ...`.
836
+ For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` and `A` allow the
837
+ **first word** of the command — e.g. approving `shell git commit ...` allows
838
+ all `git` commands in that scope but still prompts for `shell rm ...`.
839
+ Persistent rules are user-local, not repo files. Manage them with:
840
+
841
+ ```bash
842
+ agent-harness policy list
843
+ agent-harness policy revoke <rule-id>
844
+ agent-harness policy clear
845
+ ```
720
846
 
721
847
  **Wall-time budget** is suspended while waiting for input — human think-time
722
848
  does not count against `max_wall_time_seconds`.
@@ -909,3 +1035,20 @@ key-bindings (like Enter-submits and Alt-Enter/Ctrl-J-newline) across both paths
909
1035
 
910
1036
  See `examples/complex_sysaudit_demo.py` for stdin steering across three
911
1037
  agents alongside HITL on the shell tool.
1038
+
1039
+ ## AgentConfig reference
1040
+
1041
+ | Field | Default | Description |
1042
+ |---|---|---|
1043
+ | `agent_id` | required | Unique identifier for the agent |
1044
+ | `role` | required | Plain-English description used by the planner for agent selection |
1045
+ | `system_prompt` | required | Base system prompt for the agent |
1046
+ | `allowed_tools` | required | Tool names the agent may call |
1047
+ | `max_steps` | `10` | Maximum ReAct iterations before the run is terminated |
1048
+ | `max_wall_time_seconds` | (guardrail) | See `GuardrailConfig` |
1049
+ | `memory_context_enabled` | `True` | Prepend relevant long-term memory to the system prompt |
1050
+ | `confidence_from_llm` | `True` | Use the `confidence` field from the LLM response; set `False` to always return `1.0` |
1051
+ | `working_memory_max_tokens` | `8000` | Token budget for in-context working memory before rolling summarisation kicks in |
1052
+ | `hitl_tools` | `[]` | Tool names that require human approval before execution |
1053
+ | `checkpoint_every` | `0` | Write a crash-resumable checkpoint every N steps; `0` disables periodic checkpoints |
1054
+ | `stream_tokens` | `False` | Emit `TOKEN` events as the LLM streams. Disabled by default — enable if you want to render partial output in real time: `AgentConfig(..., stream_tokens=True)` |
@@ -61,6 +61,7 @@ class AgentConfig:
61
61
  max_steps: int = 10
62
62
  memory_context_enabled: bool = True
63
63
  confidence_from_llm: bool = True # if False, confidence=1.0 on success
64
+ stream_tokens: bool = False # if True, TOKEN events are emitted as the LLM streams
64
65
  working_memory_max_tokens: int = 8000 # WorkingMemory eviction threshold; tune per agent
65
66
  hitl_tools: list[str] = None # tools requiring human approval; None = no HITL
66
67
  checkpoint_every: int = 0 # write a resumable checkpoint every N steps; 0 = disabled
@@ -649,11 +650,12 @@ class BaseAgent:
649
650
  messages=messages,
650
651
  ):
651
652
  accumulated += token
652
- yield BusEvent(
653
- type=EventType.TOKEN,
654
- agent_id=self.config.agent_id,
655
- token=token,
656
- )
653
+ if self.config.stream_tokens:
654
+ yield BusEvent(
655
+ type=EventType.TOKEN,
656
+ agent_id=self.config.agent_id,
657
+ token=token,
658
+ )
657
659
  response = _parse_action_json(accumulated)
658
660
  if response is None:
659
661
  logger.warning(
@@ -736,10 +738,10 @@ class BaseAgent:
736
738
  if not (self._checkpoint_store and tool_name in self.config.hitl_tools):
737
739
  return None
738
740
 
739
- from harness.hitl import ApprovalRequest, is_session_allowed, request_approval
741
+ from harness.hitl import ApprovalRequest, is_allowed, request_approval
740
742
 
741
- if is_session_allowed(tool_name, tool_args):
742
- return None # fast-path: human already allowed this tool/prefix for session
743
+ if is_allowed(tool_name, tool_args):
744
+ return None # fast-path: human already allowed this tool/prefix
743
745
 
744
746
  approval_id = str(uuid.uuid4())
745
747
  await self._checkpoint_store.write(
@@ -840,33 +842,35 @@ class BaseAgent:
840
842
  pending: dict,
841
843
  ) -> AsyncGenerator[BusEvent, None]:
842
844
  """Re-prompt approval for a step interrupted by a crash, then complete it."""
843
- from harness.hitl import ApprovalRequest, request_approval
845
+ from harness.hitl import ApprovalRequest, is_allowed, request_approval
844
846
 
845
847
  tool_name = pending["tool"]
846
848
  tool_args = pending["args"]
847
849
  step = pending["step"]
848
850
  llm_response = pending["llm_response"]
849
851
 
850
- approval = await request_approval(
851
- ApprovalRequest(
852
- approval_id=pending["approval_id"],
853
- run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
854
- agent_id=self.config.agent_id,
855
- tool=tool_name,
856
- args=tool_args,
857
- step=step,
858
- timestamp=datetime.now(timezone.utc).isoformat(),
859
- ),
860
- self._guard,
861
- )
852
+ approval = None
853
+ if not is_allowed(tool_name, tool_args):
854
+ approval = await request_approval(
855
+ ApprovalRequest(
856
+ approval_id=pending["approval_id"],
857
+ run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
858
+ agent_id=self.config.agent_id,
859
+ tool=tool_name,
860
+ args=tool_args,
861
+ step=step,
862
+ timestamp=datetime.now(timezone.utc).isoformat(),
863
+ ),
864
+ self._guard,
865
+ )
862
866
 
863
- if approval.correction:
867
+ if approval is not None and approval.correction:
864
868
  await self._inject_human_guidance(llm_response, approval.correction, run_id, step)
865
869
  return
866
870
 
867
871
  observation = (
868
872
  await self._execute_tool(tool_name, tool_args)
869
- if approval.approved
873
+ if approval is None or approval.approved
870
874
  else f"Tool rejected by human: {approval.correction or 'no reason given'}"
871
875
  )
872
876
  obs_display = "[image]" if _is_image_block(observation) else str(observation)[:500]
@@ -14,6 +14,7 @@ from harness.llm.auth import (
14
14
  OpenAICodexOAuthClient,
15
15
  default_auth_file,
16
16
  )
17
+ from harness.tool_policy import ToolPolicyStore, default_policy_file
17
18
 
18
19
  PROVIDERS = ["openai-codex", "claude-code"]
19
20
 
@@ -35,6 +36,16 @@ def main() -> int:
35
36
  logout_cmd.add_argument("provider", choices=PROVIDERS)
36
37
  logout_cmd.add_argument("--auth-file", default=str(default_auth_file()))
37
38
 
39
+ policy = sub.add_parser("policy", help="manage persistent tool policy")
40
+ policy_sub = policy.add_subparsers(dest="policy_command", required=True)
41
+ policy_list = policy_sub.add_parser("list", help="list persistent policy rules")
42
+ policy_list.add_argument("--policy-file", default=str(default_policy_file()))
43
+ policy_revoke = policy_sub.add_parser("revoke", help="remove one policy rule")
44
+ policy_revoke.add_argument("rule_id")
45
+ policy_revoke.add_argument("--policy-file", default=str(default_policy_file()))
46
+ policy_clear = policy_sub.add_parser("clear", help="remove all policy rules")
47
+ policy_clear.add_argument("--policy-file", default=str(default_policy_file()))
48
+
38
49
  args = parser.parse_args()
39
50
  try:
40
51
  if args.command == "login":
@@ -52,6 +63,14 @@ def main() -> int:
52
63
  return _logout_oauth_provider(Path(args.auth_file).expanduser(), "openai-codex")
53
64
  if args.provider == "claude-code":
54
65
  return _logout_oauth_provider(Path(args.auth_file).expanduser(), "claude-code")
66
+ if args.command == "policy":
67
+ path = Path(args.policy_file).expanduser()
68
+ if args.policy_command == "list":
69
+ return _policy_list(path)
70
+ if args.policy_command == "revoke":
71
+ return _policy_revoke(path, args.rule_id)
72
+ if args.policy_command == "clear":
73
+ return _policy_clear(path)
55
74
  except Exception as e:
56
75
  print(f"agent-harness: {e}", file=sys.stderr)
57
76
  return 1
@@ -133,5 +152,26 @@ def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
133
152
  provider._write_credential(cred)
134
153
 
135
154
 
155
def _policy_list(path: Path) -> int:
    """Print the policy file location and its rules as indented JSON.

    Returns the process exit code, which is always 0.
    """
    serialised = []
    for entry in ToolPolicyStore(path).list_rules():
        serialised.append(entry.to_dict())
    report = {"policy_file": str(path), "rules": serialised}
    print(json.dumps(report, indent=2))
    return 0
160
+
161
+
162
def _policy_revoke(path: Path, rule_id: str) -> int:
    """Remove one rule from the policy file at *path*.

    Returns exit code 0 when the store reports the rule was revoked, and 1
    (with a message on stderr) when it reports no such rule.
    """
    was_removed = ToolPolicyStore(path).revoke(rule_id)
    if was_removed:
        print(f"Removed policy rule: {rule_id}")
        return 0
    print(f"Policy rule not found: {rule_id}", file=sys.stderr)
    return 1
168
+
169
+
170
def _policy_clear(path: Path) -> int:
    """Remove every rule from the policy file at *path* and report the count.

    Returns the process exit code, which is always 0.
    """
    store = ToolPolicyStore(path)
    removed_total = store.clear()
    print(f"Removed {removed_total} policy rule(s)")
    return 0
174
+
175
+
136
176
  if __name__ == "__main__":
137
177
  raise SystemExit(main())
@@ -0,0 +1,166 @@
1
+ """Standard console renderer for BusEvent streams."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sys
7
+ from typing import TextIO
8
+
9
+ from harness.events import BusEvent, EventType
10
+
11
+
12
def trunc(s: str, n: int) -> str:
    """Truncate *s* to at most *n* characters, appending '…' when clipped.

    Args:
        s: Text to shorten. ``None`` is treated as an empty string and any
            other non-string value is converted with ``str()``, so raw event
            payload values can be passed without a prior type check.
        n: Maximum number of characters kept before the ellipsis. Negative
            values are treated as 0.

    Returns:
        The text unchanged when it has at most *n* characters, otherwise its
        first *n* characters followed by '…' (so a clipped result is
        ``n + 1`` characters long).
    """
    text = "" if s is None else str(s)
    # A negative slice bound would drop characters from the end instead of
    # clipping from the front, so clamp the limit at zero.
    limit = max(n, 0)
    return text if len(text) <= limit else text[:limit] + "…"
15
+
16
+
17
class ConsoleRenderer:
    """Render ``BusEvent`` objects as formatted lines on a text stream.

    Centralises all event-type formatting so callers don't duplicate
    THOUGHT/ACTION/OBSERVATION/... blocks and separator/truncation helpers.

    Args:
        truncate: Max characters for long text fields (thoughts and
            observations). Shorter fields keep their compact caps
            (rationale/args: 90, plan instruction: 70) unless *truncate*
            is smaller than the cap.
        sep_char: Character used for separator lines.
        sep_width: Width of separator lines.
        agent_label_width: Width of the ``[agent_id]`` label column.
        show_tokens: If True, TOKEN events are printed inline.
        out: Output stream (defaults to ``sys.stdout``).
        err: Stream for ERROR events (defaults to ``sys.stderr``, looked up
            at print time).
    """

    def __init__(
        self,
        *,
        truncate: int = 140,
        sep_char: str = "─",
        sep_width: int = 72,
        agent_label_width: int = 16,
        show_tokens: bool = False,
        out: TextIO | None = None,
        err: TextIO | None = None,
    ) -> None:
        self._truncate = truncate
        self._sep_char = sep_char
        self._sep_width = sep_width
        self._label_w = agent_label_width
        self._show_tokens = show_tokens
        self._out = out if out is not None else sys.stdout
        # Kept as None when not supplied so that ERROR output follows whatever
        # sys.stderr is at print time (e.g. when it has been redirected).
        self._err = err
        self._in_token_stream = False

    # ── public helpers ────────────────────────────────────────────────────────

    def sep(self, char: str | None = None, w: int | None = None) -> None:
        """Print a separator line.

        Args:
            char: Fill character; ``None`` uses the configured ``sep_char``.
            w: Line width; ``None`` uses the configured ``sep_width``.
        """
        fill = self._sep_char if char is None else char
        width = self._sep_width if w is None else w
        print(fill * width, file=self._out)

    def render(self, event: BusEvent) -> None:
        """Print formatted output for one BusEvent.

        TOKEN events are written inline (without a newline) when
        ``show_tokens`` is enabled and ignored otherwise. ERROR events go to
        the error stream; every other handled event type is printed to the
        output stream. Event types with no branch below produce no output.
        """
        if event.type == EventType.TOKEN:
            if self._show_tokens:
                self._in_token_stream = True
                self._out.write(event.token or "")
                self._out.flush()
            return

        # Close any in-progress token stream before the next event line.
        if self._in_token_stream:
            self._out.write("\n")
            self._out.flush()
            self._in_token_stream = False

        t = event.type
        p = event.payload or {}

        if t == EventType.DISPATCH:
            print(
                f"\n[dispatch] complexity={p.get('complexity')} path={p.get('path')}",
                file=self._out,
            )

        elif t == EventType.ROUTE:
            print(
                f"[route] → {p.get('agent_id')}: {self._clip(p.get('rationale'), 90)}",
                file=self._out,
            )

        elif t == EventType.PLAN:
            # Tolerate a missing or None plan/tasks entry rather than crashing.
            tasks = (p.get("plan") or {}).get("tasks") or []
            print(f"\n[plan] {len(tasks)} tasks", file=self._out)
            for task in tasks:
                deps = f" ← {task['depends_on']}" if task.get("depends_on") else ""
                print(
                    f" {task['id']}@{task['agent_id']}: "
                    f"{self._clip(task.get('instruction'), 70)}{deps}",
                    file=self._out,
                )

        elif t == EventType.THOUGHT:
            thought = p.get("thought")
            if thought:
                print(
                    f"{self._label(event)} think {self._clip(thought)}",
                    file=self._out,
                )

        elif t == EventType.ACTION:
            args = json.dumps(p.get("args", {}), default=str)
            print(
                f"{self._label(event)} action {p.get('tool')}({self._clip(args, 90)})",
                file=self._out,
            )

        elif t == EventType.OBSERVATION:
            print(
                f"{self._label(event)} obs {self._clip(p.get('observation'))}",
                file=self._out,
            )

        elif t == EventType.HUMAN_GUIDANCE:
            print(
                f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
                file=self._out,
            )

        elif t == EventType.TASK_DONE:
            print(
                f"{self._label(event)} ✓ done "
                f"confidence={self._as_float(p.get('confidence')):.2f} "
                f"steps={p.get('steps', '?')}",
                file=self._out,
            )

        elif t == EventType.REPLAN:
            print(
                f"\n[replan] #{p.get('replan_count')} — trigger={p.get('trigger_task', '?')}",
                file=self._out,
            )

        elif t == EventType.SYNTHESIS:
            print(
                f"\n[synthesis] confidence={self._as_float(p.get('confidence')):.2f}",
                file=self._out,
            )

        elif t == EventType.DONE:
            print(file=self._out)
            self.sep("═")
            print(p.get("answer", "(no answer)"), file=self._out)
            self.sep()
            print(
                f"Confidence: {self._as_float(p.get('confidence')):.2f} | "
                f"Replans: {p.get('replan_count', 0)} | "
                f"Cost: ${self._as_float(p.get('cost_usd')):.4f} | "
                f"Time: {self._as_float(p.get('elapsed_seconds')):.1f}s",
                file=self._out,
            )

        elif t == EventType.ERROR:
            target = self._err if self._err is not None else sys.stderr
            print(f"\n[error] {event.error}", file=target)

    # ── private helpers ───────────────────────────────────────────────────────

    def _limit(self, cap: int | None = None) -> int:
        """Return the character limit for a field.

        With no *cap* the configured ``truncate`` value applies; with a *cap*
        the smaller of the two is used, so compact fields stay compact.
        """
        return self._truncate if cap is None else min(cap, self._truncate)

    def _clip(self, value: object, cap: int | None = None) -> str:
        """Stringify *value* (``None`` becomes '') and truncate it to the field limit."""
        text = "" if value is None else str(value)
        return trunc(text, self._limit(cap))

    @staticmethod
    def _as_float(value: object) -> float:
        """Coerce a payload number to float, falling back to 0.0 when it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _label(self, event: BusEvent) -> str:
        """Return the left-aligned ``[agent_id]`` label, or ``[event type]`` when no agent is set."""
        if event.agent_id:
            return f"[{event.agent_id:<{self._label_w}}]"
        return f"[{event.type.value:<{self._label_w}}]"
@@ -8,7 +8,7 @@ Same-session flow:
8
8
  2. A checkpoint is written to the CheckpointStore (step + WorkingMemory +
9
9
  pending tool). BudgetGuard clock suspends.
10
10
  3. Approval banner is printed to the terminal.
11
- 4. Human types y / n / a / <correction> in the terminal.
11
+ 4. Human types y / n / a / A / <correction> in the terminal.
12
12
  5. Guard resumes; agent continues (or injects correction and skips the tool).
13
13
 
14
14
  Crash / Ctrl-C / kill flow:
@@ -22,7 +22,7 @@ Crash / Ctrl-C / kill flow:
22
22
  The UUID printed at the prompt is an audit reference only.
23
23
 
24
24
  Correction steering:
25
- Any text that isn't y/yes/a/allow/n/no is treated as a correction.
25
+ Any text that isn't y/yes/a/allow/A/always/allow always/n/no is treated as a correction.
26
26
  The gated tool is skipped and the text is injected into WorkingMemory
27
27
  as a user message, so the LLM sees it on the next think step.
28
28
 
@@ -32,6 +32,11 @@ Session allow:
32
32
  first word of the command arg (e.g. 'git'), so allowing 'git' doesn't also
33
33
  allow 'rm'. Subsequent calls matching the key skip checkpoint + banner.
34
34
  Use is_session_allowed(tool, args) to query the list from outside.
35
+
36
+ Persistent allow:
37
+ Typing A or always approves the current call and writes a user-scoped
38
+ allow rule to ~/.agent-harness/policies/tool_policy.json. Rules are narrow:
39
+ shell-like tools are scoped by first command word, other tools by tool name.
35
40
  """
36
41
 
37
42
  from __future__ import annotations
@@ -73,6 +78,18 @@ def is_session_allowed(tool: str, args: dict) -> bool:
73
78
  return _session_key(tool, args) in _session_allowed
74
79
 
75
80
 
81
def is_persistently_allowed(tool: str, args: dict) -> bool:
    """Return True when the user policy file allows this tool+args combination.

    A ``ToolPolicyStore`` is constructed with its default arguments on every
    call and asked whether the call is allowed.
    """
    # Imported at call time rather than at module load.
    from harness.tool_policy import ToolPolicyStore

    policy_store = ToolPolicyStore()
    return policy_store.is_allowed(tool, args)
86
+
87
+
88
def is_allowed(tool: str, args: dict) -> bool:
    """Return True when the call is allowed by the session list or the user policy.

    The session allow-list is consulted first; the persistent user policy is
    only checked when the session list does not already allow the call.
    """
    if is_session_allowed(tool, args):
        return True
    return is_persistently_allowed(tool, args)
91
+
92
+
76
93
  def _session_label(tool: str, args: dict) -> str:
77
94
  """Human-readable description of what 'a' would allow."""
78
95
  _, prefix = _session_key(tool, args)
@@ -122,6 +139,7 @@ class ApprovalResponse:
122
139
  approved: bool
123
140
  correction: str | None = None # non-None → steering; tool is skipped
124
141
  session_allow: bool = False # True → add (tool, prefix) to _session_allowed
142
+ persistent_allow: bool = False # True → write a user-scoped allow rule
125
143
 
126
144
 
127
145
  # ── CLI gate ──────────────────────────────────────────────────────────────────
@@ -140,21 +158,29 @@ def _print_banner(req: ApprovalRequest) -> None:
140
158
  print(f" ID: {req.approval_id}")
141
159
  print(_SEP)
142
160
  print(
143
- f" y = approve once | a = allow '{label}' for session | n = reject | <text> = steer"
161
+ " y = approve once | "
162
+ f"a = allow '{label}' for session | "
163
+ f"A = always allow '{label}' | "
164
+ "n = reject | <text> = steer"
144
165
  )
145
166
  print(f" Ctrl-C to pause. Resume: python {script} --resume {req.run_id}")
146
167
  print(_SEP)
147
168
 
148
169
 
149
170
  def _parse_stdin(approval_id: str, raw: str) -> ApprovalResponse:
150
- lo = raw.strip().lower()
171
+ stripped = raw.strip()
172
+ if stripped == "A":
173
+ return ApprovalResponse(approval_id=approval_id, approved=True, persistent_allow=True)
174
+ lo = stripped.lower()
151
175
  if lo in ("y", "yes"):
152
176
  return ApprovalResponse(approval_id=approval_id, approved=True)
153
177
  if lo in ("a", "allow"):
154
178
  return ApprovalResponse(approval_id=approval_id, approved=True, session_allow=True)
179
+ if lo in ("always", "allow always"):
180
+ return ApprovalResponse(approval_id=approval_id, approved=True, persistent_allow=True)
155
181
  if lo in ("n", "no"):
156
182
  return ApprovalResponse(approval_id=approval_id, approved=False)
157
- return ApprovalResponse(approval_id=approval_id, approved=True, correction=raw.strip() or None)
183
+ return ApprovalResponse(approval_id=approval_id, approved=True, correction=stripped or None)
158
184
 
159
185
 
160
186
  async def request_approval(
@@ -172,6 +198,7 @@ async def request_approval(
172
198
  y / yes → approved, tool runs
173
199
  n / no → rejected, tool skipped (error observation returned)
174
200
  a / allow → approved + session-allow registered; tool runs
201
+ A / always → approved + user policy allow registered; tool runs
175
202
  <any text> → correction injected into WorkingMemory; tool skipped
176
203
 
177
204
  Holds stdout_lock for the duration so concurrent agent events don't
@@ -190,7 +217,7 @@ async def request_approval(
190
217
 
191
218
  async with stdout_lock:
192
219
  router = get_active_router()
193
- approve_prompt = " Approve? [y/n/a/correction]: "
220
+ approve_prompt = " Approve? [y/n/a/A/correction]: "
194
221
  # If a router is active, reserve the next stdin read BEFORE printing
195
222
  # the banner so the user's typed answer routes to HITL (not steering).
196
223
  hitl_future: Any = (
@@ -226,4 +253,9 @@ async def request_approval(
226
253
  if resp.session_allow:
227
254
  _session_allowed.add(_session_key(req.tool, req.args))
228
255
  print(f" ✓ '{_session_label(req.tool, req.args)}' allowed for this session\n")
256
+ if resp.persistent_allow:
257
+ from harness.tool_policy import ToolPolicyStore
258
+
259
+ rule = ToolPolicyStore().add_allow_rule(tool=req.tool, args=req.args)
260
+ print(f" ✓ '{_session_label(req.tool, req.args)}' always allowed ({rule.id})\n")
229
261
  return resp