react-agent-harness 0.3.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {react_agent_harness-0.3.2/react_agent_harness.egg-info → react_agent_harness-0.5.0}/PKG-INFO +1 -1
  2. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/README.md +140 -14
  3. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/agents/base.py +56 -18
  4. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/cli.py +40 -0
  5. react_agent_harness-0.5.0/harness/console.py +197 -0
  6. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/events.py +4 -0
  7. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/hitl.py +38 -6
  8. react_agent_harness-0.5.0/harness/llm/anthropic.py +242 -0
  9. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/claude_code.py +42 -10
  10. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/runtime.py +37 -0
  11. react_agent_harness-0.5.0/harness/tool_policy.py +183 -0
  12. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/working.py +17 -0
  13. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/orchestrator/planner.py +147 -9
  14. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/pyproject.toml +1 -1
  15. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
  16. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt +9 -1
  17. react_agent_harness-0.5.0/tests/test_anthropic_llm.py +401 -0
  18. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_claude_code_llm.py +115 -2
  19. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_cli.py +29 -0
  20. react_agent_harness-0.5.0/tests/test_console_renderer.py +52 -0
  21. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_mcp_adapter.py +45 -26
  22. react_agent_harness-0.5.0/tests/test_mcp_auth.py +104 -0
  23. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_orchestrator.py +250 -4
  24. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_streaming.py +1 -0
  25. react_agent_harness-0.5.0/tests/test_tool_policy.py +89 -0
  26. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_working_memory.py +15 -0
  27. react_agent_harness-0.5.0/tools/mcp/__init__.py +14 -0
  28. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/mcp/adapter.py +36 -31
  29. react_agent_harness-0.5.0/tools/mcp/auth.py +129 -0
  30. react_agent_harness-0.3.2/tools/mcp/__init__.py +0 -4
  31. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/LICENSE +0 -0
  32. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/agents/__init__.py +0 -0
  33. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/__init__.py +0 -0
  34. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/annotation.py +0 -0
  35. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/checkpoint.py +0 -0
  36. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/executor_bridge.py +0 -0
  37. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/__init__.py +0 -0
  38. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/_streaming.py +0 -0
  39. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/auth.py +0 -0
  40. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/openai.py +0 -0
  41. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/llm/openai_codex.py +0 -0
  42. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/otel.py +0 -0
  43. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/steering.py +0 -0
  44. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/harness/utils.py +0 -0
  45. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/__init__.py +0 -0
  46. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/episodic_lance.py +0 -0
  47. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/manager.py +0 -0
  48. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/redis_store.py +0 -0
  49. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/memory/stores.py +0 -0
  50. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/orchestrator/__init__.py +0 -0
  51. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  52. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
  53. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/requires.txt +0 -0
  54. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  55. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/setup.cfg +0 -0
  56. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_agents_base.py +0 -0
  57. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_annotation.py +0 -0
  58. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_checkpoint_resume.py +0 -0
  59. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_executor_bridge.py +0 -0
  60. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_http_fetch.py +0 -0
  61. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_llm_auth.py +0 -0
  62. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_memory.py +0 -0
  63. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_openai_codex_llm.py +0 -0
  64. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_openai_llm.py +0 -0
  65. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_otel.py +0 -0
  66. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_parse_action_json.py +0 -0
  67. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_redis_store.py +0 -0
  68. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_steering.py +0 -0
  69. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_utils.py +0 -0
  70. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tests/test_vision.py +0 -0
  71. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/__init__.py +0 -0
  72. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/__init__.py +0 -0
  73. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/fetch_image.py +0 -0
  74. {react_agent_harness-0.3.2 → react_agent_harness-0.5.0}/tools/builtin/http_fetch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.3.2
3
+ Version: 0.5.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
@@ -38,6 +38,8 @@ harness/events.py BusEvent + EventType — canonical event vocabulary
38
38
  harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
39
39
  harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
40
40
  harness/hitl.py HITL approval gate — interactive CLI, session-allow list
41
+ harness/tool_policy.py Persistent tool policy — user-scoped allow rules, CLI management
42
+ harness/console.py ConsoleRenderer — centralised BusEvent formatting for CLI apps
41
43
  harness/steering.py Async steering — agent.steer(text), StdinRouter pub/sub, FileSteer, factory helpers
42
44
  harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
43
45
  harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
@@ -74,6 +76,7 @@ explicit control.
74
76
  | `examples/executor_bridge_demo.py` | `ExecutorBridge` backends side-by-side: allowlist, env scrubbing, Docker network/fs isolation, timeout, positional-arg tools. | `ah-executor` and/or Docker |
75
77
  | `examples/durable_memory_demo.py` | Redis (semantic) + LanceDB (episodic) memory persistence across two related goals. | `OPENAI_API_KEY`, `[openai,redis,lance]`, Redis reachable |
76
78
  | `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
79
+ | `examples/mcp_auth_demo.py` | Connects to an authenticated remote MCP server using bearer or auth-file credentials. | `OPENAI_API_KEY`, `[openai,mcp]`, `MCP_URL`, `MCP_BEARER_TOKEN` or `MCP_AUTH_PROVIDER` |
77
80
  | `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
78
81
 
79
82
  ## Adding a new domain (3 steps)
@@ -337,22 +340,96 @@ async for event in runtime.run_stream("investigate GPU spike on worker-07"):
337
340
  print(event.payload["answer"])
338
341
  ```
339
342
 
343
+ ### 4. Pre-built — `run_with_plan` / `run_with_plan_stream`
344
+
345
+ Supply a hand-written `Plan` and bypass the LLM planner entirely. Use
346
+ this for deterministic, repeatable workflows where the decomposition is
347
+ known upfront — CI pipelines, ETL jobs, scheduled tasks. The plan is
348
+ validated against registered agents before execution; everything
349
+ downstream (parallel batches, replan-on-failure, synthesis, memory
350
+ writes, steering) is identical to `run_stream`.
351
+
352
+ ```python
353
+ from orchestrator.planner import Plan, Task
354
+
355
+ plan = Plan([
356
+ Task("t1", "analyst", "Analyse error logs from the last hour"),
357
+ Task("t2", "reporter", "Write an incident summary", depends_on=["t1"]),
358
+ ])
359
+
360
+ # streaming
361
+ async for event in runtime.run_with_plan_stream(plan, goal="Incident report"):
362
+ if event.type == EventType.DONE:
363
+ print(event.payload["answer"])
364
+
365
+ # blocking
366
+ result = await runtime.run_with_plan(plan, goal="Incident report")
367
+ ```
368
+
369
+ The `goal` string is passed to the synthesiser and used for memory
370
+ context injection into agents — even though the plan shape is fixed, the
371
+ agents themselves still read from memory.
372
+
373
+ If a task fails mid-run and `on_failure="replan"`, the replan call does
374
+ go to the LLM — the bypass is for the *initial* plan only.
375
+
376
+ ---
377
+
340
378
  Event types by path:
341
379
 
342
- | Event | Dispatch | Routed | Direct | Orchestrated |
343
- |---|---|---|---|---|
344
- | `DISPATCH` | ✓ | — | — | — |
345
- | `ROUTE` | ✓ (simple) | ✓ | — | — |
346
- | `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ |
347
- | `TASK_DONE` | ✓ | ✓ | ✓ | ✓ |
348
- | `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ |
349
- | `ERROR` | ✓ | ✓ | ✓ | ✓ |
380
+ | Event | Dispatch | Routed | Direct | Orchestrated | Pre-built |
381
+ |---|---|---|---|---|---|
382
+ | `DISPATCH` | ✓ | — | — | — | — |
383
+ | `ROUTE` | ✓ (simple) | ✓ | — | — | — |
384
+ | `THOUGHT` / `TOKEN` / `ACTION` / `OBSERVATION` | ✓ | ✓ | ✓ | ✓ | ✓ |
385
+ | `TASK_DONE` | ✓ | ✓ | ✓ | ✓ | ✓ |
386
+ | `PLAN` / `REPLAN` / `SYNTHESIS` / `DONE` | ✓ (complex) | — | — | ✓ | ✓ |
387
+ | `ERROR` | ✓ | ✓ | ✓ | ✓ | ✓ |
350
388
 
351
389
  `TOKEN` events fire only when your LLM client exposes
352
390
  `async def stream_complete(system, messages) -> AsyncGenerator[str, None]`.
353
391
  Non-streaming clients still work — they emit the full response in one
354
392
  `THOUGHT` event per step.
355
393
 
394
+ ## Console rendering
395
+
396
+ `ConsoleRenderer` handles all `BusEvent` types with consistent label
397
+ and truncation formatting so event-loop boilerplate stays out of your
398
+ scripts.
399
+
400
+ ```python
401
+ from harness.console import ConsoleRenderer, trunc
402
+
403
+ renderer = ConsoleRenderer(
404
+ truncate=140, # max chars for long text fields
405
+ sep_char="─", # separator character
406
+ sep_width=72, # separator width
407
+ agent_label_width=16, # width of [agent_id] column
408
+ show_tokens=False, # True to print TOKEN events inline
409
+ )
410
+
411
+ async for event in runtime.dispatch_stream(goal):
412
+ renderer.render(event) # handles every EventType
413
+ ```
414
+
415
+ For events with custom section headers (e.g. a "PROJECT HEALTH REPORT"
416
+ block), handle that event yourself and skip `render` for it — the
417
+ renderer is additive:
418
+
419
+ ```python
420
+ async for event in runtime.run_stream(goal):
421
+ if event.type == EventType.DONE:
422
+ renderer.sep("═")
423
+ print("MY CUSTOM HEADER")
424
+ renderer.sep("═")
425
+ print(event.payload["answer"])
426
+ else:
427
+ renderer.render(event)
428
+ ```
429
+
430
+ `trunc(s, n)` is exported for standalone use when you need to truncate
431
+ a string to `n` characters with a trailing `…`.
432
+
356
433
  ## Working memory budget
357
434
 
358
435
  `AgentConfig.working_memory_max_tokens` controls per-agent eviction (default
@@ -518,7 +595,48 @@ async with MCPServerConnection(params, server_name="filesystem") as conn:
518
595
  Supports **stdio** and **SSE** transports. The `MCPServerConnection` context
519
596
  manager handles the full lifecycle — connect, discover, and cleanup.
520
597
 
521
- See `examples/mcp_demo.py` for a runnable example.
598
+ Remote MCP servers can receive static headers or bearer tokens through an auth
599
+ provider:
600
+
601
+ ```python
602
+ import os
603
+ from tools.mcp import MCPServerConnection, StaticMCPAuth
604
+
605
+ auth = StaticMCPAuth(
606
+ headers={
607
+ "DD_API_KEY": os.environ["DD_API_KEY"],
608
+ "DD_APPLICATION_KEY": os.environ["DD_APPLICATION_KEY"],
609
+ }
610
+ )
611
+
612
+ async with MCPServerConnection(
613
+ {"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
614
+ server_name="datadog",
615
+ auth=auth,
616
+ ) as conn:
617
+ conn.register_tools(tool_registry)
618
+ ```
619
+
620
+ OAuth-style auth files can be reused for MCP bearer auth:
621
+
622
+ ```python
623
+ from tools.mcp import MCPServerConnection, OAuthMCPAuth
624
+
625
+ auth = OAuthMCPAuth.from_auth_file(
626
+ "~/.agent-harness/auth/auth.json",
627
+ provider="datadog-mcp",
628
+ )
629
+
630
+ async with MCPServerConnection(
631
+ {"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"},
632
+ server_name="datadog",
633
+ auth=auth,
634
+ ) as conn:
635
+ conn.register_tools(tool_registry)
636
+ ```
637
+
638
+ See `examples/mcp_demo.py` for local stdio MCP and `examples/mcp_auth_demo.py`
639
+ for authenticated remote MCP.
522
640
 
523
641
  ## OpenTelemetry Tracing
524
642
 
@@ -699,10 +817,10 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
699
817
  Run: 3f7a1b2c-...:file_agent
700
818
  ID: a1b2-c3d4
701
819
  ────────────────────────────────────────────────────────────
702
- y = approve once | a = allow 'delete_file' for session | n = reject | <text> = steer
820
+ y = approve once | a = allow 'delete_file' for session | A = always allow 'delete_file' | n = reject | <text> = steer
703
821
  Ctrl-C to pause. Resume: python my_script.py --resume 3f7a1b2c-...:file_agent
704
822
  ────────────────────────────────────────────────────────────
705
- Approve? [y/n/a/correction]:
823
+ Approve? [y/n/a/A/correction]:
706
824
  ```
707
825
 
708
826
  **Prompt semantics:**
@@ -712,11 +830,19 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
712
830
  | `y` / `yes` | Tool runs once |
713
831
  | `n` / `no` | Tool skipped; agent sees a rejection observation |
714
832
  | `a` / `allow` | Tool runs **and** added to session allow-list; no further prompts for this tool (or command prefix for shell-like tools) |
833
+ | `A` / `always` | Tool runs **and** a user-scoped allow rule is stored in `~/.agent-harness/policies/tool_policy.json` |
715
834
  | any other text | Correction: tool skipped, text injected into `WorkingMemory` as a user message; LLM self-corrects on the next step |
716
835
 
717
- For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` allows the **first
718
- word** of the command — e.g. typing `a` when approving `shell git commit ...`
719
- allows all `git` commands for the session but still prompts for `shell rm ...`.
836
+ For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` and `A` allow the
837
+ **first word** of the command — e.g. approving `shell git commit ...` allows
838
+ all `git` commands in that scope but still prompts for `shell rm ...`.
839
+ Persistent rules are user-local, not repo files. Manage them with:
840
+
841
+ ```bash
842
+ agent-harness policy list
843
+ agent-harness policy revoke <rule-id>
844
+ agent-harness policy clear
845
+ ```
720
846
 
721
847
  **Wall-time budget** is suspended while waiting for input — human think-time
722
848
  does not count against `max_wall_time_seconds`.
@@ -381,6 +381,8 @@ class BaseAgent:
381
381
  elif thought_event.type == EventType.THOUGHT:
382
382
  response = thought_event.payload.get("response")
383
383
  yield thought_event
384
+ else:
385
+ yield thought_event
384
386
 
385
387
  if response is None:
386
388
  reason = self._last_think_error or "LLM returned unparseable response"
@@ -642,6 +644,14 @@ class BaseAgent:
642
644
  """
643
645
  messages = self._working_memory.get_messages()
644
646
  accumulated = ""
647
+ before_usage = self._working_memory.context_usage()
648
+ before_summarizations = self._working_memory.summarization_count
649
+
650
+ yield BusEvent(
651
+ type=EventType.CONTEXT,
652
+ agent_id=self.config.agent_id,
653
+ payload=before_usage,
654
+ )
645
655
 
646
656
  try:
647
657
  if hasattr(self._llm, "stream_complete"):
@@ -686,6 +696,32 @@ class BaseAgent:
686
696
  if response is not None:
687
697
  self._last_think_error = None
688
698
 
699
+ after_usage = self._working_memory.context_usage()
700
+ if self._working_memory.summarization_count > before_summarizations:
701
+ yield BusEvent(
702
+ type=EventType.MEMORY,
703
+ agent_id=self.config.agent_id,
704
+ payload={
705
+ "event": "summarized",
706
+ "before": before_usage,
707
+ "after": after_usage,
708
+ "summarizations": self._working_memory.summarization_count,
709
+ },
710
+ )
711
+ llm_usage = getattr(self._llm, "last_usage", None) or {}
712
+ if llm_usage or after_usage != before_usage:
713
+ yield BusEvent(
714
+ type=EventType.CONTEXT,
715
+ agent_id=self.config.agent_id,
716
+ payload={
717
+ **after_usage,
718
+ "tokens_in": llm_usage.get("tokens_in"),
719
+ "tokens_out": llm_usage.get("tokens_out"),
720
+ "cache_read_tokens": llm_usage.get("cache_read_tokens"),
721
+ "cache_creation_tokens": llm_usage.get("cache_creation_tokens"),
722
+ },
723
+ )
724
+
689
725
  yield BusEvent(
690
726
  type=EventType.THOUGHT,
691
727
  agent_id=self.config.agent_id,
@@ -738,10 +774,10 @@ class BaseAgent:
738
774
  if not (self._checkpoint_store and tool_name in self.config.hitl_tools):
739
775
  return None
740
776
 
741
- from harness.hitl import ApprovalRequest, is_session_allowed, request_approval
777
+ from harness.hitl import ApprovalRequest, is_allowed, request_approval
742
778
 
743
- if is_session_allowed(tool_name, tool_args):
744
- return None # fast-path: human already allowed this tool/prefix for session
779
+ if is_allowed(tool_name, tool_args):
780
+ return None # fast-path: human already allowed this tool/prefix
745
781
 
746
782
  approval_id = str(uuid.uuid4())
747
783
  await self._checkpoint_store.write(
@@ -842,33 +878,35 @@ class BaseAgent:
842
878
  pending: dict,
843
879
  ) -> AsyncGenerator[BusEvent, None]:
844
880
  """Re-prompt approval for a step interrupted by a crash, then complete it."""
845
- from harness.hitl import ApprovalRequest, request_approval
881
+ from harness.hitl import ApprovalRequest, is_allowed, request_approval
846
882
 
847
883
  tool_name = pending["tool"]
848
884
  tool_args = pending["args"]
849
885
  step = pending["step"]
850
886
  llm_response = pending["llm_response"]
851
887
 
852
- approval = await request_approval(
853
- ApprovalRequest(
854
- approval_id=pending["approval_id"],
855
- run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
856
- agent_id=self.config.agent_id,
857
- tool=tool_name,
858
- args=tool_args,
859
- step=step,
860
- timestamp=datetime.now(timezone.utc).isoformat(),
861
- ),
862
- self._guard,
863
- )
888
+ approval = None
889
+ if not is_allowed(tool_name, tool_args):
890
+ approval = await request_approval(
891
+ ApprovalRequest(
892
+ approval_id=pending["approval_id"],
893
+ run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
894
+ agent_id=self.config.agent_id,
895
+ tool=tool_name,
896
+ args=tool_args,
897
+ step=step,
898
+ timestamp=datetime.now(timezone.utc).isoformat(),
899
+ ),
900
+ self._guard,
901
+ )
864
902
 
865
- if approval.correction:
903
+ if approval is not None and approval.correction:
866
904
  await self._inject_human_guidance(llm_response, approval.correction, run_id, step)
867
905
  return
868
906
 
869
907
  observation = (
870
908
  await self._execute_tool(tool_name, tool_args)
871
- if approval.approved
909
+ if approval is None or approval.approved
872
910
  else f"Tool rejected by human: {approval.correction or 'no reason given'}"
873
911
  )
874
912
  obs_display = "[image]" if _is_image_block(observation) else str(observation)[:500]
@@ -14,6 +14,7 @@ from harness.llm.auth import (
14
14
  OpenAICodexOAuthClient,
15
15
  default_auth_file,
16
16
  )
17
+ from harness.tool_policy import ToolPolicyStore, default_policy_file
17
18
 
18
19
  PROVIDERS = ["openai-codex", "claude-code"]
19
20
 
@@ -35,6 +36,16 @@ def main() -> int:
35
36
  logout_cmd.add_argument("provider", choices=PROVIDERS)
36
37
  logout_cmd.add_argument("--auth-file", default=str(default_auth_file()))
37
38
 
39
+ policy = sub.add_parser("policy", help="manage persistent tool policy")
40
+ policy_sub = policy.add_subparsers(dest="policy_command", required=True)
41
+ policy_list = policy_sub.add_parser("list", help="list persistent policy rules")
42
+ policy_list.add_argument("--policy-file", default=str(default_policy_file()))
43
+ policy_revoke = policy_sub.add_parser("revoke", help="remove one policy rule")
44
+ policy_revoke.add_argument("rule_id")
45
+ policy_revoke.add_argument("--policy-file", default=str(default_policy_file()))
46
+ policy_clear = policy_sub.add_parser("clear", help="remove all policy rules")
47
+ policy_clear.add_argument("--policy-file", default=str(default_policy_file()))
48
+
38
49
  args = parser.parse_args()
39
50
  try:
40
51
  if args.command == "login":
@@ -52,6 +63,14 @@ def main() -> int:
52
63
  return _logout_oauth_provider(Path(args.auth_file).expanduser(), "openai-codex")
53
64
  if args.provider == "claude-code":
54
65
  return _logout_oauth_provider(Path(args.auth_file).expanduser(), "claude-code")
66
+ if args.command == "policy":
67
+ path = Path(args.policy_file).expanduser()
68
+ if args.policy_command == "list":
69
+ return _policy_list(path)
70
+ if args.policy_command == "revoke":
71
+ return _policy_revoke(path, args.rule_id)
72
+ if args.policy_command == "clear":
73
+ return _policy_clear(path)
55
74
  except Exception as e:
56
75
  print(f"agent-harness: {e}", file=sys.stderr)
57
76
  return 1
@@ -133,5 +152,26 @@ def _write_oauth_credential(path: Path, cred: OAuthCredential) -> None:
133
152
  provider._write_credential(cred)
134
153
 
135
154
 
155
def _policy_list(path: Path) -> int:
    """Print the persistent tool-policy rules stored at *path* as JSON.

    The output is a JSON object with the policy file location under
    ``policy_file`` and each rule's ``to_dict()`` form under ``rules``.

    Returns:
        Process exit code ``0``.
    """
    serialised = [rule.to_dict() for rule in ToolPolicyStore(path).list_rules()]
    report = {"policy_file": str(path), "rules": serialised}
    print(json.dumps(report, indent=2))
    return 0
160
+
161
+
162
def _policy_revoke(path: Path, rule_id: str) -> int:
    """Remove the rule identified by *rule_id* from the policy file at *path*.

    Returns:
        ``0`` when the store reports the rule as removed; ``1`` (with a
        message on stderr) when the store reports nothing was removed.
    """
    removed = ToolPolicyStore(path).revoke(rule_id)
    if removed:
        print(f"Removed policy rule: {rule_id}")
        return 0
    print(f"Policy rule not found: {rule_id}", file=sys.stderr)
    return 1
168
+
169
+
170
def _policy_clear(path: Path) -> int:
    """Remove every rule from the policy file at *path* and report the count.

    Returns:
        Process exit code ``0``.
    """
    removed = ToolPolicyStore(path).clear()
    print(f"Removed {removed} policy rule(s)")
    return 0
174
+
175
+
136
176
  if __name__ == "__main__":
137
177
  raise SystemExit(main())
@@ -0,0 +1,197 @@
1
+ """Standard console renderer for BusEvent streams."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sys
7
+ from typing import TextIO
8
+
9
+ from harness.events import BusEvent, EventType
10
+
11
+
12
def trunc(s: str, n: int) -> str:
    """Clip *s* to its first *n* characters, marking the cut with '…'.

    Strings no longer than *n* are returned unchanged.  Longer strings are
    cut to *n* characters and an ellipsis is appended, so for non-negative
    *n* a clipped result is ``n + 1`` characters long.
    """
    if len(s) > n:
        return s[:n] + "…"
    return s
15
+
16
+
17
class ConsoleRenderer:
    """Renders BusEvent objects to a text stream.

    Centralises all event-type formatting so callers don't duplicate
    THOUGHT/ACTION/OBSERVATION/... blocks and separator/truncation helpers.

    Args:
        truncate: Upper bound, in characters, for long text fields.  Each
            field has its own built-in limit (70-110); ``truncate`` caps
            those limits, so values below a field's limit shorten it and
            the default (140) leaves the built-in limits in effect.
        sep_char: Character used for separator lines.
        sep_width: Width of separator lines.
        agent_label_width: Width of the ``[agent_id]`` label column.
        show_tokens: If True, TOKEN events are printed inline.
        out: Output stream (defaults to ``sys.stdout``).
    """

    def __init__(
        self,
        *,
        truncate: int = 140,
        sep_char: str = "─",
        sep_width: int = 72,
        agent_label_width: int = 16,
        show_tokens: bool = False,
        out: TextIO | None = None,
    ) -> None:
        self._truncate = truncate
        self._sep_char = sep_char
        self._sep_width = sep_width
        self._label_w = agent_label_width
        self._show_tokens = show_tokens
        self._out = out or sys.stdout
        # True while TOKEN text has been written without a trailing newline.
        self._in_token_stream = False

    # ── public helpers ────────────────────────────────────────────────────────

    def sep(self, char: str | None = None, w: int | None = None) -> None:
        """Print a separator line, using the configured char/width by default."""
        print((char or self._sep_char) * (w or self._sep_width), file=self._out)

    def render(self, event: BusEvent) -> None:
        """Print formatted output for one BusEvent.

        TOKEN events are written inline (only when ``show_tokens`` is set);
        every other event type is printed on its own line(s).  Event types
        without a branch below produce no output.
        """
        if event.type == EventType.TOKEN:
            if self._show_tokens:
                self._in_token_stream = True
                self._out.write(event.token)
                self._out.flush()
            return

        # Close any in-progress token stream before the next event line.
        if self._in_token_stream:
            self._out.write("\n")
            self._out.flush()
            self._in_token_stream = False

        t = event.type
        p = event.payload

        if t == EventType.DISPATCH:
            print(
                f"\n[dispatch] complexity={p.get('complexity')} path={p.get('path')}",
                file=self._out,
            )

        elif t == EventType.ROUTE:
            print(
                f"[route] → {p.get('agent_id')}: {self._clip(p.get('rationale', ''), 90)}",
                file=self._out,
            )

        elif t == EventType.PLAN:
            tasks = p.get("plan", {}).get("tasks", [])
            print(f"\n[plan] {len(tasks)} tasks", file=self._out)
            for task in tasks:
                deps = f" ← {task['depends_on']}" if task.get("depends_on") else ""
                print(
                    f" {task['id']}@{task['agent_id']}: "
                    f"{self._clip(task.get('instruction', ''), 70)}{deps}",
                    file=self._out,
                )

        elif t == EventType.THOUGHT:
            thought = p.get("thought", "")
            if thought:
                print(
                    f"{self._label(event)} think {self._clip(thought, 110)}",
                    file=self._out,
                )

        elif t == EventType.ACTION:
            args = json.dumps(p.get("args", {}), default=str)
            print(
                f"{self._label(event)} action {p.get('tool')}({self._clip(args, 90)})",
                file=self._out,
            )

        elif t == EventType.OBSERVATION:
            obs = p.get("observation", "")
            print(
                f"{self._label(event)} obs {self._clip(obs, 110)}",
                file=self._out,
            )

        elif t == EventType.CONTEXT:
            tokens = int(p.get("tokens") or 0)
            max_tokens = int(p.get("max_tokens") or 0)
            pct = float(p.get("percent") or 0.0) * 100
            level = p.get("level") or "normal"
            suffix = "" if level == "normal" else f" {level}"
            llm_parts: list[str] = []
            if p.get("tokens_in") is not None:
                llm_parts.append(f"in={int(p['tokens_in']):,}")
            if p.get("tokens_out") is not None:
                llm_parts.append(f"out={int(p['tokens_out']):,}")
            if p.get("cache_read_tokens"):
                llm_parts.append(f"cache_hit={int(p['cache_read_tokens']):,}")
            if p.get("cache_creation_tokens"):
                llm_parts.append(f"cache_new={int(p['cache_creation_tokens']):,}")
            llm_suffix = f" [{' '.join(llm_parts)}]" if llm_parts else ""
            print(
                f"{self._label(event)} ctx {tokens:,} / {max_tokens:,} tokens "
                f"{pct:.0f}%{suffix}{llm_suffix}",
                file=self._out,
            )

        elif t == EventType.MEMORY:
            before = p.get("before") if isinstance(p.get("before"), dict) else {}
            after = p.get("after") if isinstance(p.get("after"), dict) else {}
            print(
                f"{self._label(event)} memory summarized "
                f"{int(before.get('tokens') or 0):,} -> {int(after.get('tokens') or 0):,} tokens",
                file=self._out,
            )

        elif t == EventType.HUMAN_GUIDANCE:
            print(
                f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
                file=self._out,
            )

        elif t == EventType.TASK_DONE:
            # ``or 0`` guards against a key that is present but None, which
            # would otherwise raise TypeError in the ``.2f`` format spec.
            print(
                f"{self._label(event)} ✓ done "
                f"confidence={(p.get('confidence') or 0):.2f} steps={p.get('steps', '?')}",
                file=self._out,
            )

        elif t == EventType.REPLAN:
            print(
                f"\n[replan] #{p.get('replan_count')} — trigger={p.get('trigger_task', '?')}",
                file=self._out,
            )

        elif t == EventType.SYNTHESIS:
            print(
                f"\n[synthesis] confidence={(p.get('confidence') or 0):.2f}",
                file=self._out,
            )

        elif t == EventType.DONE:
            print(file=self._out)
            self.sep("═")
            print(p.get("answer", "(no answer)"), file=self._out)
            self.sep()
            print(
                f"Confidence: {(p.get('confidence') or 0):.2f} | "
                f"Replans: {p.get('replan_count', 0)} | "
                f"Cost: ${(p.get('cost_usd') or 0):.4f} | "
                f"Time: {(p.get('elapsed_seconds') or 0):.1f}s",
                file=self._out,
            )

        elif t == EventType.ERROR:
            # NOTE: errors are written to sys.stderr, not the configured
            # ``out`` stream.
            print(f"\n[error] {event.error}", file=sys.stderr)

    # ── private helpers ───────────────────────────────────────────────────────

    def _limit(self, preferred: int) -> int:
        """Return *preferred* capped at the configured ``truncate`` (never below 0)."""
        return max(0, min(preferred, self._truncate))

    def _clip(self, text: object, preferred: int) -> str:
        """Stringify *text* (``None`` becomes '') and truncate it via ``trunc``."""
        return trunc("" if text is None else str(text), self._limit(preferred))

    def _label(self, event: BusEvent) -> str:
        """Return the left-aligned ``[agent_id]`` label, or ``[event type]`` if no agent."""
        if event.agent_id:
            return f"[{event.agent_id:<{self._label_w}}]"
        return f"[{event.type.value:<{self._label_w}}]"
@@ -19,6 +19,8 @@ Event lifecycle within a single goal:
19
19
  PLAN — orchestrator emitted a static DAG
20
20
  (per task in DAG)
21
21
  HUMAN_GUIDANCE? — async steering drained at top of step
22
+ CONTEXT — working-memory context budget estimate
23
+ MEMORY — working-memory compaction/summarization marker
22
24
  THOUGHT — agent's next-step reasoning
23
25
  TOKEN* — partial LLM output (only when client streams)
24
26
  ACTION — agent chose a tool + args
@@ -47,6 +49,8 @@ class EventType(str, Enum):
47
49
  TOKEN = "token"
48
50
  ACTION = "action"
49
51
  OBSERVATION = "observation"
52
+ CONTEXT = "context"
53
+ MEMORY = "memory"
50
54
  HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
51
55
  TASK_DONE = "task_done"
52
56
  REPLAN = "replan"