react-agent-harness 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {react_agent_harness-0.7.0/react_agent_harness.egg-info → react_agent_harness-0.8.0}/PKG-INFO +1 -1
  2. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/README.md +188 -4
  3. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/agents/base.py +336 -29
  4. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/events.py +4 -0
  5. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/anthropic.py +61 -1
  6. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/claude_code.py +28 -1
  7. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/fallback.py +2 -1
  8. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/openai.py +115 -5
  9. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/openai_codex.py +48 -4
  10. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/routing.py +3 -2
  11. react_agent_harness-0.8.0/harness/persistent.py +786 -0
  12. react_agent_harness-0.8.0/harness/streaming.py +136 -0
  13. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/stores.py +105 -0
  14. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/working.py +15 -1
  15. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/pyproject.toml +1 -1
  16. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
  17. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/react_agent_harness.egg-info/SOURCES.txt +6 -0
  18. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_agents_base.py +30 -0
  19. react_agent_harness-0.8.0/tests/test_fan_in.py +94 -0
  20. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_memory.py +14 -1
  21. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_openai_codex_llm.py +42 -0
  22. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_openai_llm.py +112 -1
  23. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_parse_action_json.py +12 -0
  24. react_agent_harness-0.8.0/tests/test_persistent_agent.py +439 -0
  25. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_streaming.py +34 -0
  26. react_agent_harness-0.8.0/tests/test_subagent_tool.py +453 -0
  27. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_working_memory.py +58 -0
  28. react_agent_harness-0.8.0/tools/builtin/subagent.py +173 -0
  29. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/LICENSE +0 -0
  30. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/agents/__init__.py +0 -0
  31. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/__init__.py +0 -0
  32. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/annotation.py +0 -0
  33. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/checkpoint.py +0 -0
  34. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/cli.py +0 -0
  35. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/console.py +0 -0
  36. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/executor_bridge.py +0 -0
  37. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/hitl.py +0 -0
  38. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/__init__.py +0 -0
  39. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/_streaming.py +0 -0
  40. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/llm/auth.py +0 -0
  41. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/oauth_browser.py +0 -0
  42. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/otel.py +0 -0
  43. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/runtime.py +0 -0
  44. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/steering.py +0 -0
  45. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/tool_policy.py +0 -0
  46. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/trace.py +0 -0
  47. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/trace_viewer.py +0 -0
  48. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/harness/utils.py +0 -0
  49. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/__init__.py +0 -0
  50. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/episodic_lance.py +0 -0
  51. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/manager.py +0 -0
  52. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/memory/redis_store.py +0 -0
  53. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/orchestrator/__init__.py +0 -0
  54. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/orchestrator/planner.py +0 -0
  55. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  56. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
  57. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/react_agent_harness.egg-info/requires.txt +0 -0
  58. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  59. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/setup.cfg +0 -0
  60. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_annotation.py +0 -0
  61. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_anthropic_llm.py +0 -0
  62. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_budget_guard.py +0 -0
  63. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_checkpoint_resume.py +0 -0
  64. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_claude_code_llm.py +0 -0
  65. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_cli.py +0 -0
  66. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_console_renderer.py +0 -0
  67. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_executor_bridge.py +0 -0
  68. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_fallback_llm.py +0 -0
  69. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_http_fetch.py +0 -0
  70. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_llm_auth.py +0 -0
  71. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_mcp_adapter.py +0 -0
  72. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_mcp_auth.py +0 -0
  73. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_memory_reconciler.py +0 -0
  74. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_memory_touchpoints.py +0 -0
  75. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_oauth_browser.py +0 -0
  76. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_orchestrator.py +0 -0
  77. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_otel.py +0 -0
  78. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_per_call_site_llm.py +0 -0
  79. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_redis_store.py +0 -0
  80. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_routing_llm.py +0 -0
  81. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_steering.py +0 -0
  82. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_tool_policy.py +0 -0
  83. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_trace.py +0 -0
  84. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_utils.py +0 -0
  85. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tests/test_vision.py +0 -0
  86. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/__init__.py +0 -0
  87. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/builtin/__init__.py +0 -0
  88. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/builtin/fetch_image.py +0 -0
  89. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/builtin/http_fetch.py +0 -0
  90. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/mcp/__init__.py +0 -0
  91. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/mcp/adapter.py +0 -0
  92. {react_agent_harness-0.7.0 → react_agent_harness-0.8.0}/tools/mcp/auth.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming, cost/token budgets with per-call-site breakdown
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
@@ -55,6 +55,7 @@ harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key —
55
55
  harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
56
56
  harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
57
57
  harness/oauth_browser.py Localhost OAuth callback server + open_or_print_url — shared by MCP browser-OAuth and LLM login flows
58
+ harness/persistent.py PersistentAgent + SQLiteSessionStore — durable chat sessions around user-built agents
58
59
  orchestrator/planner.py Hybrid DAG orchestrator — plan, replan, synthesize
59
60
  agents/base.py Generic BaseAgent — ReAct loop, no subclassing needed
60
61
  memory/manager.py MemoryManager — semantic KV + episodic vector
@@ -64,8 +65,10 @@ memory/redis_store.py Redis semantic store — durable KV with TTL
64
65
  memory/stores.py InMemory stores — local dev default, no deps
65
66
  tools/builtin/http_fetch.py HTTPFetch — minimal read-only GET tool
66
67
  tools/builtin/fetch_image.py FetchImage — fetch URL and return OpenAI image_url block
68
+ tools/builtin/subagent.py SubAgentTool — expose a BaseAgent as a parent-callable streaming tool
67
69
  tools/mcp/adapter.py MCP tool adapter — stdio, SSE, streamable-HTTP transports
68
70
  tools/mcp/auth.py ApiKeyMCPAuth + BrowserOAuthMCPAuth — auth primitives for remote MCP servers
71
+ harness/streaming.py Multi-producer fan-in for parallel streaming tools (sub-agents)
69
72
  ```
70
73
 
71
74
  Execution is **streaming-primary**: every path yields `BusEvent`s for
@@ -90,6 +93,8 @@ explicit control.
90
93
  | `examples/mcp_demo.py` | Connects to an MCP filesystem server and gives the agent its tools. | `OPENAI_API_KEY`, `[openai,mcp]`, `npx` |
91
94
  | `examples/mcp_auth_demo.py` | Connects to an authenticated remote MCP server using bearer or auth-file credentials. | `OPENAI_API_KEY`, `[openai,mcp]`, `MCP_URL`, `MCP_BEARER_TOKEN` or `MCP_AUTH_PROVIDER` |
92
95
  | `examples/subscription_auth_demo.py` | Runs an agent through subscription-backed providers: direct `openai-codex` OAuth or direct `claude-code` OAuth. | `agent-harness login openai-codex` or `agent-harness login claude-code` |
96
+ | `examples/coordinator_demo.py` | Sub-agent-as-tool pattern: a `coordinator` ReAct agent delegates dynamically to `researcher` / `analyst` / `reporter` via `SubAgentTool`. Demonstrates parallel delegation through `actions: [...]`. | `OPENAI_API_KEY`, `[openai,http]` |
97
+ | `examples/persistent_agent_demo.py` | Persistent local assistant: SQLite session + semantic memory, Lance episodic memory, shell tool, and a browser researcher via `@playwright/mcp`. Supports `--provider openai`, `--provider openai-codex`, or `--provider claude-code`. | `[openai,mcp,lance]`, `OPENAI_API_KEY` or `python -m harness.cli login openai-codex` / `claude-code`, `ah-executor`, `npx` (Node 18+) |
93
98
 
94
99
  ## Adding a new domain (3 steps)
95
100
 
@@ -541,6 +546,167 @@ agents themselves still read from memory.
541
546
  If a task fails mid-run and `on_failure="replan"`, the replan call does
542
547
  go to the LLM — the bypass is for the *initial* plan only.
543
548
 
549
+ ### 5. Sub-agents as tools — `SubAgentTool`
550
+
551
+ A different decomposition model from the orchestrator. Instead of a
552
+ separate planner LLM deciding the DAG upfront, **one main agent's ReAct
553
+ loop decides per step whether to delegate** and to whom. Use when the
554
+ path is exploratory — you don't know which specialist you'll need until
555
+ you've seen partial results.
556
+
557
+ ```python
558
+ from tools.builtin.subagent import SubAgentTool
559
+
560
+ researcher = BaseAgent(config=researcher_config, ...)
561
+ analyst = BaseAgent(config=analyst_config, ...)
562
+
563
+ main_agent = BaseAgent(
564
+ config=AgentConfig(
565
+ agent_id="coordinator",
566
+ role="decides what to research and analyses results",
567
+ system_prompt="...",
568
+ allowed_tools=["delegate_research", "delegate_analyse"],
569
+ max_subagent_depth=3, # bounds the delegation chain
570
+ ),
571
+ tools={
572
+ "delegate_research": SubAgentTool(researcher),
573
+ "delegate_analyse": SubAgentTool(analyst),
574
+ },
575
+ ...
576
+ )
577
+
578
+ async for event in main_agent.run_stream(goal):
579
+ # Sub-agent THOUGHT / ACTION / OBSERVATION events bubble up tagged
580
+ # with event.parent_agent_id so renderers can indent or group them.
581
+ ...
582
+ ```
583
+
584
+ When the main agent's LLM emits `{"action": "delegate_research", "args":
585
+ {"task": "..."}}`, the wrapped sub-agent runs its own ReAct loop with a
586
+ **fresh `WorkingMemory`**; the sub's final answer becomes the main agent's
587
+ next observation. Each delegation = a fresh sub run; cross-delegation
588
+ continuity flows through long-term memory (`MemoryManager.build_context(
589
+ agent_id=…)`), not through WM carry-over — same model as the
590
+ orchestrator's per-task agents.
591
+
592
+ Parallel delegation works via the existing `actions: [...]` shape:
593
+
594
+ ```json
595
+ {
596
+ "thought": "research and analyse can run in parallel",
597
+ "actions": [
598
+ {"tool": "delegate_research", "args": {"task": "find baseline metrics"}},
599
+ {"tool": "delegate_analyse", "args": {"task": "score recent incidents"}}
600
+ ]
601
+ }
602
+ ```
603
+
604
+ Both sub-agent streams interleave through a fan-in helper
605
+ (`harness/streaming.py:fan_in`) so the parent's event stream stays a
606
+ single sequence even when multiple sub-agents are working concurrently.
607
+
608
+ **Sub-agents as tools vs. Orchestrator — which to pick?**
609
+
610
+ | | Sub-agents as tools | Orchestrator |
611
+ |---|---|---|
612
+ | Plan timing | Per step (dynamic) | Upfront DAG |
613
+ | Who plans | The main agent's LLM | A separate planner LLM |
614
+ | Best for | Exploratory work, "I don't know what I need until I see partial results" | Known workflows (audits, ETL, scheduled jobs) |
615
+ | Replan-on-failure | Implicit — main agent reacts to sub's failure | Explicit `on_failure="replan"` |
616
+ | Recursion guard | `max_subagent_depth` on `AgentConfig` | N/A — DAG is flat |
617
+
618
+ Both are first-class. Most real systems combine them — the orchestrator
619
+ plans a high-level DAG, while individual tasks within it use sub-agent tools
620
+ for finer dynamic decomposition.
621
+
622
+ ### 6. Persistent chat sessions — `PersistentAgent`
623
+
624
+ `PersistentAgent` is a wrapper around a coordinator `BaseAgent`, not a new
625
+ agent constructor. Build agents, sub-agents, MCP tools, and auth exactly as
626
+ usual; then wrap the top-level coordinator to add durable chat/session state.
627
+
628
+ ```python
629
+ from harness.persistent import PersistentAgent, SQLiteSessionStore
630
+
631
+ app = PersistentAgent(
632
+ coordinator=coordinator_agent,
633
+ session_store=SQLiteSessionStore("~/.agent-harness/sessions.sqlite"),
634
+ memory=memory_manager,
635
+ llm=llm,
636
+ )
637
+
638
+ async for event in app.chat("I like the above; can you do X?", session_id="thread-1"):
639
+ renderer.render(event)
640
+ ```
641
+
642
+ Use `app.capabilities()` to inspect the already-wired coordinator,
643
+ sub-agents, and MCP tools. The demo exposes this with
644
+ `--show-capabilities`.
645
+
646
+ Each chat turn gets a fresh `WorkingMemory`. Continuity comes from the
647
+ SQLite session state (rolling summary + recent messages) and normal
648
+ `MemoryManager` recall, not from carrying old ReAct scratchpads forever.
649
+
650
+ **Prefix-cache aware.** The full prompt stays byte-identical across plain
651
+ chat turns within a compaction window. Three things make that work:
652
+
653
+ 1. The accumulated session transcript is sent to the coordinator as real
654
+ `user`/`assistant` role messages — not folded into one inline-rendered
655
+ text blob. Each turn extends the message list by exactly the previous
656
+ turn's user+assistant pair plus the new user task.
657
+ 2. Memory context (`MemoryManager.build_context` result) lives in a
658
+ pinned user-message prior, **not in the system prompt**. The system
659
+ prompt is now pure agent identity + tool list + ReAct format — purely
660
+ static. Memory context is fetched once per session, cached on the
661
+ `PersistentAgent`, and refreshed only at compaction or high-signal
662
+ reconcile (so it doesn't shift turn-to-turn just because the goal
663
+ changed).
664
+ 3. Long-term memory writes are **deferred to compaction**, not fired
665
+ periodically. The session transcript is the per-turn journal — facts
666
+ land in long-term memory only when we're already breaking cache (at
667
+ compaction or high-signal events like tool runs / "remember" terms).
668
+ The legacy `reconcile_every_turns` knob is a no-op.
669
+
670
+ OpenAI's automatic prefix cache and Anthropic's `cache_control` markers
671
+ both match on longest-identical prefix. Together these three changes let
672
+ a typical session pay one cold compaction every ~12 turns plus ~K turns
673
+ of warm-prefix hits in between, instead of paying full price every
674
+ turn.
675
+
676
+ The demo stores local state under `~/.agent-harness` by default:
677
+
678
+ - `sessions.sqlite` for chat/session state
679
+ - `memory/semantic.sqlite` for semantic facts/preferences
680
+ - `memory/lance_episodic` for searchable episodic summaries
681
+
682
+ By default the demo uses `OpenAILLM` and requires `OPENAI_API_KEY`.
683
+ To run it with stored OpenAI subscription credentials instead:
684
+
685
+ ```bash
686
+ python -m harness.cli login openai-codex
687
+ python examples/persistent_agent_demo.py --provider openai-codex
688
+ ```
689
+
690
+ Or with stored Claude Code credentials:
691
+
692
+ ```bash
693
+ python -m harness.cli login claude-code
694
+ python examples/persistent_agent_demo.py --provider claude-code
695
+ ```
696
+
697
+ The wrapper owns cadence:
698
+
699
+ - session transcript is written every turn without an LLM call;
700
+ - `MemoryManager.write_run_end(...)` is called only when the turn has a
701
+ durable signal, tool/sub-agent work, errors, or the configured interval
702
+ has elapsed;
703
+ - session compaction runs at message/turn thresholds and updates the
704
+ stored summary.
705
+
706
+ For MCP, put MCP tools on the coordinator or sub-agents before wrapping.
707
+ `PersistentAgent` does not special-case MCP auth; it preserves the existing
708
+ tool wiring model.
709
+
544
710
  ---
545
711
 
546
712
  Event types by path:
@@ -600,9 +766,27 @@ a string to `n` characters with a trailing `…`.
600
766
 
601
767
  ## Working memory budget
602
768
 
603
- `AgentConfig.working_memory_max_tokens` controls per-agent eviction (default
604
- `8000`). Counting defaults to a `chars/4` heuristic (stable for code/JSON/text
605
- within ~10–20% of real BPE counts, zero deps). For exact counts plug your own
769
+ `AgentConfig.working_memory_max_tokens` controls per-agent eviction. **Default
770
+ is `None` (auto-derived from the LLM's context window at runtime)** so the
771
+ threshold adapts when you swap models (a 128K `gpt-5.4-mini` gets ~99K of WM
772
+ headroom; a 200K `claude-sonnet-4-6` gets ~159K; a tiny 8K fallback gets ~6K).
773
+ Concretely the WM compacts at `0.8 × llm.input_token_budget`, leaving 20%
774
+ headroom for system prompt, memory context, tool schemas, and tokeniser
775
+ variance.
776
+
777
+ Each shipped adapter (`OpenAILLM`, `AnthropicLLM`, `ClaudeCodeLLM`,
778
+ `OpenAICodexLLM`) exposes `context_window` and `input_token_budget`
779
+ properties driven by a per-provider lookup table. For models the table
780
+ doesn't know (new releases, fine-tunes), pass `context_window=N` explicitly:
781
+
782
+ ```python
783
+ llm = OpenAILLM(model="gpt-6-preview", context_window=256_000)
784
+ # or hard-cap WM independent of the LLM:
785
+ AgentConfig(..., working_memory_max_tokens=16_000)
786
+ ```
787
+
788
+ Counting defaults to a `chars/4` heuristic (stable for code/JSON/text within
789
+ ~10–20% of real BPE counts, zero deps). For exact counts plug your own
606
790
  counter into `WorkingMemory` directly:
607
791
 
608
792
  ```python
@@ -1376,7 +1560,7 @@ agents alongside HITL on the shell tool.
1376
1560
  | `max_wall_time_seconds` | (guardrail) | See `GuardrailConfig` |
1377
1561
  | `memory_context_enabled` | `True` | Prepend relevant long-term memory to the system prompt |
1378
1562
  | `confidence_from_llm` | `True` | Use the `confidence` field from the LLM response; set `False` to always return `1.0` |
1379
- | `working_memory_max_tokens` | `8000` | Token budget for in-context working memory before rolling summarisation kicks in |
1563
+ | `working_memory_max_tokens` | `None` (auto-derive from `llm.input_token_budget × 0.8`; pass an int to hard-cap) | Token budget for in-context working memory before rolling summarisation kicks in |
1380
1564
  | `hitl_tools` | `[]` | Tool names that require human approval before execution |
1381
1565
  | `checkpoint_every` | `0` | Write a crash-resumable checkpoint every N steps; `0` disables periodic checkpoints |
1382
1566
  | `stream_tokens` | `False` | Emit `TOKEN` events as the LLM streams. Disabled by default — enable if you want to render partial output in real time: `AgentConfig(..., stream_tokens=True)` |