react-agent-harness 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {react_agent_harness-0.4.0/react_agent_harness.egg-info → react_agent_harness-0.5.0}/PKG-INFO +1 -1
  2. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/agents/base.py +36 -0
  3. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/console.py +31 -0
  4. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/events.py +4 -0
  5. react_agent_harness-0.5.0/harness/llm/anthropic.py +242 -0
  6. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/claude_code.py +42 -10
  7. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/working.py +17 -0
  8. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/pyproject.toml +1 -1
  9. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
  10. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt +3 -0
  11. react_agent_harness-0.5.0/tests/test_anthropic_llm.py +401 -0
  12. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_claude_code_llm.py +115 -2
  13. react_agent_harness-0.5.0/tests/test_console_renderer.py +52 -0
  14. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_streaming.py +1 -0
  15. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_working_memory.py +15 -0
  16. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/LICENSE +0 -0
  17. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/README.md +0 -0
  18. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/agents/__init__.py +0 -0
  19. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/__init__.py +0 -0
  20. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/annotation.py +0 -0
  21. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/checkpoint.py +0 -0
  22. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/cli.py +0 -0
  23. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/executor_bridge.py +0 -0
  24. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/hitl.py +0 -0
  25. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/__init__.py +0 -0
  26. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/_streaming.py +0 -0
  27. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/auth.py +0 -0
  28. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/openai.py +0 -0
  29. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/openai_codex.py +0 -0
  30. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/otel.py +0 -0
  31. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/runtime.py +0 -0
  32. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/steering.py +0 -0
  33. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/tool_policy.py +0 -0
  34. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/utils.py +0 -0
  35. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/__init__.py +0 -0
  36. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/episodic_lance.py +0 -0
  37. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/manager.py +0 -0
  38. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/redis_store.py +0 -0
  39. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/stores.py +0 -0
  40. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/orchestrator/__init__.py +0 -0
  41. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/orchestrator/planner.py +0 -0
  42. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
  43. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
  44. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/requires.txt +0 -0
  45. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/top_level.txt +0 -0
  46. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/setup.cfg +0 -0
  47. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_agents_base.py +0 -0
  48. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_annotation.py +0 -0
  49. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_checkpoint_resume.py +0 -0
  50. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_cli.py +0 -0
  51. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_executor_bridge.py +0 -0
  52. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_http_fetch.py +0 -0
  53. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_llm_auth.py +0 -0
  54. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_mcp_adapter.py +0 -0
  55. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_mcp_auth.py +0 -0
  56. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_memory.py +0 -0
  57. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_openai_codex_llm.py +0 -0
  58. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_openai_llm.py +0 -0
  59. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_orchestrator.py +0 -0
  60. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_otel.py +0 -0
  61. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_parse_action_json.py +0 -0
  62. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_redis_store.py +0 -0
  63. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_steering.py +0 -0
  64. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_tool_policy.py +0 -0
  65. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_utils.py +0 -0
  66. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_vision.py +0 -0
  67. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/__init__.py +0 -0
  68. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/__init__.py +0 -0
  69. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/fetch_image.py +0 -0
  70. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/http_fetch.py +0 -0
  71. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/__init__.py +0 -0
  72. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/adapter.py +0 -0
  73. {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/auth.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
@@ -381,6 +381,8 @@ class BaseAgent:
381
381
  elif thought_event.type == EventType.THOUGHT:
382
382
  response = thought_event.payload.get("response")
383
383
  yield thought_event
384
+ else:
385
+ yield thought_event
384
386
 
385
387
  if response is None:
386
388
  reason = self._last_think_error or "LLM returned unparseable response"
@@ -642,6 +644,14 @@ class BaseAgent:
642
644
  """
643
645
  messages = self._working_memory.get_messages()
644
646
  accumulated = ""
647
+ before_usage = self._working_memory.context_usage()
648
+ before_summarizations = self._working_memory.summarization_count
649
+
650
+ yield BusEvent(
651
+ type=EventType.CONTEXT,
652
+ agent_id=self.config.agent_id,
653
+ payload=before_usage,
654
+ )
645
655
 
646
656
  try:
647
657
  if hasattr(self._llm, "stream_complete"):
@@ -686,6 +696,32 @@ class BaseAgent:
686
696
  if response is not None:
687
697
  self._last_think_error = None
688
698
 
699
+ after_usage = self._working_memory.context_usage()
700
+ if self._working_memory.summarization_count > before_summarizations:
701
+ yield BusEvent(
702
+ type=EventType.MEMORY,
703
+ agent_id=self.config.agent_id,
704
+ payload={
705
+ "event": "summarized",
706
+ "before": before_usage,
707
+ "after": after_usage,
708
+ "summarizations": self._working_memory.summarization_count,
709
+ },
710
+ )
711
+ llm_usage = getattr(self._llm, "last_usage", None) or {}
712
+ if llm_usage or after_usage != before_usage:
713
+ yield BusEvent(
714
+ type=EventType.CONTEXT,
715
+ agent_id=self.config.agent_id,
716
+ payload={
717
+ **after_usage,
718
+ "tokens_in": llm_usage.get("tokens_in"),
719
+ "tokens_out": llm_usage.get("tokens_out"),
720
+ "cache_read_tokens": llm_usage.get("cache_read_tokens"),
721
+ "cache_creation_tokens": llm_usage.get("cache_creation_tokens"),
722
+ },
723
+ )
724
+
689
725
  yield BusEvent(
690
726
  type=EventType.THOUGHT,
691
727
  agent_id=self.config.agent_id,
@@ -117,6 +117,37 @@ class ConsoleRenderer:
117
117
  file=self._out,
118
118
  )
119
119
 
120
+ elif t == EventType.CONTEXT:
121
+ tokens = int(p.get("tokens") or 0)
122
+ max_tokens = int(p.get("max_tokens") or 0)
123
+ pct = float(p.get("percent") or 0.0) * 100
124
+ level = p.get("level") or "normal"
125
+ suffix = "" if level == "normal" else f" {level}"
126
+ llm_parts: list[str] = []
127
+ if p.get("tokens_in") is not None:
128
+ llm_parts.append(f"in={int(p['tokens_in']):,}")
129
+ if p.get("tokens_out") is not None:
130
+ llm_parts.append(f"out={int(p['tokens_out']):,}")
131
+ if p.get("cache_read_tokens"):
132
+ llm_parts.append(f"cache_hit={int(p['cache_read_tokens']):,}")
133
+ if p.get("cache_creation_tokens"):
134
+ llm_parts.append(f"cache_new={int(p['cache_creation_tokens']):,}")
135
+ llm_suffix = f" [{' '.join(llm_parts)}]" if llm_parts else ""
136
+ print(
137
+ f"{self._label(event)} ctx {tokens:,} / {max_tokens:,} tokens "
138
+ f"{pct:.0f}%{suffix}{llm_suffix}",
139
+ file=self._out,
140
+ )
141
+
142
+ elif t == EventType.MEMORY:
143
+ before = p.get("before") if isinstance(p.get("before"), dict) else {}
144
+ after = p.get("after") if isinstance(p.get("after"), dict) else {}
145
+ print(
146
+ f"{self._label(event)} memory summarized "
147
+ f"{int(before.get('tokens') or 0):,} -> {int(after.get('tokens') or 0):,} tokens",
148
+ file=self._out,
149
+ )
150
+
120
151
  elif t == EventType.HUMAN_GUIDANCE:
121
152
  print(
122
153
  f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
@@ -19,6 +19,8 @@ Event lifecycle within a single goal:
19
19
  PLAN — orchestrator emitted a static DAG
20
20
  (per task in DAG)
21
21
  HUMAN_GUIDANCE? — async steering drained at top of step
22
+ CONTEXT — working-memory context budget estimate
23
+ MEMORY — working-memory compaction/summarization marker
22
24
  THOUGHT — agent's next-step reasoning
23
25
  TOKEN* — partial LLM output (only when client streams)
24
26
  ACTION — agent chose a tool + args
@@ -47,6 +49,8 @@ class EventType(str, Enum):
47
49
  TOKEN = "token"
48
50
  ACTION = "action"
49
51
  OBSERVATION = "observation"
52
+ CONTEXT = "context"
53
+ MEMORY = "memory"
50
54
  HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
51
55
  TASK_DONE = "task_done"
52
56
  REPLAN = "replan"
@@ -0,0 +1,242 @@
1
+ """
2
+ Anthropic LLM adapter (direct API key, no OAuth).
3
+
4
+ Implements the harness LLM client contract:
5
+ - async def complete(system, messages, **kwargs) -> dict
6
+ - async def stream_complete(system, messages) -> AsyncGenerator[str, None]
7
+
8
+ Prompt caching
9
+ --------------
10
+ Enabled by default (`prompt_caching=True`). When active:
11
+ - The system prompt is sent as a content-block list with `cache_control`
12
+ on the last block so Anthropic can cache the compiled KV state.
13
+ - The last user message's text block also gets `cache_control` so
14
+ multi-turn ReAct loops that share a common leading prefix cache cheaply.
15
+
16
+ Cache reads cost ~10% of normal input tokens. Callers that pass a `cost_fn`
17
+ receive `cache_read_tokens` and `cache_creation_tokens` in the usage dict so
18
+ they can apply the correct per-tier pricing.
19
+
20
+ Usage tracking
21
+ --------------
22
+ `last_usage` is populated after every successful call::
23
+
24
+ {
25
+ "tokens_in": int, # non-cached input tokens
26
+ "tokens_out": int, # output tokens
27
+ "cache_read_tokens": int, # tokens served from cache
28
+ "cache_creation_tokens": int, # tokens written to cache
29
+ "model": str, # model id echoed from response
30
+ }
31
+
32
+ Cost tracking
33
+ -------------
34
+ An optional `cost_fn(usage) -> float` may be supplied to convert the usage
35
+ dict to dollars. This is handy for callers that know the per-model pricing
36
+ schedule. When `set_budget(guard)` is called (typically by AgentRuntime),
37
+ the adapter forwards computed costs to the guard's `add_cost()` method.
38
+
39
+ Install:
40
+ pip install -e ".[anthropic]"
41
+
42
+ Usage:
43
+ from harness.llm.anthropic import AnthropicLLM
44
+ llm = AnthropicLLM(model="claude-sonnet-4-6") # reads ANTHROPIC_API_KEY
45
+ """
46
+
47
+ from __future__ import annotations
48
+
49
+ import logging
50
+ import os
51
+ from collections.abc import AsyncGenerator, Callable
52
+ from typing import Any
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+
57
class AnthropicLLM:
    """Harness LLM client for the Anthropic Messages API (plain API key).

    Exposes ``complete`` (one-shot) and ``stream_complete`` (async text
    generator). ``last_usage`` holds the usage dict of the most recent
    *successful* call; it is reset to ``None`` when a call starts so a failed
    or abandoned call never leaves the previous call's numbers behind.
    """

    def __init__(
        self,
        *,
        model: str = "claude-sonnet-4-6",
        api_key: str | None = None,  # falls back to ANTHROPIC_API_KEY env
        max_tokens: int = 1024,
        cost_fn: Callable[[dict], float] | None = None,
        prompt_caching: bool = True,
    ) -> None:
        """Create the async Anthropic client.

        Args:
            model: Model id sent with every request.
            api_key: Explicit key; when falsy, ``ANTHROPIC_API_KEY`` is read
                from the environment.
            max_tokens: Default output-token cap for calls that do not
                override it.
            cost_fn: Optional ``usage -> dollars`` converter.
            prompt_caching: Whether ``cache_control`` markers are added to
                the system prompt and the last user message.

        Raises:
            ImportError: The ``anthropic`` package is not installed.
        """
        try:
            # Imported lazily so the harness works without the optional extra.
            import anthropic
        except ImportError as e:
            raise ImportError(
                'anthropic package not installed. Run: pip install -e ".[anthropic]"'
            ) from e

        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self._client = anthropic.AsyncAnthropic(api_key=resolved_key)
        self._model = model
        self._max_tokens = max_tokens
        self._cost_fn = cost_fn
        self._prompt_caching = prompt_caching
        self._budget: Any = None
        # Populated after every successful call; streaming callers read it here.
        self.last_usage: dict | None = None

    def set_budget(self, guard: Any) -> None:
        """Inject a BudgetGuard; AgentRuntime calls this at the start of each run."""
        self._budget = guard

    # ── Non-streaming ──────────────────────────────────────────────────────────

    async def complete(
        self,
        system: str | None,
        messages: list[dict],
        **kwargs: Any,
    ) -> dict:
        """Run one non-streaming completion.

        Only ``max_tokens`` is read from ``kwargs``; other keyword arguments
        are accepted and ignored.

        Returns:
            ``{"text": <concatenated text blocks>, "usage": <usage dict>}``.
        """
        max_tokens = int(kwargs.pop("max_tokens", self._max_tokens))
        request = self._build_request(system, messages, max_tokens)

        # Clear stale numbers first: if the request raises, readers of
        # ``last_usage`` must not see the previous call's usage.
        self.last_usage = None
        resp = await self._client.messages.create(**request)
        usage = self._finalize_usage(resp.usage, resp.model or self._model)

        text = _collect_text(resp.content)
        return {"text": text, "usage": usage}

    # ── Streaming ──────────────────────────────────────────────────────────────

    async def stream_complete(
        self,
        system: str | None,
        messages: list[dict],
        **kwargs: Any,
    ) -> AsyncGenerator[str, None]:
        """Yield text deltas as they arrive; usage lands in ``last_usage``.

        ``max_tokens`` may be overridden per call via ``kwargs`` (mirroring
        ``complete``); other keyword arguments are accepted and ignored.
        ``last_usage`` is only populated once the stream has been consumed to
        the end.
        """
        max_tokens = int(kwargs.pop("max_tokens", self._max_tokens))
        request = self._build_request(system, messages, max_tokens)

        # Same stale-usage protection as ``complete``; also covers consumers
        # that stop iterating before the stream finishes.
        self.last_usage = None
        async with self._client.messages.stream(**request) as stream:
            async for text in stream.text_stream:
                yield text

            final = await stream.get_final_message()

        self._finalize_usage(final.usage, final.model or self._model)

    # ── Internals ─────────────────────────────────────────────────────────────

    def _build_request(
        self,
        system: str | None,
        messages: list[dict],
        max_tokens: int,
    ) -> dict[str, Any]:
        """Assemble the keyword arguments shared by both call styles."""
        sys_blocks = _system_blocks(system, prompt_caching=self._prompt_caching)
        built_messages = _build_messages(messages, prompt_caching=self._prompt_caching)

        request: dict[str, Any] = {
            "model": self._model,
            "max_tokens": max_tokens,
            "messages": built_messages,
        }
        if sys_blocks:
            # The key is omitted entirely when there is no system prompt.
            request["system"] = sys_blocks
        return request

    def _finalize_usage(self, raw_usage: Any, model: str) -> dict:
        """Build the usage dict, attach/forward cost, and store it in ``last_usage``."""
        usage = _extract_usage(raw_usage, model)
        cost = _compute_cost(usage, self._cost_fn)
        if cost is not None:
            usage["cost_usd"] = cost
        self._record_cost(usage)
        self.last_usage = usage
        return usage

    def _record_cost(self, usage: dict) -> None:
        """Forward a positive ``cost_usd`` to the injected budget guard, if any."""
        # Identity check, not truthiness: a guard object that happens to be
        # falsy (defines ``__len__``/``__bool__``) must still receive costs.
        if self._budget is None:
            return
        cost = usage.get("cost_usd")
        if cost and cost > 0:
            self._budget.add_cost(cost)
157
+
158
+
159
+ # ── Module-level helpers ──────────────────────────────────────────────────────
160
+
161
+
162
+ def _system_blocks(system: str | None, *, prompt_caching: bool) -> list[dict[str, Any]]:
163
+ """Return the system param as a content-block list (or empty list for no system)."""
164
+ if not system:
165
+ return []
166
+ block: dict[str, Any] = {"type": "text", "text": system}
167
+ if prompt_caching:
168
+ block["cache_control"] = {"type": "ephemeral"}
169
+ return [block]
170
+
171
+
172
+ def _build_messages(messages: list[dict], *, prompt_caching: bool) -> list[dict[str, Any]]:
173
+ """Convert harness message dicts to Anthropic message format.
174
+
175
+ System-role messages are silently dropped (callers should pass them via
176
+ the `system` parameter). The last user message gets `cache_control` when
177
+ prompt_caching is enabled.
178
+ """
179
+ built: list[dict[str, Any]] = []
180
+ for msg in messages:
181
+ role = msg.get("role", "user")
182
+ if role == "system":
183
+ continue # consumed by caller as the system param
184
+ if role not in {"user", "assistant"}:
185
+ role = "user"
186
+ content = msg.get("content", "")
187
+ built.append(
188
+ {
189
+ "role": role,
190
+ "content": [{"type": "text", "text": str(content)}],
191
+ }
192
+ )
193
+
194
+ if prompt_caching:
195
+ _apply_last_user_cache_control(built)
196
+
197
+ return built
198
+
199
+
200
+ def _apply_last_user_cache_control(messages: list[dict]) -> None:
201
+ """Add cache_control to the last user message's single text block."""
202
+ for message in reversed(messages):
203
+ if message.get("role") != "user":
204
+ continue
205
+ content = message.get("content")
206
+ if isinstance(content, list) and len(content) == 1 and content[0].get("type") == "text":
207
+ content[0]["cache_control"] = {"type": "ephemeral"}
208
+ break
209
+
210
+
211
+ def _extract_usage(usage: Any, model: str) -> dict:
212
+ """Build the standard harness usage dict from an Anthropic usage object."""
213
+ return {
214
+ "tokens_in": getattr(usage, "input_tokens", 0),
215
+ "tokens_out": getattr(usage, "output_tokens", 0),
216
+ "cache_read_tokens": getattr(usage, "cache_read_input_tokens", 0) or 0,
217
+ "cache_creation_tokens": getattr(usage, "cache_creation_input_tokens", 0) or 0,
218
+ "model": model,
219
+ }
220
+
221
+
222
+ def _collect_text(content: Any) -> str:
223
+ """Extract plain text from an Anthropic response content list."""
224
+ if not content:
225
+ return ""
226
+ parts: list[str] = []
227
+ for block in content:
228
+ if hasattr(block, "text"):
229
+ parts.append(block.text)
230
+ elif isinstance(block, dict) and block.get("type") == "text":
231
+ parts.append(block.get("text", ""))
232
+ return "".join(parts)
233
+
234
+
235
+ def _compute_cost(usage: dict, cost_fn: Callable[[dict], float] | None) -> float | None:
236
+ if cost_fn is None:
237
+ return None
238
+ try:
239
+ return float(cost_fn(usage))
240
+ except Exception as e:
241
+ logger.warning("cost_fn raised: %s — skipping cost for this call", e)
242
+ return None
@@ -42,6 +42,7 @@ class ClaudeCodeLLM:
42
42
  http_client: Any | None = None,
43
43
  user_agent: str | None = None,
44
44
  betas: str = CLAUDE_CODE_BETAS,
45
+ prompt_caching: bool = True,
45
46
  ) -> None:
46
47
  if credential_provider is None:
47
48
  if auth_file is None:
@@ -66,6 +67,7 @@ class ClaudeCodeLLM:
66
67
  self._owns_client = http_client is None
67
68
  self._user_agent = user_agent or _default_user_agent()
68
69
  self._betas = betas
70
+ self._prompt_caching = prompt_caching
69
71
  self.last_usage: dict | None = None
70
72
 
71
73
  async def complete(
@@ -123,6 +125,7 @@ class ClaudeCodeLLM:
123
125
  messages=messages,
124
126
  max_tokens=max_tokens,
125
127
  extra=extra,
128
+ prompt_caching=self._prompt_caching,
126
129
  )
127
130
  payload["stream"] = True
128
131
  url = f"{self._base_url}/v1/messages"
@@ -142,6 +145,8 @@ class ClaudeCodeLLM:
142
145
 
143
146
  tokens_in = 0
144
147
  tokens_out = 0
148
+ cache_read_tokens = 0
149
+ cache_creation_tokens = 0
145
150
  async for _event_type, data in aiter_sse_events(response):
146
151
  if not data or data == "[DONE]":
147
152
  continue
@@ -161,6 +166,10 @@ class ClaudeCodeLLM:
161
166
  elif otype == "message_start":
162
167
  msg_usage = (obj.get("message") or {}).get("usage") or {}
163
168
  tokens_in = int(msg_usage.get("input_tokens") or 0)
169
+ cache_read_tokens = int(msg_usage.get("cache_read_input_tokens") or 0)
170
+ cache_creation_tokens = int(
171
+ msg_usage.get("cache_creation_input_tokens") or 0
172
+ )
164
173
  elif otype == "message_delta":
165
174
  delta_usage = obj.get("usage") or {}
166
175
  tokens_out = int(delta_usage.get("output_tokens") or 0)
@@ -168,6 +177,8 @@ class ClaudeCodeLLM:
168
177
  self.last_usage = {
169
178
  "tokens_in": tokens_in,
170
179
  "tokens_out": tokens_out,
180
+ "cache_read_tokens": cache_read_tokens,
181
+ "cache_creation_tokens": cache_creation_tokens,
171
182
  "total_tokens": tokens_in + tokens_out,
172
183
  "provider": "claude-code",
173
184
  }
@@ -251,6 +262,7 @@ def _build_payload(
251
262
  messages: list[dict],
252
263
  max_tokens: int,
253
264
  extra: dict[str, Any],
265
+ prompt_caching: bool = True,
254
266
  ) -> dict[str, Any]:
255
267
  instructions = system or ""
256
268
  input_messages: list[dict] = []
@@ -261,11 +273,14 @@ def _build_payload(
261
273
  instructions = f"{instructions}\n\n{text}" if instructions else text
262
274
  continue
263
275
  input_messages.append(message)
276
+ built_messages = [_message_payload(message) for message in input_messages]
277
+ if prompt_caching:
278
+ _apply_last_user_cache_control(built_messages)
264
279
  payload: dict[str, Any] = {
265
280
  "model": model,
266
281
  "max_tokens": max_tokens,
267
- "system": _system_blocks(instructions),
268
- "messages": [_message_payload(message) for message in input_messages],
282
+ "system": _system_blocks(instructions, prompt_caching=prompt_caching),
283
+ "messages": built_messages,
269
284
  }
270
285
  for key in ("temperature", "top_p", "top_k", "stop_sequences", "thinking"):
271
286
  if key in extra:
@@ -273,7 +288,7 @@ def _build_payload(
273
288
  return payload
274
289
 
275
290
 
276
- def _system_blocks(system: str | None) -> list[dict[str, Any]]:
291
+ def _system_blocks(system: str | None, *, prompt_caching: bool = True) -> list[dict[str, Any]]:
277
292
  cc_version = _resolve_cc_version()
278
293
  blocks: list[dict[str, Any]] = [
279
294
  {
@@ -286,16 +301,33 @@ def _system_blocks(system: str | None) -> list[dict[str, Any]]:
286
301
  {"type": "text", "text": CLAUDE_CODE_IDENTITY},
287
302
  ]
288
303
  if system:
289
- blocks.append(
290
- {
291
- "type": "text",
292
- "text": system,
293
- "cache_control": {"type": "ephemeral"},
294
- }
295
- )
304
+ block: dict[str, Any] = {"type": "text", "text": system}
305
+ if prompt_caching:
306
+ block["cache_control"] = {"type": "ephemeral"}
307
+ blocks.append(block)
296
308
  return blocks
297
309
 
298
310
 
311
+ def _apply_last_user_cache_control(messages: list[dict]) -> None:
312
+ """Add cache_control to the last user message's content block (string only).
313
+
314
+ This marks the current task/goal as cacheable so repeated ReAct steps
315
+ that share the same leading conversation prefix benefit from the cache.
316
+ Only mutates messages whose last user-role entry has a plain-string
317
+ content block (skips multimodal / already-list content).
318
+ """
319
+ for message in reversed(messages):
320
+ if message.get("role") != "user":
321
+ continue
322
+ content = message.get("content")
323
+ if not isinstance(content, list):
324
+ break
325
+ # content is already a list of blocks from _message_payload
326
+ if len(content) == 1 and content[0].get("type") == "text":
327
+ content[0]["cache_control"] = {"type": "ephemeral"}
328
+ break
329
+
330
+
299
331
  def _message_payload(message: dict) -> dict[str, Any]:
300
332
  role = message.get("role", "user")
301
333
  if role not in {"user", "assistant"}:
@@ -215,6 +215,23 @@ class WorkingMemory:
215
215
  def token_count(self) -> int:
216
216
  return self._token_total
217
217
 
218
def context_usage(self) -> dict:
    """Snapshot how much of the token budget working memory is using.

    Returns a dict with the current token total, the configured maximum,
    the fill ratio (``0.0`` when the maximum is not positive), a severity
    level (``"critical"`` at >= 0.95, ``"warning"`` at >= 0.80, otherwise
    ``"normal"``), the number of stored messages and the summarization
    count.
    """
    used = self._token_total
    limit = self.max_tokens

    if limit > 0:
        percent = used / limit
    else:
        percent = 0.0

    level = "critical" if percent >= 0.95 else "warning" if percent >= 0.80 else "normal"

    snapshot = {
        "tokens": used,
        "max_tokens": limit,
        "percent": percent,
        "level": level,
    }
    snapshot["messages"] = len(self._messages)
    snapshot["summarizations"] = self._summarization_count
    return snapshot
234
+
218
235
  def clear(self) -> None:
219
236
  self._messages.clear()
220
237
  self._token_total = 0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "react-agent-harness"
7
- version = "0.4.0"
7
+ version = "0.5.0"
8
8
  description = "Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming"
9
9
  requires-python = ">=3.10"
10
10
  dependencies = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: react-agent-harness
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
5
5
  Requires-Python: >=3.10
6
6
  License-File: LICENSE
@@ -18,6 +18,7 @@ harness/tool_policy.py
18
18
  harness/utils.py
19
19
  harness/llm/__init__.py
20
20
  harness/llm/_streaming.py
21
+ harness/llm/anthropic.py
21
22
  harness/llm/auth.py
22
23
  harness/llm/claude_code.py
23
24
  harness/llm/openai.py
@@ -38,9 +39,11 @@ react_agent_harness.egg-info/requires.txt
38
39
  react_agent_harness.egg-info/top_level.txt
39
40
  tests/test_agents_base.py
40
41
  tests/test_annotation.py
42
+ tests/test_anthropic_llm.py
41
43
  tests/test_checkpoint_resume.py
42
44
  tests/test_claude_code_llm.py
43
45
  tests/test_cli.py
46
+ tests/test_console_renderer.py
44
47
  tests/test_executor_bridge.py
45
48
  tests/test_http_fetch.py
46
49
  tests/test_llm_auth.py