react-agent-harness 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.4.0/react_agent_harness.egg-info → react_agent_harness-0.5.0}/PKG-INFO +1 -1
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/agents/base.py +36 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/console.py +31 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/events.py +4 -0
- react_agent_harness-0.5.0/harness/llm/anthropic.py +242 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/claude_code.py +42 -10
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/working.py +17 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/pyproject.toml +1 -1
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0/react_agent_harness.egg-info}/PKG-INFO +1 -1
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt +3 -0
- react_agent_harness-0.5.0/tests/test_anthropic_llm.py +401 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_claude_code_llm.py +115 -2
- react_agent_harness-0.5.0/tests/test_console_renderer.py +52 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_streaming.py +1 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_working_memory.py +15 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/LICENSE +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/README.md +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/agents/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/annotation.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/checkpoint.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/cli.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/hitl.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/_streaming.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/auth.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/openai.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/openai_codex.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/otel.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/runtime.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/steering.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/tool_policy.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/utils.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/manager.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/redis_store.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/stores.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/orchestrator/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/orchestrator/planner.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/entry_points.txt +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/requires.txt +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/setup.cfg +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_checkpoint_resume.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_cli.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_llm_auth.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_mcp_adapter.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_mcp_auth.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_memory.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_openai_codex_llm.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_openai_llm.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_orchestrator.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_otel.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_steering.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_tool_policy.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_utils.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tests/test_vision.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/builtin/http_fetch.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/__init__.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/adapter.py +0 -0
- {react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/tools/mcp/auth.py +0 -0
|
@@ -381,6 +381,8 @@ class BaseAgent:
|
|
|
381
381
|
elif thought_event.type == EventType.THOUGHT:
|
|
382
382
|
response = thought_event.payload.get("response")
|
|
383
383
|
yield thought_event
|
|
384
|
+
else:
|
|
385
|
+
yield thought_event
|
|
384
386
|
|
|
385
387
|
if response is None:
|
|
386
388
|
reason = self._last_think_error or "LLM returned unparseable response"
|
|
@@ -642,6 +644,14 @@ class BaseAgent:
|
|
|
642
644
|
"""
|
|
643
645
|
messages = self._working_memory.get_messages()
|
|
644
646
|
accumulated = ""
|
|
647
|
+
before_usage = self._working_memory.context_usage()
|
|
648
|
+
before_summarizations = self._working_memory.summarization_count
|
|
649
|
+
|
|
650
|
+
yield BusEvent(
|
|
651
|
+
type=EventType.CONTEXT,
|
|
652
|
+
agent_id=self.config.agent_id,
|
|
653
|
+
payload=before_usage,
|
|
654
|
+
)
|
|
645
655
|
|
|
646
656
|
try:
|
|
647
657
|
if hasattr(self._llm, "stream_complete"):
|
|
@@ -686,6 +696,32 @@ class BaseAgent:
|
|
|
686
696
|
if response is not None:
|
|
687
697
|
self._last_think_error = None
|
|
688
698
|
|
|
699
|
+
after_usage = self._working_memory.context_usage()
|
|
700
|
+
if self._working_memory.summarization_count > before_summarizations:
|
|
701
|
+
yield BusEvent(
|
|
702
|
+
type=EventType.MEMORY,
|
|
703
|
+
agent_id=self.config.agent_id,
|
|
704
|
+
payload={
|
|
705
|
+
"event": "summarized",
|
|
706
|
+
"before": before_usage,
|
|
707
|
+
"after": after_usage,
|
|
708
|
+
"summarizations": self._working_memory.summarization_count,
|
|
709
|
+
},
|
|
710
|
+
)
|
|
711
|
+
llm_usage = getattr(self._llm, "last_usage", None) or {}
|
|
712
|
+
if llm_usage or after_usage != before_usage:
|
|
713
|
+
yield BusEvent(
|
|
714
|
+
type=EventType.CONTEXT,
|
|
715
|
+
agent_id=self.config.agent_id,
|
|
716
|
+
payload={
|
|
717
|
+
**after_usage,
|
|
718
|
+
"tokens_in": llm_usage.get("tokens_in"),
|
|
719
|
+
"tokens_out": llm_usage.get("tokens_out"),
|
|
720
|
+
"cache_read_tokens": llm_usage.get("cache_read_tokens"),
|
|
721
|
+
"cache_creation_tokens": llm_usage.get("cache_creation_tokens"),
|
|
722
|
+
},
|
|
723
|
+
)
|
|
724
|
+
|
|
689
725
|
yield BusEvent(
|
|
690
726
|
type=EventType.THOUGHT,
|
|
691
727
|
agent_id=self.config.agent_id,
|
|
@@ -117,6 +117,37 @@ class ConsoleRenderer:
|
|
|
117
117
|
file=self._out,
|
|
118
118
|
)
|
|
119
119
|
|
|
120
|
+
elif t == EventType.CONTEXT:
|
|
121
|
+
tokens = int(p.get("tokens") or 0)
|
|
122
|
+
max_tokens = int(p.get("max_tokens") or 0)
|
|
123
|
+
pct = float(p.get("percent") or 0.0) * 100
|
|
124
|
+
level = p.get("level") or "normal"
|
|
125
|
+
suffix = "" if level == "normal" else f" {level}"
|
|
126
|
+
llm_parts: list[str] = []
|
|
127
|
+
if p.get("tokens_in") is not None:
|
|
128
|
+
llm_parts.append(f"in={int(p['tokens_in']):,}")
|
|
129
|
+
if p.get("tokens_out") is not None:
|
|
130
|
+
llm_parts.append(f"out={int(p['tokens_out']):,}")
|
|
131
|
+
if p.get("cache_read_tokens"):
|
|
132
|
+
llm_parts.append(f"cache_hit={int(p['cache_read_tokens']):,}")
|
|
133
|
+
if p.get("cache_creation_tokens"):
|
|
134
|
+
llm_parts.append(f"cache_new={int(p['cache_creation_tokens']):,}")
|
|
135
|
+
llm_suffix = f" [{' '.join(llm_parts)}]" if llm_parts else ""
|
|
136
|
+
print(
|
|
137
|
+
f"{self._label(event)} ctx {tokens:,} / {max_tokens:,} tokens "
|
|
138
|
+
f"{pct:.0f}%{suffix}{llm_suffix}",
|
|
139
|
+
file=self._out,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
elif t == EventType.MEMORY:
|
|
143
|
+
before = p.get("before") if isinstance(p.get("before"), dict) else {}
|
|
144
|
+
after = p.get("after") if isinstance(p.get("after"), dict) else {}
|
|
145
|
+
print(
|
|
146
|
+
f"{self._label(event)} memory summarized "
|
|
147
|
+
f"{int(before.get('tokens') or 0):,} -> {int(after.get('tokens') or 0):,} tokens",
|
|
148
|
+
file=self._out,
|
|
149
|
+
)
|
|
150
|
+
|
|
120
151
|
elif t == EventType.HUMAN_GUIDANCE:
|
|
121
152
|
print(
|
|
122
153
|
f"\n{self._label(event)} ▶ steered step={p.get('step')} text={p.get('text')!r}",
|
|
@@ -19,6 +19,8 @@ Event lifecycle within a single goal:
|
|
|
19
19
|
PLAN — orchestrator emitted a static DAG
|
|
20
20
|
(per task in DAG)
|
|
21
21
|
HUMAN_GUIDANCE? — async steering drained at top of step
|
|
22
|
+
CONTEXT — working-memory context budget estimate
|
|
23
|
+
MEMORY — working-memory compaction/summarization marker
|
|
22
24
|
THOUGHT — agent's next-step reasoning
|
|
23
25
|
TOKEN* — partial LLM output (only when client streams)
|
|
24
26
|
ACTION — agent chose a tool + args
|
|
@@ -47,6 +49,8 @@ class EventType(str, Enum):
|
|
|
47
49
|
TOKEN = "token"
|
|
48
50
|
ACTION = "action"
|
|
49
51
|
OBSERVATION = "observation"
|
|
52
|
+
CONTEXT = "context"
|
|
53
|
+
MEMORY = "memory"
|
|
50
54
|
HUMAN_GUIDANCE = "human_guidance" # async steering injected at step boundary
|
|
51
55
|
TASK_DONE = "task_done"
|
|
52
56
|
REPLAN = "replan"
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Anthropic LLM adapter (direct API key, no OAuth).
|
|
3
|
+
|
|
4
|
+
Implements the harness LLM client contract:
|
|
5
|
+
- async def complete(system, messages, **kwargs) -> dict
|
|
6
|
+
- async def stream_complete(system, messages) -> AsyncGenerator[str, None]
|
|
7
|
+
|
|
8
|
+
Prompt caching
|
|
9
|
+
--------------
|
|
10
|
+
Enabled by default (`prompt_caching=True`). When active:
|
|
11
|
+
- The system prompt is sent as a content-block list with `cache_control`
|
|
12
|
+
on the last block so Anthropic can cache the compiled KV state.
|
|
13
|
+
- The last user message's text block also gets `cache_control` so
|
|
14
|
+
multi-turn ReAct loops that share a common leading prefix cache cheaply.
|
|
15
|
+
|
|
16
|
+
Cache reads cost ~10% of normal input tokens. Callers that pass a `cost_fn`
|
|
17
|
+
receive `cache_read_tokens` and `cache_creation_tokens` in the usage dict so
|
|
18
|
+
they can apply the correct per-tier pricing.
|
|
19
|
+
|
|
20
|
+
Usage tracking
|
|
21
|
+
--------------
|
|
22
|
+
`last_usage` is populated after every call::
|
|
23
|
+
|
|
24
|
+
{
|
|
25
|
+
"tokens_in": int, # non-cached input tokens
|
|
26
|
+
"tokens_out": int, # output tokens
|
|
27
|
+
"cache_read_tokens": int, # tokens served from cache
|
|
28
|
+
"cache_creation_tokens": int, # tokens written to cache
|
|
29
|
+
"model": str, # model id echoed from response
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
Cost tracking
|
|
33
|
+
-------------
|
|
34
|
+
An optional `cost_fn(usage) -> float` may be supplied to convert the usage
|
|
35
|
+
dict to dollars. This is handy for callers that know the per-model pricing
|
|
36
|
+
schedule. When `set_budget(guard)` is called (typically by AgentRuntime),
|
|
37
|
+
the adapter forwards computed costs to the guard's `add_cost()` method.
|
|
38
|
+
|
|
39
|
+
Install:
|
|
40
|
+
pip install -e ".[anthropic]"
|
|
41
|
+
|
|
42
|
+
Usage:
|
|
43
|
+
from harness.llm.anthropic import AnthropicLLM
|
|
44
|
+
llm = AnthropicLLM(model="claude-sonnet-4-6") # reads ANTHROPIC_API_KEY
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
from __future__ import annotations
|
|
48
|
+
|
|
49
|
+
import logging
|
|
50
|
+
import os
|
|
51
|
+
from collections.abc import AsyncGenerator, Callable
|
|
52
|
+
from typing import Any
|
|
53
|
+
|
|
54
|
+
logger = logging.getLogger(__name__)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class AnthropicLLM:
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
*,
|
|
61
|
+
model: str = "claude-sonnet-4-6",
|
|
62
|
+
api_key: str | None = None, # falls back to ANTHROPIC_API_KEY env
|
|
63
|
+
max_tokens: int = 1024,
|
|
64
|
+
cost_fn: Callable[[dict], float] | None = None,
|
|
65
|
+
prompt_caching: bool = True,
|
|
66
|
+
) -> None:
|
|
67
|
+
try:
|
|
68
|
+
import anthropic
|
|
69
|
+
except ImportError as e:
|
|
70
|
+
raise ImportError(
|
|
71
|
+
'anthropic package not installed. Run: pip install -e ".[anthropic]"'
|
|
72
|
+
) from e
|
|
73
|
+
|
|
74
|
+
resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
|
75
|
+
self._client = anthropic.AsyncAnthropic(api_key=resolved_key)
|
|
76
|
+
self._model = model
|
|
77
|
+
self._max_tokens = max_tokens
|
|
78
|
+
self._cost_fn = cost_fn
|
|
79
|
+
self._prompt_caching = prompt_caching
|
|
80
|
+
self._budget: Any = None
|
|
81
|
+
# Populated after every successful call; streaming callers read it here.
|
|
82
|
+
self.last_usage: dict | None = None
|
|
83
|
+
|
|
84
|
+
def set_budget(self, guard: Any) -> None:
|
|
85
|
+
"""Inject a BudgetGuard; AgentRuntime calls this at the start of each run."""
|
|
86
|
+
self._budget = guard
|
|
87
|
+
|
|
88
|
+
# ── Non-streaming ──────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
async def complete(
|
|
91
|
+
self,
|
|
92
|
+
system: str | None,
|
|
93
|
+
messages: list[dict],
|
|
94
|
+
**kwargs: Any,
|
|
95
|
+
) -> dict:
|
|
96
|
+
max_tokens = int(kwargs.pop("max_tokens", self._max_tokens))
|
|
97
|
+
sys_blocks = _system_blocks(system, prompt_caching=self._prompt_caching)
|
|
98
|
+
built_messages = _build_messages(messages, prompt_caching=self._prompt_caching)
|
|
99
|
+
|
|
100
|
+
request: dict[str, Any] = {
|
|
101
|
+
"model": self._model,
|
|
102
|
+
"max_tokens": max_tokens,
|
|
103
|
+
"messages": built_messages,
|
|
104
|
+
}
|
|
105
|
+
if sys_blocks:
|
|
106
|
+
request["system"] = sys_blocks
|
|
107
|
+
|
|
108
|
+
resp = await self._client.messages.create(**request)
|
|
109
|
+
usage = _extract_usage(resp.usage, resp.model or self._model)
|
|
110
|
+
cost = _compute_cost(usage, self._cost_fn)
|
|
111
|
+
if cost is not None:
|
|
112
|
+
usage["cost_usd"] = cost
|
|
113
|
+
self._record_cost(usage)
|
|
114
|
+
self.last_usage = usage
|
|
115
|
+
|
|
116
|
+
text = _collect_text(resp.content)
|
|
117
|
+
return {"text": text, "usage": usage}
|
|
118
|
+
|
|
119
|
+
# ── Streaming ──────────────────────────────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
async def stream_complete(
|
|
122
|
+
self,
|
|
123
|
+
system: str | None,
|
|
124
|
+
messages: list[dict],
|
|
125
|
+
) -> AsyncGenerator[str, None]:
|
|
126
|
+
sys_blocks = _system_blocks(system, prompt_caching=self._prompt_caching)
|
|
127
|
+
built_messages = _build_messages(messages, prompt_caching=self._prompt_caching)
|
|
128
|
+
|
|
129
|
+
request: dict[str, Any] = {
|
|
130
|
+
"model": self._model,
|
|
131
|
+
"max_tokens": self._max_tokens,
|
|
132
|
+
"messages": built_messages,
|
|
133
|
+
}
|
|
134
|
+
if sys_blocks:
|
|
135
|
+
request["system"] = sys_blocks
|
|
136
|
+
|
|
137
|
+
async with self._client.messages.stream(**request) as stream:
|
|
138
|
+
async for text in stream.text_stream:
|
|
139
|
+
yield text
|
|
140
|
+
|
|
141
|
+
final = await stream.get_final_message()
|
|
142
|
+
usage = _extract_usage(final.usage, final.model or self._model)
|
|
143
|
+
cost = _compute_cost(usage, self._cost_fn)
|
|
144
|
+
if cost is not None:
|
|
145
|
+
usage["cost_usd"] = cost
|
|
146
|
+
self._record_cost(usage)
|
|
147
|
+
self.last_usage = usage
|
|
148
|
+
|
|
149
|
+
# ── Internals ─────────────────────────────────────────────────────────────
|
|
150
|
+
|
|
151
|
+
def _record_cost(self, usage: dict) -> None:
|
|
152
|
+
if not self._budget:
|
|
153
|
+
return
|
|
154
|
+
cost = usage.get("cost_usd")
|
|
155
|
+
if cost and cost > 0:
|
|
156
|
+
self._budget.add_cost(cost)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ── Module-level helpers ──────────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _system_blocks(system: str | None, *, prompt_caching: bool) -> list[dict[str, Any]]:
|
|
163
|
+
"""Return the system param as a content-block list (or empty list for no system)."""
|
|
164
|
+
if not system:
|
|
165
|
+
return []
|
|
166
|
+
block: dict[str, Any] = {"type": "text", "text": system}
|
|
167
|
+
if prompt_caching:
|
|
168
|
+
block["cache_control"] = {"type": "ephemeral"}
|
|
169
|
+
return [block]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _build_messages(messages: list[dict], *, prompt_caching: bool) -> list[dict[str, Any]]:
|
|
173
|
+
"""Convert harness message dicts to Anthropic message format.
|
|
174
|
+
|
|
175
|
+
System-role messages are silently dropped (callers should pass them via
|
|
176
|
+
the `system` parameter). The last user message gets `cache_control` when
|
|
177
|
+
prompt_caching is enabled.
|
|
178
|
+
"""
|
|
179
|
+
built: list[dict[str, Any]] = []
|
|
180
|
+
for msg in messages:
|
|
181
|
+
role = msg.get("role", "user")
|
|
182
|
+
if role == "system":
|
|
183
|
+
continue # consumed by caller as the system param
|
|
184
|
+
if role not in {"user", "assistant"}:
|
|
185
|
+
role = "user"
|
|
186
|
+
content = msg.get("content", "")
|
|
187
|
+
built.append(
|
|
188
|
+
{
|
|
189
|
+
"role": role,
|
|
190
|
+
"content": [{"type": "text", "text": str(content)}],
|
|
191
|
+
}
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
if prompt_caching:
|
|
195
|
+
_apply_last_user_cache_control(built)
|
|
196
|
+
|
|
197
|
+
return built
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _apply_last_user_cache_control(messages: list[dict]) -> None:
|
|
201
|
+
"""Add cache_control to the last user message's single text block."""
|
|
202
|
+
for message in reversed(messages):
|
|
203
|
+
if message.get("role") != "user":
|
|
204
|
+
continue
|
|
205
|
+
content = message.get("content")
|
|
206
|
+
if isinstance(content, list) and len(content) == 1 and content[0].get("type") == "text":
|
|
207
|
+
content[0]["cache_control"] = {"type": "ephemeral"}
|
|
208
|
+
break
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _extract_usage(usage: Any, model: str) -> dict:
|
|
212
|
+
"""Build the standard harness usage dict from an Anthropic usage object."""
|
|
213
|
+
return {
|
|
214
|
+
"tokens_in": getattr(usage, "input_tokens", 0),
|
|
215
|
+
"tokens_out": getattr(usage, "output_tokens", 0),
|
|
216
|
+
"cache_read_tokens": getattr(usage, "cache_read_input_tokens", 0) or 0,
|
|
217
|
+
"cache_creation_tokens": getattr(usage, "cache_creation_input_tokens", 0) or 0,
|
|
218
|
+
"model": model,
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _collect_text(content: Any) -> str:
|
|
223
|
+
"""Extract plain text from an Anthropic response content list."""
|
|
224
|
+
if not content:
|
|
225
|
+
return ""
|
|
226
|
+
parts: list[str] = []
|
|
227
|
+
for block in content:
|
|
228
|
+
if hasattr(block, "text"):
|
|
229
|
+
parts.append(block.text)
|
|
230
|
+
elif isinstance(block, dict) and block.get("type") == "text":
|
|
231
|
+
parts.append(block.get("text", ""))
|
|
232
|
+
return "".join(parts)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _compute_cost(usage: dict, cost_fn: Callable[[dict], float] | None) -> float | None:
|
|
236
|
+
if cost_fn is None:
|
|
237
|
+
return None
|
|
238
|
+
try:
|
|
239
|
+
return float(cost_fn(usage))
|
|
240
|
+
except Exception as e:
|
|
241
|
+
logger.warning("cost_fn raised: %s — skipping cost for this call", e)
|
|
242
|
+
return None
|
|
@@ -42,6 +42,7 @@ class ClaudeCodeLLM:
|
|
|
42
42
|
http_client: Any | None = None,
|
|
43
43
|
user_agent: str | None = None,
|
|
44
44
|
betas: str = CLAUDE_CODE_BETAS,
|
|
45
|
+
prompt_caching: bool = True,
|
|
45
46
|
) -> None:
|
|
46
47
|
if credential_provider is None:
|
|
47
48
|
if auth_file is None:
|
|
@@ -66,6 +67,7 @@ class ClaudeCodeLLM:
|
|
|
66
67
|
self._owns_client = http_client is None
|
|
67
68
|
self._user_agent = user_agent or _default_user_agent()
|
|
68
69
|
self._betas = betas
|
|
70
|
+
self._prompt_caching = prompt_caching
|
|
69
71
|
self.last_usage: dict | None = None
|
|
70
72
|
|
|
71
73
|
async def complete(
|
|
@@ -123,6 +125,7 @@ class ClaudeCodeLLM:
|
|
|
123
125
|
messages=messages,
|
|
124
126
|
max_tokens=max_tokens,
|
|
125
127
|
extra=extra,
|
|
128
|
+
prompt_caching=self._prompt_caching,
|
|
126
129
|
)
|
|
127
130
|
payload["stream"] = True
|
|
128
131
|
url = f"{self._base_url}/v1/messages"
|
|
@@ -142,6 +145,8 @@ class ClaudeCodeLLM:
|
|
|
142
145
|
|
|
143
146
|
tokens_in = 0
|
|
144
147
|
tokens_out = 0
|
|
148
|
+
cache_read_tokens = 0
|
|
149
|
+
cache_creation_tokens = 0
|
|
145
150
|
async for _event_type, data in aiter_sse_events(response):
|
|
146
151
|
if not data or data == "[DONE]":
|
|
147
152
|
continue
|
|
@@ -161,6 +166,10 @@ class ClaudeCodeLLM:
|
|
|
161
166
|
elif otype == "message_start":
|
|
162
167
|
msg_usage = (obj.get("message") or {}).get("usage") or {}
|
|
163
168
|
tokens_in = int(msg_usage.get("input_tokens") or 0)
|
|
169
|
+
cache_read_tokens = int(msg_usage.get("cache_read_input_tokens") or 0)
|
|
170
|
+
cache_creation_tokens = int(
|
|
171
|
+
msg_usage.get("cache_creation_input_tokens") or 0
|
|
172
|
+
)
|
|
164
173
|
elif otype == "message_delta":
|
|
165
174
|
delta_usage = obj.get("usage") or {}
|
|
166
175
|
tokens_out = int(delta_usage.get("output_tokens") or 0)
|
|
@@ -168,6 +177,8 @@ class ClaudeCodeLLM:
|
|
|
168
177
|
self.last_usage = {
|
|
169
178
|
"tokens_in": tokens_in,
|
|
170
179
|
"tokens_out": tokens_out,
|
|
180
|
+
"cache_read_tokens": cache_read_tokens,
|
|
181
|
+
"cache_creation_tokens": cache_creation_tokens,
|
|
171
182
|
"total_tokens": tokens_in + tokens_out,
|
|
172
183
|
"provider": "claude-code",
|
|
173
184
|
}
|
|
@@ -251,6 +262,7 @@ def _build_payload(
|
|
|
251
262
|
messages: list[dict],
|
|
252
263
|
max_tokens: int,
|
|
253
264
|
extra: dict[str, Any],
|
|
265
|
+
prompt_caching: bool = True,
|
|
254
266
|
) -> dict[str, Any]:
|
|
255
267
|
instructions = system or ""
|
|
256
268
|
input_messages: list[dict] = []
|
|
@@ -261,11 +273,14 @@ def _build_payload(
|
|
|
261
273
|
instructions = f"{instructions}\n\n{text}" if instructions else text
|
|
262
274
|
continue
|
|
263
275
|
input_messages.append(message)
|
|
276
|
+
built_messages = [_message_payload(message) for message in input_messages]
|
|
277
|
+
if prompt_caching:
|
|
278
|
+
_apply_last_user_cache_control(built_messages)
|
|
264
279
|
payload: dict[str, Any] = {
|
|
265
280
|
"model": model,
|
|
266
281
|
"max_tokens": max_tokens,
|
|
267
|
-
"system": _system_blocks(instructions),
|
|
268
|
-
"messages": [_message_payload(message) for message in input_messages],
|
|
282
|
+
"system": _system_blocks(instructions, prompt_caching=prompt_caching),
|
|
283
|
+
"messages": built_messages,
|
|
269
284
|
}
|
|
270
285
|
for key in ("temperature", "top_p", "top_k", "stop_sequences", "thinking"):
|
|
271
286
|
if key in extra:
|
|
@@ -273,7 +288,7 @@ def _build_payload(
|
|
|
273
288
|
return payload
|
|
274
289
|
|
|
275
290
|
|
|
276
|
-
def _system_blocks(system: str | None) -> list[dict[str, Any]]:
|
|
291
|
+
def _system_blocks(system: str | None, *, prompt_caching: bool = True) -> list[dict[str, Any]]:
|
|
277
292
|
cc_version = _resolve_cc_version()
|
|
278
293
|
blocks: list[dict[str, Any]] = [
|
|
279
294
|
{
|
|
@@ -286,16 +301,33 @@ def _system_blocks(system: str | None) -> list[dict[str, Any]]:
|
|
|
286
301
|
{"type": "text", "text": CLAUDE_CODE_IDENTITY},
|
|
287
302
|
]
|
|
288
303
|
if system:
|
|
289
|
-
blocks.append(
|
|
290
|
-
{
|
|
291
|
-
"type": "text",
|
|
292
|
-
"text": system,
|
|
293
|
-
"cache_control": {"type": "ephemeral"},
|
|
294
|
-
}
|
|
295
|
-
)
|
|
304
|
+
block: dict[str, Any] = {"type": "text", "text": system}
|
|
305
|
+
if prompt_caching:
|
|
306
|
+
block["cache_control"] = {"type": "ephemeral"}
|
|
307
|
+
blocks.append(block)
|
|
296
308
|
return blocks
|
|
297
309
|
|
|
298
310
|
|
|
311
|
+
def _apply_last_user_cache_control(messages: list[dict]) -> None:
|
|
312
|
+
"""Add cache_control to the last user message's content block (string only).
|
|
313
|
+
|
|
314
|
+
This marks the current task/goal as cacheable so repeated ReAct steps
|
|
315
|
+
that share the same leading conversation prefix benefit from the cache.
|
|
316
|
+
Only mutates messages whose last user-role entry has a plain-string
|
|
317
|
+
content block (skips multimodal / already-list content).
|
|
318
|
+
"""
|
|
319
|
+
for message in reversed(messages):
|
|
320
|
+
if message.get("role") != "user":
|
|
321
|
+
continue
|
|
322
|
+
content = message.get("content")
|
|
323
|
+
if not isinstance(content, list):
|
|
324
|
+
break
|
|
325
|
+
# content is already a list of blocks from _message_payload
|
|
326
|
+
if len(content) == 1 and content[0].get("type") == "text":
|
|
327
|
+
content[0]["cache_control"] = {"type": "ephemeral"}
|
|
328
|
+
break
|
|
329
|
+
|
|
330
|
+
|
|
299
331
|
def _message_payload(message: dict) -> dict[str, Any]:
|
|
300
332
|
role = message.get("role", "user")
|
|
301
333
|
if role not in {"user", "assistant"}:
|
|
@@ -215,6 +215,23 @@ class WorkingMemory:
|
|
|
215
215
|
def token_count(self) -> int:
|
|
216
216
|
return self._token_total
|
|
217
217
|
|
|
218
|
+
def context_usage(self) -> dict:
|
|
219
|
+
percent = self._token_total / self.max_tokens if self.max_tokens > 0 else 0.0
|
|
220
|
+
if percent >= 0.95:
|
|
221
|
+
level = "critical"
|
|
222
|
+
elif percent >= 0.80:
|
|
223
|
+
level = "warning"
|
|
224
|
+
else:
|
|
225
|
+
level = "normal"
|
|
226
|
+
return {
|
|
227
|
+
"tokens": self._token_total,
|
|
228
|
+
"max_tokens": self.max_tokens,
|
|
229
|
+
"percent": percent,
|
|
230
|
+
"level": level,
|
|
231
|
+
"messages": len(self._messages),
|
|
232
|
+
"summarizations": self._summarization_count,
|
|
233
|
+
}
|
|
234
|
+
|
|
218
235
|
def clear(self) -> None:
|
|
219
236
|
self._messages.clear()
|
|
220
237
|
self._token_total = 0
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "react-agent-harness"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.5.0"
|
|
8
8
|
description = "Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming"
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
10
|
dependencies = [
|
{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt
RENAMED
|
@@ -18,6 +18,7 @@ harness/tool_policy.py
|
|
|
18
18
|
harness/utils.py
|
|
19
19
|
harness/llm/__init__.py
|
|
20
20
|
harness/llm/_streaming.py
|
|
21
|
+
harness/llm/anthropic.py
|
|
21
22
|
harness/llm/auth.py
|
|
22
23
|
harness/llm/claude_code.py
|
|
23
24
|
harness/llm/openai.py
|
|
@@ -38,9 +39,11 @@ react_agent_harness.egg-info/requires.txt
|
|
|
38
39
|
react_agent_harness.egg-info/top_level.txt
|
|
39
40
|
tests/test_agents_base.py
|
|
40
41
|
tests/test_annotation.py
|
|
42
|
+
tests/test_anthropic_llm.py
|
|
41
43
|
tests/test_checkpoint_resume.py
|
|
42
44
|
tests/test_claude_code_llm.py
|
|
43
45
|
tests/test_cli.py
|
|
46
|
+
tests/test_console_renderer.py
|
|
44
47
|
tests/test_executor_bridge.py
|
|
45
48
|
tests/test_http_fetch.py
|
|
46
49
|
tests/test_llm_auth.py
|