PyPI - react-agent-harness - Versions diffs - 0.4.0__tar.gz → 0.5.0__tar.gz - Mend

react-agent-harness 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

{react_agent_harness-0.4.0/react_agent_harness.egg-info → react_agent_harness-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: react-agent-harness
-Version: 0.4.0
+Version: 0.5.0
 Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
 Requires-Python: >=3.10
 License-File: LICENSE

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/agents/base.py RENAMED Viewed

@@ -381,6 +381,8 @@ class BaseAgent:
                 elif thought_event.type == EventType.THOUGHT:
                     response = thought_event.payload.get("response")
                     yield thought_event
+                else:
+                    yield thought_event
             if response is None:
                 reason = self._last_think_error or "LLM returned unparseable response"
@@ -642,6 +644,14 @@ class BaseAgent:
         """
         messages = self._working_memory.get_messages()
         accumulated = ""
+        before_usage = self._working_memory.context_usage()
+        before_summarizations = self._working_memory.summarization_count
+        yield BusEvent(
+            type=EventType.CONTEXT,
+            agent_id=self.config.agent_id,
+            payload=before_usage,
+        )
         try:
             if hasattr(self._llm, "stream_complete"):
@@ -686,6 +696,32 @@ class BaseAgent:
             if response is not None:
                 self._last_think_error = None
+        after_usage = self._working_memory.context_usage()
+        if self._working_memory.summarization_count > before_summarizations:
+            yield BusEvent(
+                type=EventType.MEMORY,
+                agent_id=self.config.agent_id,
+                payload={
+                    "event": "summarized",
+                    "before": before_usage,
+                    "after": after_usage,
+                    "summarizations": self._working_memory.summarization_count,
+                },
+            )
+        llm_usage = getattr(self._llm, "last_usage", None) or {}
+        if llm_usage or after_usage != before_usage:
+            yield BusEvent(
+                type=EventType.CONTEXT,
+                agent_id=self.config.agent_id,
+                payload={
+                    **after_usage,
+                    "tokens_in": llm_usage.get("tokens_in"),
+                    "tokens_out": llm_usage.get("tokens_out"),
+                    "cache_read_tokens": llm_usage.get("cache_read_tokens"),
+                    "cache_creation_tokens": llm_usage.get("cache_creation_tokens"),
+                },
+            )
         yield BusEvent(
             type=EventType.THOUGHT,
             agent_id=self.config.agent_id,

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/console.py RENAMED Viewed

@@ -117,6 +117,37 @@ class ConsoleRenderer:
                 file=self._out,
             )
+        elif t == EventType.CONTEXT:
+            tokens = int(p.get("tokens") or 0)
+            max_tokens = int(p.get("max_tokens") or 0)
+            pct = float(p.get("percent") or 0.0) * 100
+            level = p.get("level") or "normal"
+            suffix = "" if level == "normal" else f"  {level}"
+            llm_parts: list[str] = []
+            if p.get("tokens_in") is not None:
+                llm_parts.append(f"in={int(p['tokens_in']):,}")
+            if p.get("tokens_out") is not None:
+                llm_parts.append(f"out={int(p['tokens_out']):,}")
+            if p.get("cache_read_tokens"):
+                llm_parts.append(f"cache_hit={int(p['cache_read_tokens']):,}")
+            if p.get("cache_creation_tokens"):
+                llm_parts.append(f"cache_new={int(p['cache_creation_tokens']):,}")
+            llm_suffix = f"  [{' '.join(llm_parts)}]" if llm_parts else ""
+            print(
+                f"{self._label(event)} ctx     {tokens:,} / {max_tokens:,} tokens  "
+                f"{pct:.0f}%{suffix}{llm_suffix}",
+                file=self._out,
+            )
+        elif t == EventType.MEMORY:
+            before = p.get("before") if isinstance(p.get("before"), dict) else {}
+            after = p.get("after") if isinstance(p.get("after"), dict) else {}
+            print(
+                f"{self._label(event)} memory  summarized  "
+                f"{int(before.get('tokens') or 0):,} -> {int(after.get('tokens') or 0):,} tokens",
+                file=self._out,
+            )
         elif t == EventType.HUMAN_GUIDANCE:
             print(
                 f"\n{self._label(event)} ▶ steered  step={p.get('step')}  text={p.get('text')!r}",

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/events.py RENAMED Viewed

@@ -19,6 +19,8 @@ Event lifecycle within a single goal:
     PLAN                  — orchestrator emitted a static DAG
     (per task in DAG)
         HUMAN_GUIDANCE?   — async steering drained at top of step
+        CONTEXT           — working-memory context budget estimate
+        MEMORY            — working-memory compaction/summarization marker
         THOUGHT           — agent's next-step reasoning
         TOKEN*            — partial LLM output (only when client streams)
         ACTION            — agent chose a tool + args
@@ -47,6 +49,8 @@ class EventType(str, Enum):
     TOKEN = "token"
     ACTION = "action"
     OBSERVATION = "observation"
+    CONTEXT = "context"
+    MEMORY = "memory"
     HUMAN_GUIDANCE = "human_guidance"  # async steering injected at step boundary
     TASK_DONE = "task_done"
     REPLAN = "replan"

react_agent_harness-0.5.0/harness/llm/anthropic.py ADDED Viewed

@@ -0,0 +1,242 @@
+"""
+Anthropic LLM adapter (direct API key, no OAuth).
+Implements the harness LLM client contract:
+  - async def complete(system, messages, **kwargs) -> dict
+  - async def stream_complete(system, messages) -> AsyncGenerator[str, None]
+Prompt caching
+--------------
+Enabled by default (`prompt_caching=True`). When active:
+  - The system prompt is sent as a content-block list with `cache_control`
+    on the last block so Anthropic can cache the compiled KV state.
+  - The last user message's text block also gets `cache_control` so
+    multi-turn ReAct loops that share a common leading prefix cache cheaply.
+Cache reads cost ~10% of normal input tokens. Callers that pass a `cost_fn`
+receive `cache_read_tokens` and `cache_creation_tokens` in the usage dict so
+they can apply the correct per-tier pricing.
+Usage tracking
+--------------
+`last_usage` is populated after every call::
+    {
+        "tokens_in": int,                 # non-cached input tokens
+        "tokens_out": int,                # output tokens
+        "cache_read_tokens": int,         # tokens served from cache
+        "cache_creation_tokens": int,     # tokens written to cache
+        "model": str,                     # model id echoed from response
+    }
+Cost tracking
+-------------
+An optional `cost_fn(usage) -> float` may be supplied to convert the usage
+dict to dollars. This is handy for callers that know the per-model pricing
+schedule. When `set_budget(guard)` is called (typically by AgentRuntime),
+the adapter forwards computed costs to the guard's `add_cost()` method.
+Install:
+    pip install -e ".[anthropic]"
+Usage:
+    from harness.llm.anthropic import AnthropicLLM
+    llm = AnthropicLLM(model="claude-sonnet-4-6")  # reads ANTHROPIC_API_KEY
+"""
+from __future__ import annotations
+import logging
+import os
+from collections.abc import AsyncGenerator, Callable
+from typing import Any
+logger = logging.getLogger(__name__)
+class AnthropicLLM:
+    def __init__(
+        self,
+        *,
+        model: str = "claude-sonnet-4-6",
+        api_key: str | None = None,  # falls back to ANTHROPIC_API_KEY env
+        max_tokens: int = 1024,
+        cost_fn: Callable[[dict], float] | None = None,
+        prompt_caching: bool = True,
+    ) -> None:
+        try:
+            import anthropic
+        except ImportError as e:
+            raise ImportError(
+                'anthropic package not installed. Run: pip install -e ".[anthropic]"'
+            ) from e
+        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
+        self._client = anthropic.AsyncAnthropic(api_key=resolved_key)
+        self._model = model
+        self._max_tokens = max_tokens
+        self._cost_fn = cost_fn
+        self._prompt_caching = prompt_caching
+        self._budget: Any = None
+        # Populated after every successful call; streaming callers read it here.
+        self.last_usage: dict | None = None
+    def set_budget(self, guard: Any) -> None:
+        """Inject a BudgetGuard; AgentRuntime calls this at the start of each run."""
+        self._budget = guard
+    # ── Non-streaming ──────────────────────────────────────────────────────────
+    async def complete(
+        self,
+        system: str | None,
+        messages: list[dict],
+        **kwargs: Any,
+    ) -> dict:
+        max_tokens = int(kwargs.pop("max_tokens", self._max_tokens))
+        sys_blocks = _system_blocks(system, prompt_caching=self._prompt_caching)
+        built_messages = _build_messages(messages, prompt_caching=self._prompt_caching)
+        request: dict[str, Any] = {
+            "model": self._model,
+            "max_tokens": max_tokens,
+            "messages": built_messages,
+        }
+        if sys_blocks:
+            request["system"] = sys_blocks
+        resp = await self._client.messages.create(**request)
+        usage = _extract_usage(resp.usage, resp.model or self._model)
+        cost = _compute_cost(usage, self._cost_fn)
+        if cost is not None:
+            usage["cost_usd"] = cost
+        self._record_cost(usage)
+        self.last_usage = usage
+        text = _collect_text(resp.content)
+        return {"text": text, "usage": usage}
+    # ── Streaming ──────────────────────────────────────────────────────────────
+    async def stream_complete(
+        self,
+        system: str | None,
+        messages: list[dict],
+    ) -> AsyncGenerator[str, None]:
+        sys_blocks = _system_blocks(system, prompt_caching=self._prompt_caching)
+        built_messages = _build_messages(messages, prompt_caching=self._prompt_caching)
+        request: dict[str, Any] = {
+            "model": self._model,
+            "max_tokens": self._max_tokens,
+            "messages": built_messages,
+        }
+        if sys_blocks:
+            request["system"] = sys_blocks
+        async with self._client.messages.stream(**request) as stream:
+            async for text in stream.text_stream:
+                yield text
+            final = await stream.get_final_message()
+            usage = _extract_usage(final.usage, final.model or self._model)
+            cost = _compute_cost(usage, self._cost_fn)
+            if cost is not None:
+                usage["cost_usd"] = cost
+            self._record_cost(usage)
+            self.last_usage = usage
+    # ── Internals ─────────────────────────────────────────────────────────────
+    def _record_cost(self, usage: dict) -> None:
+        if not self._budget:
+            return
+        cost = usage.get("cost_usd")
+        if cost and cost > 0:
+            self._budget.add_cost(cost)
+# ── Module-level helpers ──────────────────────────────────────────────────────
+def _system_blocks(system: str | None, *, prompt_caching: bool) -> list[dict[str, Any]]:
+    """Return the system param as a content-block list (or empty list for no system)."""
+    if not system:
+        return []
+    block: dict[str, Any] = {"type": "text", "text": system}
+    if prompt_caching:
+        block["cache_control"] = {"type": "ephemeral"}
+    return [block]
+def _build_messages(messages: list[dict], *, prompt_caching: bool) -> list[dict[str, Any]]:
+    """Convert harness message dicts to Anthropic message format.
+    System-role messages are silently dropped (callers should pass them via
+    the `system` parameter). The last user message gets `cache_control` when
+    prompt_caching is enabled.
+    """
+    built: list[dict[str, Any]] = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        if role == "system":
+            continue  # consumed by caller as the system param
+        if role not in {"user", "assistant"}:
+            role = "user"
+        content = msg.get("content", "")
+        built.append(
+            {
+                "role": role,
+                "content": [{"type": "text", "text": str(content)}],
+            }
+        )
+    if prompt_caching:
+        _apply_last_user_cache_control(built)
+    return built
+def _apply_last_user_cache_control(messages: list[dict]) -> None:
+    """Add cache_control to the last user message's single text block."""
+    for message in reversed(messages):
+        if message.get("role") != "user":
+            continue
+        content = message.get("content")
+        if isinstance(content, list) and len(content) == 1 and content[0].get("type") == "text":
+            content[0]["cache_control"] = {"type": "ephemeral"}
+        break
+def _extract_usage(usage: Any, model: str) -> dict:
+    """Build the standard harness usage dict from an Anthropic usage object."""
+    return {
+        "tokens_in": getattr(usage, "input_tokens", 0),
+        "tokens_out": getattr(usage, "output_tokens", 0),
+        "cache_read_tokens": getattr(usage, "cache_read_input_tokens", 0) or 0,
+        "cache_creation_tokens": getattr(usage, "cache_creation_input_tokens", 0) or 0,
+        "model": model,
+    }
+def _collect_text(content: Any) -> str:
+    """Extract plain text from an Anthropic response content list."""
+    if not content:
+        return ""
+    parts: list[str] = []
+    for block in content:
+        if hasattr(block, "text"):
+            parts.append(block.text)
+        elif isinstance(block, dict) and block.get("type") == "text":
+            parts.append(block.get("text", ""))
+    return "".join(parts)
+def _compute_cost(usage: dict, cost_fn: Callable[[dict], float] | None) -> float | None:
+    if cost_fn is None:
+        return None
+    try:
+        return float(cost_fn(usage))
+    except Exception as e:
+        logger.warning("cost_fn raised: %s — skipping cost for this call", e)
+        return None

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/harness/llm/claude_code.py RENAMED Viewed

@@ -42,6 +42,7 @@ class ClaudeCodeLLM:
         http_client: Any | None = None,
         user_agent: str | None = None,
         betas: str = CLAUDE_CODE_BETAS,
+        prompt_caching: bool = True,
     ) -> None:
         if credential_provider is None:
             if auth_file is None:
@@ -66,6 +67,7 @@ class ClaudeCodeLLM:
         self._owns_client = http_client is None
         self._user_agent = user_agent or _default_user_agent()
         self._betas = betas
+        self._prompt_caching = prompt_caching
         self.last_usage: dict | None = None
     async def complete(
@@ -123,6 +125,7 @@ class ClaudeCodeLLM:
             messages=messages,
             max_tokens=max_tokens,
             extra=extra,
+            prompt_caching=self._prompt_caching,
         )
         payload["stream"] = True
         url = f"{self._base_url}/v1/messages"
@@ -142,6 +145,8 @@ class ClaudeCodeLLM:
                 tokens_in = 0
                 tokens_out = 0
+                cache_read_tokens = 0
+                cache_creation_tokens = 0
                 async for _event_type, data in aiter_sse_events(response):
                     if not data or data == "[DONE]":
                         continue
@@ -161,6 +166,10 @@ class ClaudeCodeLLM:
                     elif otype == "message_start":
                         msg_usage = (obj.get("message") or {}).get("usage") or {}
                         tokens_in = int(msg_usage.get("input_tokens") or 0)
+                        cache_read_tokens = int(msg_usage.get("cache_read_input_tokens") or 0)
+                        cache_creation_tokens = int(
+                            msg_usage.get("cache_creation_input_tokens") or 0
+                        )
                     elif otype == "message_delta":
                         delta_usage = obj.get("usage") or {}
                         tokens_out = int(delta_usage.get("output_tokens") or 0)
@@ -168,6 +177,8 @@ class ClaudeCodeLLM:
                 self.last_usage = {
                     "tokens_in": tokens_in,
                     "tokens_out": tokens_out,
+                    "cache_read_tokens": cache_read_tokens,
+                    "cache_creation_tokens": cache_creation_tokens,
                     "total_tokens": tokens_in + tokens_out,
                     "provider": "claude-code",
                 }
@@ -251,6 +262,7 @@ def _build_payload(
     messages: list[dict],
     max_tokens: int,
     extra: dict[str, Any],
+    prompt_caching: bool = True,
 ) -> dict[str, Any]:
     instructions = system or ""
     input_messages: list[dict] = []
@@ -261,11 +273,14 @@ def _build_payload(
                 instructions = f"{instructions}\n\n{text}" if instructions else text
             continue
         input_messages.append(message)
+    built_messages = [_message_payload(message) for message in input_messages]
+    if prompt_caching:
+        _apply_last_user_cache_control(built_messages)
     payload: dict[str, Any] = {
         "model": model,
         "max_tokens": max_tokens,
-        "system": _system_blocks(instructions),
-        "messages": [_message_payload(message) for message in input_messages],
+        "system": _system_blocks(instructions, prompt_caching=prompt_caching),
+        "messages": built_messages,
     }
     for key in ("temperature", "top_p", "top_k", "stop_sequences", "thinking"):
         if key in extra:
@@ -273,7 +288,7 @@ def _build_payload(
     return payload
-def _system_blocks(system: str | None) -> list[dict[str, Any]]:
+def _system_blocks(system: str | None, *, prompt_caching: bool = True) -> list[dict[str, Any]]:
     cc_version = _resolve_cc_version()
     blocks: list[dict[str, Any]] = [
         {
@@ -286,16 +301,33 @@ def _system_blocks(system: str | None) -> list[dict[str, Any]]:
         {"type": "text", "text": CLAUDE_CODE_IDENTITY},
     ]
     if system:
-        blocks.append(
-            {
-                "type": "text",
-                "text": system,
-                "cache_control": {"type": "ephemeral"},
-            }
-        )
+        block: dict[str, Any] = {"type": "text", "text": system}
+        if prompt_caching:
+            block["cache_control"] = {"type": "ephemeral"}
+        blocks.append(block)
     return blocks
+def _apply_last_user_cache_control(messages: list[dict]) -> None:
+    """Add cache_control to the last user message's content block (string only).
+    This marks the current task/goal as cacheable so repeated ReAct steps
+    that share the same leading conversation prefix benefit from the cache.
+    Only mutates messages whose last user-role entry has a plain-string
+    content block (skips multimodal / already-list content).
+    """
+    for message in reversed(messages):
+        if message.get("role") != "user":
+            continue
+        content = message.get("content")
+        if not isinstance(content, list):
+            break
+        # content is already a list of blocks from _message_payload
+        if len(content) == 1 and content[0].get("type") == "text":
+            content[0]["cache_control"] = {"type": "ephemeral"}
+        break
 def _message_payload(message: dict) -> dict[str, Any]:
     role = message.get("role", "user")
     if role not in {"user", "assistant"}:

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/memory/working.py RENAMED Viewed

@@ -215,6 +215,23 @@ class WorkingMemory:
     def token_count(self) -> int:
         return self._token_total
+    def context_usage(self) -> dict:
+        percent = self._token_total / self.max_tokens if self.max_tokens > 0 else 0.0
+        if percent >= 0.95:
+            level = "critical"
+        elif percent >= 0.80:
+            level = "warning"
+        else:
+            level = "normal"
+        return {
+            "tokens": self._token_total,
+            "max_tokens": self.max_tokens,
+            "percent": percent,
+            "level": level,
+            "messages": len(self._messages),
+            "summarizations": self._summarization_count,
+        }
     def clear(self) -> None:
         self._messages.clear()
         self._token_total = 0

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "react-agent-harness"
-version = "0.4.0"
+version = "0.5.0"
 description = "Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming"
 requires-python = ">=3.10"
 dependencies = [

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0/react_agent_harness.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: react-agent-harness
-Version: 0.4.0
+Version: 0.5.0
 Summary: Multi-agent LLM orchestration: hybrid DAG planning, two-tier memory, streaming
 Requires-Python: >=3.10
 License-File: LICENSE

{react_agent_harness-0.4.0 → react_agent_harness-0.5.0}/react_agent_harness.egg-info/SOURCES.txt RENAMED Viewed

@@ -18,6 +18,7 @@ harness/tool_policy.py
 harness/utils.py
 harness/llm/__init__.py
 harness/llm/_streaming.py
+harness/llm/anthropic.py
 harness/llm/auth.py
 harness/llm/claude_code.py
 harness/llm/openai.py
@@ -38,9 +39,11 @@ react_agent_harness.egg-info/requires.txt
 react_agent_harness.egg-info/top_level.txt
 tests/test_agents_base.py
 tests/test_annotation.py
+tests/test_anthropic_llm.py
 tests/test_checkpoint_resume.py
 tests/test_claude_code_llm.py
 tests/test_cli.py
+tests/test_console_renderer.py
 tests/test_executor_bridge.py
 tests/test_http_fetch.py
 tests/test_llm_auth.py

react-agent-harness 0.4.0__tar.gz → 0.5.0__tar.gz

react-agent-harness 0.4.0__tar.gz → 0.5.0__tar.gz