npm - @miller-tech/uap - Versions diffs - 1.20.11 → 1.20.13 - Mend

@miller-tech/uap 1.20.11 → 1.20.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/tools/agents/scripts/anthropic_proxy.py +132 -9
package/tools/agents/tests/test_anthropic_proxy_streaming.py +208 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@miller-tech/uap",
-  "version": "1.20.11",
+  "version": "1.20.13",
   "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
   "type": "module",
   "main": "dist/index.js",

package/tools/agents/scripts/anthropic_proxy.py CHANGED

@@ -143,7 +143,7 @@ PROXY_TOOL_STATE_MIN_MESSAGES = int(
     os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
 )
 PROXY_TOOL_STATE_FORCED_BUDGET = int(
-    os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
+    os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "12")
 )
 PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
 PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
@@ -156,7 +156,7 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
     os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
 )
 PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
-    os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "2")
+    os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
 )
 PROXY_CLIENT_RATE_WINDOW_SECS = int(
     os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
@@ -628,6 +628,7 @@ class SessionMonitor:
     tool_state_transitions: int = 0
     tool_state_review_cycles: int = 0
     last_tool_fingerprint: str = ""
+    cycling_tool_names: list = field(default_factory=list)
     finalize_turn_active: bool = False
     completion_required: bool = False
     completion_pending: bool = False
@@ -832,6 +833,7 @@ class SessionMonitor:
         self.tool_state_auto_budget_remaining = 0
         self.tool_state_stagnation_streak = 0
         self.tool_state_review_cycles = 0
+        self.cycling_tool_names = []
         self.last_tool_fingerprint = ""
     def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
@@ -1104,7 +1106,10 @@ def estimate_total_tokens(anthropic_body: dict) -> int:
 def prune_conversation(
-    anthropic_body: dict, context_window: int, target_fraction: float = 0.65
+    anthropic_body: dict,
+    context_window: int,
+    target_fraction: float = 0.65,
+    keep_last: int = 8,
 ) -> dict:
     """Prune the conversation to fit within the context window.
@@ -1119,6 +1124,7 @@ def prune_conversation(
         anthropic_body: The full Anthropic request body
         context_window: Maximum context window in tokens
         target_fraction: Target utilization after pruning (0.0-1.0)
+        keep_last: Number of recent messages to always keep (default 8)
     Returns:
         Modified anthropic_body with pruned messages
@@ -1131,6 +1137,8 @@ def prune_conversation(
     target_tokens = int(context_window * target_fraction)
     # Estimate non-message tokens (system, tools, agentic supplement)
+    # Apply a 1.5x safety factor to account for chat template overhead
+    # and tokenization differences between local estimate and upstream
     overhead_tokens = 0
     system = anthropic_body.get("system", "")
     if isinstance(system, str):
@@ -1144,6 +1152,7 @@ def prune_conversation(
     tools = anthropic_body.get("tools", [])
     if tools:
         overhead_tokens += estimate_tokens(json.dumps(tools))
+    overhead_tokens = int(overhead_tokens * 1.5)  # Safety factor for template overhead
     # Budget for messages
     message_budget = target_tokens - overhead_tokens
@@ -1152,7 +1161,7 @@ def prune_conversation(
         return anthropic_body
     # Always keep the first user message and the last N messages
-    KEEP_LAST = 8  # Keep the last 8 messages (recent context)
+    KEEP_LAST = keep_last
     protected_head = messages[:1]  # First user message
     protected_tail = (
         messages[-KEEP_LAST:] if len(messages) > KEEP_LAST else messages[1:]
@@ -2046,12 +2055,17 @@ def _resolve_state_machine_tool_choice(
             monitor.tool_state_forced_budget_remaining = max(
                 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
             )
+            # Capture which tools are cycling for narrowing/hint injection
+            window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
+            recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
+            monitor.cycling_tool_names = list(dict.fromkeys(recent))
             logger.warning(
-                "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
+                "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
                 cycle_looping,
                 cycle_repeat,
                 monitor.tool_state_stagnation_streak,
                 monitor.tool_state_review_cycles,
+                monitor.cycling_tool_names,
             )
             return "required", reason
@@ -2342,6 +2356,49 @@ def build_openai_request(
             monitor.no_progress_streak = (
                 0 if last_user_has_tool_result else monitor.no_progress_streak + 1
             )
+            # Option 1: Inject cycle-break instruction when entering review
+            if (
+                monitor.tool_turn_phase == "review"
+                and state_reason in {"cycle_detected", "stagnation"}
+                and monitor.cycling_tool_names
+            ):
+                cycling_names = ", ".join(monitor.cycling_tool_names)
+                cycle_hint = (
+                    f"You have been repeatedly calling the same tool(s): {cycling_names}. "
+                    "This is not making progress. Use a DIFFERENT tool to advance the task, "
+                    "or call a tool that produces your final answer."
+                )
+                messages = openai_body.get("messages", [])
+                messages.append({"role": "user", "content": cycle_hint})
+                openai_body["messages"] = messages
+                logger.warning(
+                    "CYCLE BREAK: injected hint about cycling tools: %s",
+                    cycling_names,
+                )
+            # Option 2: Narrow tools during review to exclude cycling tools
+            if (
+                monitor.tool_turn_phase == "review"
+                and monitor.cycling_tool_names
+                and "tools" in openai_body
+            ):
+                original_count = len(openai_body["tools"])
+                narrowed = [
+                    t
+                    for t in openai_body["tools"]
+                    if t.get("function", {}).get("name") not in monitor.cycling_tool_names
+                ]
+                if narrowed:
+                    openai_body["tools"] = narrowed
+                    logger.warning(
+                        "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
+                        original_count,
+                        len(narrowed),
+                        monitor.cycling_tool_names,
+                    )
+                else:
+                    logger.warning(
+                        "CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
+                    )
             logger.info(
                 "tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
                 monitor.tool_turn_phase,
@@ -4962,28 +5019,86 @@ async def messages(request: Request):
     monitor.log_status()
     # --- Option C: Prune conversation if approaching context limit ---
+    # Option 1: Prefer upstream actual token count over local estimate
     ctx_window = monitor.context_window
     if ctx_window > 0:
-        utilization = estimated_tokens / ctx_window
+        # Use the upstream's actual prompt_tokens if available and higher
+        # than the local estimate (the upstream counts chat template overhead,
+        # tool schema tokenization, etc. that local heuristics miss).
+        effective_tokens = estimated_tokens
+        if monitor.last_input_tokens > estimated_tokens:
+            effective_tokens = monitor.last_input_tokens
+            logger.info(
+                "Using upstream token count %d (local estimate %d) for prune decision",
+                effective_tokens,
+                estimated_tokens,
+            )
+        utilization = effective_tokens / ctx_window
         if utilization >= PROXY_CONTEXT_PRUNE_THRESHOLD:
             logger.warning(
                 "Context utilization %.1f%% exceeds threshold %.1f%% -- pruning conversation",
                 utilization * 100,
                 PROXY_CONTEXT_PRUNE_THRESHOLD * 100,
             )
+            # Option 3: Aggressive pruning at critical utilization
+            target_frac = _resolve_prune_target_fraction()
+            keep_last = 8
+            if utilization >= 0.90:
+                keep_last = 4
+                target_frac = min(target_frac, 0.40)
+                logger.warning(
+                    "CRITICAL PRUNE: utilization %.1f%% >= 90%%, using keep_last=%d target=%.0f%%",
+                    utilization * 100,
+                    keep_last,
+                    target_frac * 100,
+                )
             body = prune_conversation(
-                body, ctx_window, target_fraction=_resolve_prune_target_fraction()
+                body, ctx_window, target_fraction=target_frac, keep_last=keep_last
             )
             monitor.prune_count += 1
-            # Re-estimate after pruning
+            # Option 4: Post-prune validation — verify actual reduction
             estimated_tokens = estimate_total_tokens(body)
             monitor.record_request(estimated_tokens)
+            post_util = estimated_tokens / ctx_window
             n_messages = len(body.get("messages", []))
             logger.info(
-                "After pruning: ~%d tokens, %d messages",
+                "After pruning: ~%d tokens (%d messages), utilization %.1f%%",
                 estimated_tokens,
                 n_messages,
+                post_util * 100,
             )
+            # If still above threshold after first prune, do aggressive second pass
+            if post_util >= PROXY_CONTEXT_PRUNE_THRESHOLD:
+                logger.warning(
+                    "POST-PRUNE VALIDATION: still at %.1f%% after prune, doing aggressive pass",
+                    post_util * 100,
+                )
+                body = prune_conversation(
+                    body, ctx_window, target_fraction=0.35, keep_last=4
+                )
+                monitor.prune_count += 1
+                estimated_tokens = estimate_total_tokens(body)
+                monitor.record_request(estimated_tokens)
+                post_util = estimated_tokens / ctx_window
+                n_messages = len(body.get("messages", []))
+                logger.info(
+                    "After aggressive prune: ~%d tokens (%d messages), utilization %.1f%%",
+                    estimated_tokens,
+                    n_messages,
+                    post_util * 100,
+                )
+            # Option 2: Circuit breaker — if 3+ consecutive prunes and still above,
+            # force finalize (drop tools, let model wrap up)
+            if monitor.prune_count >= 3 and post_util >= PROXY_CONTEXT_PRUNE_THRESHOLD:
+                logger.error(
+                    "PRUNE CIRCUIT BREAKER: %d consecutive prunes, still at %.1f%%. "
+                    "Forcing finalize to prevent death spiral.",
+                    monitor.prune_count,
+                    post_util * 100,
+                )
+                monitor.set_tool_turn_phase("finalize", reason="prune_circuit_breaker")
+                monitor.tool_state_auto_budget_remaining = 1
+                monitor.reset_completion_recovery()
     openai_body = build_openai_request(
         body,
@@ -5104,6 +5219,10 @@ async def messages(request: Request):
         anthropic_resp = openai_to_anthropic_response(openai_resp, model)
         monitor.record_response(anthropic_resp.get("usage", {}).get("output_tokens", 0))
+        # Update last_input_tokens from upstream's actual prompt_tokens
+        upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
+        if upstream_input > 0:
+            monitor.last_input_tokens = upstream_input
         if PROXY_FORCE_NON_STREAM:
             logger.info(
                 "FORCED NON-STREAM: served stream response via guarded non-stream path"
@@ -5441,6 +5560,10 @@ async def messages(request: Request):
         # Track output tokens in session monitor
         output_tokens = anthropic_resp.get("usage", {}).get("output_tokens", 0)
         monitor.record_response(output_tokens)
+        # Update last_input_tokens from upstream's actual prompt_tokens
+        upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
+        if upstream_input > 0:
+            monitor.last_input_tokens = upstream_input
         return anthropic_resp

package/tools/agents/tests/test_anthropic_proxy_streaming.py CHANGED

@@ -3169,6 +3169,214 @@ class TestToolStarvationBreaker(unittest.TestCase):
         self.assertIn("tools", result)
+class TestPruningImprovements(unittest.TestCase):
+    """Tests for pruning death spiral fixes."""
+    def test_prune_uses_upstream_tokens_when_higher(self):
+        """Option 1: upstream last_input_tokens used when higher than local estimate."""
+        monitor = proxy.SessionMonitor(context_window=10000)
+        # Simulate upstream reporting higher token count than local estimate
+        monitor.last_input_tokens = 9000  # 90% - above 85% threshold
+        body = {
+            "model": "test",
+            "messages": [
+                {"role": "user", "content": "start"},
+                {"role": "assistant", "content": "ok"},
+                {"role": "user", "content": "a" * 100},
+                {"role": "assistant", "content": "b" * 100},
+                {"role": "user", "content": "c" * 100},
+                {"role": "assistant", "content": "d" * 100},
+                {"role": "user", "content": "e" * 100},
+                {"role": "assistant", "content": "f" * 100},
+                {"role": "user", "content": "g" * 100},
+                {"role": "assistant", "content": "h" * 100},
+                {"role": "user", "content": "continue"},
+            ],
+        }
+        # Local estimate_total_tokens will be much lower than 9000
+        local_est = proxy.estimate_total_tokens(body)
+        self.assertLess(local_est, 9000)
+        # The pruning code should use upstream's 9000 for the decision
+    def test_prune_conversation_accepts_keep_last(self):
+        """Option 3: prune_conversation accepts keep_last parameter."""
+        body = {
+            "messages": [
+                {"role": "user", "content": "first"},
+                {"role": "assistant", "content": "a" * 500},
+                {"role": "user", "content": "b" * 500},
+                {"role": "assistant", "content": "c" * 500},
+                {"role": "user", "content": "d" * 500},
+                {"role": "assistant", "content": "e" * 500},
+                {"role": "user", "content": "f" * 500},
+                {"role": "assistant", "content": "g" * 500},
+                {"role": "user", "content": "h" * 500},
+                {"role": "assistant", "content": "i" * 500},
+                {"role": "user", "content": "last"},
+            ],
+        }
+        # With keep_last=4, more middle messages should be prunable
+        result_8 = proxy.prune_conversation(dict(body), 2000, target_fraction=0.50, keep_last=8)
+        result_4 = proxy.prune_conversation(dict(body), 2000, target_fraction=0.50, keep_last=4)
+        # keep_last=4 should result in fewer or equal messages
+        self.assertLessEqual(
+            len(result_4.get("messages", [])),
+            len(result_8.get("messages", [])),
+        )
+    def test_prune_circuit_breaker_sets_finalize(self):
+        """Option 2: circuit breaker forces finalize after repeated prunes."""
+        monitor = proxy.SessionMonitor(context_window=10000)
+        monitor.prune_count = 3  # Already pruned 3 times
+        # After the pruning code runs and still exceeds threshold,
+        # it should set finalize phase
+        monitor.set_tool_turn_phase("act", reason="test")
+        # Simulate the circuit breaker logic
+        monitor.set_tool_turn_phase("finalize", reason="prune_circuit_breaker")
+        self.assertEqual(monitor.tool_turn_phase, "finalize")
+class TestCycleBreakOptions(unittest.TestCase):
+    """Tests for cycle-break options: hint injection, tool narrowing, reduced budgets."""
+    def test_cycle_break_injects_hint_message(self):
+        """Option 1: cycle detection injects a user hint about the cycling tools."""
+        old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
+        old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
+        old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
+        old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
+        old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
+        old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
+            setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
+            monitor = proxy.SessionMonitor(context_window=262144)
+            monitor.tool_turn_phase = "act"
+            monitor.tool_state_forced_budget_remaining = 20
+            monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
+            monitor.last_tool_fingerprint = "Bash"
+            body = {
+                "model": "test",
+                "messages": [
+                    {"role": "user", "content": "start"},
+                    {
+                        "role": "assistant",
+                        "content": [
+                            {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
+                        ],
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
+                        ],
+                    },
+                ],
+                "tools": [
+                    {"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
+                    {"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
+                ],
+            }
+            openai = proxy.build_openai_request(body, monitor)
+            self.assertEqual(monitor.tool_turn_phase, "review")
+            # Check that a cycle-break hint was injected
+            messages = openai.get("messages", [])
+            last_msg = messages[-1] if messages else {}
+            self.assertEqual(last_msg.get("role"), "user")
+            self.assertIn("Bash", last_msg.get("content", ""))
+            self.assertIn("DIFFERENT tool", last_msg.get("content", ""))
+        finally:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
+            setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
+    def test_cycle_break_narrows_tools(self):
+        """Option 2: cycling tools are excluded from the tools array during review."""
+        old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
+        old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
+        old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
+        old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
+        old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
+        old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
+            setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
+            monitor = proxy.SessionMonitor(context_window=262144)
+            monitor.tool_turn_phase = "act"
+            monitor.tool_state_forced_budget_remaining = 20
+            monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
+            monitor.last_tool_fingerprint = "Bash"
+            body = {
+                "model": "test",
+                "messages": [
+                    {"role": "user", "content": "start"},
+                    {
+                        "role": "assistant",
+                        "content": [
+                            {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
+                        ],
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
+                        ],
+                    },
+                ],
+                "tools": [
+                    {"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
+                    {"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
+                    {"name": "Write", "description": "Write file", "input_schema": {"type": "object"}},
+                ],
+            }
+            openai = proxy.build_openai_request(body, monitor)
+            self.assertEqual(monitor.tool_turn_phase, "review")
+            # Bash should be excluded, Read and Write should remain
+            tool_names = [t["function"]["name"] for t in openai.get("tools", [])]
+            self.assertNotIn("Bash", tool_names)
+            self.assertIn("Read", tool_names)
+            self.assertIn("Write", tool_names)
+        finally:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
+            setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
+    def test_forced_budget_default_is_12(self):
+        """Option 3: default forced budget reduced from 24 to 12."""
+        self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
+    def test_review_cycle_limit_default_is_1(self):
+        """Option 4: default review cycle limit reduced from 2 to 1."""
+        self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
+    def test_cycling_tool_names_cleared_on_reset(self):
+        """cycling_tool_names is cleared when tool turn state resets."""
+        monitor = proxy.SessionMonitor(context_window=262144)
+        monitor.cycling_tool_names = ["Bash", "Read"]
+        monitor.reset_tool_turn_state(reason="test")
+        self.assertEqual(monitor.cycling_tool_names, [])
 if __name__ == "__main__":
     unittest.main()