npm - @miller-tech/uap - Versions diffs - 1.20.25 → 1.20.26 - Mend

@miller-tech/uap 1.20.25 → 1.20.26

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/tools/agents/scripts/anthropic_proxy.py +26 -14
package/tools/agents/tests/test_anthropic_proxy_streaming.py +169 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@miller-tech/uap",
-  "version": "1.20.25",
+  "version": "1.20.26",
   "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
   "type": "module",
   "main": "dist/index.js",

package/tools/agents/scripts/anthropic_proxy.py CHANGED Viewed

@@ -2281,11 +2281,13 @@ def _resolve_state_machine_tool_choice(
                 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
             )
             # If stagnation cleared during review, the model tried a
-            # different approach — reward by reducing cycle pressure.
+            # different approach — reward by reducing cycle pressure and
+            # lifting persistent tool exclusion.
             if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
                 monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
+                monitor.cycling_tool_names = []
                 logger.info(
-                    "TOOL STATE MACHINE: review_cycles decremented to %d (stagnation cleared)",
+                    "TOOL STATE MACHINE: review_cycles decremented to %d, cycling exclusion lifted (stagnation cleared)",
                     monitor.tool_state_review_cycles,
                 )
             return "required", "review_complete"
@@ -2589,31 +2591,41 @@ def build_openai_request(
             monitor.no_progress_streak = (
                 0 if last_user_has_tool_result else monitor.no_progress_streak + 1
             )
-            # Option 1: Inject cycle-break instruction when entering review
+            # Inject cycle-break instruction when entering review
+            # Option 3 (Cycle 14): Escalate hint text based on review cycle count
             if (
                 monitor.tool_turn_phase == "review"
                 and state_reason in {"cycle_detected", "stagnation"}
                 and monitor.cycling_tool_names
             ):
                 cycling_names = ", ".join(monitor.cycling_tool_names)
-                cycle_hint = (
-                    f"You have been repeatedly calling the same tool(s): {cycling_names}. "
-                    "This is not making progress. Use a DIFFERENT tool to advance the task, "
-                    "or call a tool that produces your final answer."
-                )
+                cycles = monitor.tool_state_review_cycles
+                if cycles <= 1:
+                    cycle_hint = (
+                        f"You have been repeatedly calling the same tool(s): {cycling_names}. "
+                        "This is not making progress. Use a DIFFERENT tool to advance the task, "
+                        "or call a tool that produces your final answer."
+                    )
+                else:
+                    cycle_hint = (
+                        f"CRITICAL: You have cycled {cycling_names} for {cycles} review rounds without progress. "
+                        "State what you have accomplished so far and what the next DIFFERENT action should be. "
+                        "Do NOT call the same tool again. Choose a completely different approach or "
+                        "produce your final answer now."
+                    )
                 messages = openai_body.get("messages", [])
                 messages.append({"role": "user", "content": cycle_hint})
                 openai_body["messages"] = messages
                 logger.warning(
-                    "CYCLE BREAK: injected hint about cycling tools: %s",
+                    "CYCLE BREAK: injected hint about cycling tools: %s (escalation=%d)",
                     cycling_names,
+                    cycles,
                 )
-            # Option 2: Narrow tools during review to exclude cycling tools
-            # Option 1 enhancement: if any cycling tool is read-only, exclude
-            # the entire read-only class to prevent tool-hopping (read→glob→grep)
+            # Narrow tools to exclude cycling tools
+            # Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
+            # Option 1 (Cycle 14): persist exclusion during act phase too, not just review
             if (
-                monitor.tool_turn_phase == "review"
-                and monitor.cycling_tool_names
+                monitor.cycling_tool_names
                 and "tools" in openai_body
             ):
                 exclude_set = set(monitor.cycling_tool_names)

package/tools/agents/tests/test_anthropic_proxy_streaming.py CHANGED Viewed

@@ -4406,3 +4406,172 @@ class TestReadOnlyCycleClassExclusion(unittest.TestCase):
         dup, _ = monitor.has_duplicate_read_target(threshold=3)
         self.assertFalse(dup)
+class TestPersistentCycleExclusion(unittest.TestCase):
+    """Tests for Cycle 14: persistent exclusion, escalating hints, and
+    exclusion across review→act transitions."""
+    def _make_body_with_tools(self, tool_names, active_tool="bash", active_input=None):
+        tools = [
+            {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
+            for n in tool_names
+        ]
+        inp = active_input or {"command": "ls"}
+        return {
+            "model": "test",
+            "messages": [
+                {"role": "user", "content": "do something"},
+                {
+                    "role": "assistant",
+                    "content": [
+                        {"type": "tool_use", "id": "t1", "name": active_tool, "input": inp}
+                    ],
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
+                    ],
+                },
+            ],
+            "tools": tools,
+        }
+    def test_exclusion_persists_through_act_phase(self):
+        """Option 1: cycling_tool_names exclusion persists in act phase after review."""
+        old_vals = {}
+        for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
+                   "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
+                   "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
+            old_vals[k] = getattr(proxy, k)
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
+            all_tools = ["bash", "read", "write", "edit"]
+            body = self._make_body_with_tools(all_tools)
+            monitor = proxy.SessionMonitor(context_window=262144)
+            # Simulate bash cycling that triggers review
+            monitor.cycling_tool_names = ["bash"]
+            monitor.tool_turn_phase = "act"
+            monitor.tool_state_forced_budget_remaining = 5
+            openai = proxy.build_openai_request(body, monitor)
+            # In act phase with cycling_tool_names set, bash should be excluded
+            remaining = [t["function"]["name"] for t in openai.get("tools", [])]
+            self.assertNotIn("bash", remaining)
+            self.assertIn("read", remaining)
+            self.assertIn("write", remaining)
+        finally:
+            for k, v in old_vals.items():
+                setattr(proxy, k, v)
+    def test_exclusion_cleared_on_stagnation_clear(self):
+        """Option 1: cycling exclusion is lifted when stagnation clears in review."""
+        monitor = proxy.SessionMonitor(context_window=262144)
+        monitor.tool_turn_phase = "review"
+        monitor.tool_state_review_cycles = 1
+        monitor.tool_state_stagnation_streak = 0  # stagnation cleared
+        monitor.cycling_tool_names = ["bash"]
+        monitor.tool_state_auto_budget_remaining = 0
+        monitor.tool_state_forced_budget_remaining = 6
+        # This should transition review→act and clear cycling names
+        old_vals = {}
+        for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
+                   "PROXY_TOOL_STATE_FORCED_BUDGET"]:
+            old_vals[k] = getattr(proxy, k)
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
+            body = self._make_body_with_tools(["bash", "read", "write"])
+            proxy.build_openai_request(body, monitor)
+            self.assertEqual(monitor.tool_turn_phase, "act")
+            self.assertEqual(monitor.cycling_tool_names, [])
+        finally:
+            for k, v in old_vals.items():
+                setattr(proxy, k, v)
+    def test_escalated_hint_on_cycle_2(self):
+        """Option 3: cycle 2+ gets escalated CRITICAL hint text."""
+        old_vals = {}
+        for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
+                   "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
+                   "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
+            old_vals[k] = getattr(proxy, k)
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
+            all_tools = ["bash", "read", "write"]
+            body = self._make_body_with_tools(all_tools)
+            monitor = proxy.SessionMonitor(context_window=262144)
+            # Pre-set as if we've already been through 1 review cycle
+            monitor.tool_turn_phase = "act"
+            monitor.tool_state_review_cycles = 1
+            monitor.tool_state_forced_budget_remaining = 20
+            monitor.tool_state_stagnation_streak = 3
+            fp = "bash:781c24ad"
+            monitor.tool_call_history = [fp, fp, fp]
+            monitor.last_tool_fingerprint = fp
+            openai = proxy.build_openai_request(body, monitor)
+            # Should now be in review with cycles=2 and escalated hint
+            self.assertEqual(monitor.tool_turn_phase, "review")
+            self.assertEqual(monitor.tool_state_review_cycles, 2)
+            messages = openai.get("messages", [])
+            last_user = [m for m in messages if m.get("role") == "user"][-1]
+            self.assertIn("CRITICAL", last_user["content"])
+            self.assertIn("2 review rounds", last_user["content"])
+        finally:
+            for k, v in old_vals.items():
+                setattr(proxy, k, v)
+    def test_mild_hint_on_cycle_1(self):
+        """Option 3: cycle 1 gets mild hint, not escalated."""
+        old_vals = {}
+        for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
+                   "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
+                   "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
+            old_vals[k] = getattr(proxy, k)
+        try:
+            setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
+            setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
+            setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
+            setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
+            body = self._make_body_with_tools(["bash", "read", "write"])
+            monitor = proxy.SessionMonitor(context_window=262144)
+            monitor.tool_turn_phase = "act"
+            monitor.tool_state_review_cycles = 0
+            monitor.tool_state_forced_budget_remaining = 20
+            monitor.tool_state_stagnation_streak = 3
+            fp = "bash:781c24ad"
+            monitor.tool_call_history = [fp, fp, fp]
+            monitor.last_tool_fingerprint = fp
+            openai = proxy.build_openai_request(body, monitor)
+            self.assertEqual(monitor.tool_turn_phase, "review")
+            self.assertEqual(monitor.tool_state_review_cycles, 1)
+            messages = openai.get("messages", [])
+            last_user = [m for m in messages if m.get("role") == "user"][-1]
+            self.assertNotIn("CRITICAL", last_user["content"])
+            self.assertIn("DIFFERENT tool", last_user["content"])
+        finally:
+            for k, v in old_vals.items():
+                setattr(proxy, k, v)