npm - @miller-tech/uap - Versions diffs - 1.20.49 → 1.20.51 - Mend

@miller-tech/uap 1.20.49 → 1.20.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +1 -1
package/tools/agents/scripts/anthropic_proxy.py +94 -39
package/tools/agents/tests/test_anthropic_proxy_streaming.py +122 -18

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@miller-tech/uap",
-  "version": "1.20.49",
+  "version": "1.20.51",
   "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
   "type": "module",
   "main": "dist/index.js",

package/tools/agents/scripts/anthropic_proxy.py CHANGED Viewed

@@ -207,6 +207,19 @@ _READ_ONLY_TOOL_CLASS = frozenset({
     "search", "Search", "list_files", "ListFiles",
 })
+# Tools that produce or mutate a deliverable. Using any of these in a turn
+# means the agent is converging from exploration toward output, and resets
+# the recon-convergence streak (B1). This is deliberately a SHORT allowlist
+# of write tools, NOT a read-only denylist: exploration happens through an
+# open-ended set of tools (Bash, WebFetch, Agent, ...) that cannot be
+# enumerated, but "the agent produced a write" is a small, stable signal.
+# Names are matched case-insensitively (callers apply lower() before lookup).
+_WRITE_TOOL_CLASS = frozenset({
+    "write", "edit", "multiedit", "notebookedit",
+    "str_replace", "str_replace_editor", "str_replace_based_edit_tool",
+    "create_file", "applypatch", "apply_patch",
+})
 PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
     "0",
     "false",
@@ -224,12 +237,15 @@ PROXY_FINALIZE_CONTINUATION_MAX = int(
 PROXY_FINALIZE_SESSION_HARD_CAP = int(
     os.environ.get("PROXY_FINALIZE_SESSION_HARD_CAP", "3")
 )
-# Recon-convergence guardrail: after this many consecutive turns of PURE
-# read-only exploration (Read/Grep/Glob/etc. — no write/edit/deliverable
-# tool), the proxy injects a directive telling the model to stop exploring
-# and produce its deliverable. Targets the failure mode where an agentic
-# recon task reads files for hundreds of turns and never converges to the
+# Recon-convergence guardrail: after this many consecutive turns that use
+# tools but produce NO write/deliverable tool call (see _WRITE_TOOL_CLASS),
+# the proxy injects a directive telling the model to stop exploring and
+# produce its deliverable. Targets the failure mode where an agentic recon
+# task explores for hundreds of turns and never converges to the
 # synthesis/write step (observed: 664-turn recon, no deliverable started).
+# Defined as write-tool ABSENCE rather than read-tool presence: a real
+# recon agent explores via Bash/WebFetch/Agent, not just Read/Grep, so an
+# "all tools are recognized read-only" test never accumulates a streak.
 # 0 disables.
 PROXY_RECON_CONVERGENCE_THRESHOLD = int(
     os.environ.get("PROXY_RECON_CONVERGENCE_THRESHOLD", "40")
@@ -727,7 +743,7 @@ class SessionMonitor:
     )
     loop_warnings_emitted: int = 0  # How many loop warnings sent to the model
     no_progress_streak: int = 0  # Forced tool turns without new tool_result
-    consecutive_readonly_turns: int = 0  # turns of pure read-only exploration (B1)
+    consecutive_no_write_turns: int = 0  # turns exploring with no write tool (B1)
     unexpected_end_turn_count: int = 0  # end_turn without tool_use in active loop
     tool_starvation_streak: int = 0  # Consecutive forced turns with no tool_calls produced
     malformed_tool_streak: int = 0  # consecutive malformed pseudo tool payloads
@@ -885,15 +901,19 @@ class SessionMonitor:
         if len(self.tool_call_history) > 30:
             self.tool_call_history = self.tool_call_history[-30:]
-        # Recon-convergence (B1): count consecutive turns of PURE read-only
-        # exploration. A turn that uses any non-read-only tool (write, edit,
-        # a deliverable tool) resets the streak — that's the model
-        # converging from exploration toward synthesis/action.
-        _ro = {n.lower() for n in _READ_ONLY_TOOL_CLASS}
-        if tool_names and all(n.lower() in _ro for n in tool_names):
-            self.consecutive_readonly_turns += 1
-        else:
-            self.consecutive_readonly_turns = 0
+        # Recon-convergence (B1): count consecutive turns that use tools but
+        # produce NO write/deliverable tool call. A turn that uses any write
+        # tool resets the streak — that's the model converging from
+        # exploration toward synthesis/output. A turn with no tool calls at
+        # all is a plain-text turn (neither exploration nor a write) and
+        # leaves the streak unchanged. This is the inverse of the old
+        # "all tools are recognized read-only" test, which reset on any
+        # Bash/WebFetch/Agent turn and so never accumulated for real agents.
+        if tool_names:
+            if any(n.lower() in _WRITE_TOOL_CLASS for n in tool_names):
+                self.consecutive_no_write_turns = 0
+            else:
+                self.consecutive_no_write_turns += 1
         # Track read-only tool targets for dedup (Option 3)
         if tool_targets:
@@ -3268,48 +3288,78 @@ def _resolve_state_machine_tool_choice(
     return None, "unknown_phase"
-def _maybe_inject_recon_convergence(openai_body: dict, monitor: "SessionMonitor") -> None:
-    """Nudge a session stuck in prolonged read-only exploration toward its
-    deliverable.
-    Fires when `consecutive_readonly_turns` crosses
-    PROXY_RECON_CONVERGENCE_THRESHOLD — the model has read files for many
-    turns without writing anything. Targets the observed failure mode of
-    an agentic recon task wandering for hundreds of turns and never
-    converging to the synthesis/write step. Two escalation tiers: a firm
-    "switch to synthesis" directive, then a hard "STOP, write it now" once
-    the streak is 2x over threshold.
+def _maybe_inject_recon_convergence(
+    openai_body: dict,
+    monitor: "SessionMonitor",
+    full_tools: list[dict] | None = None,
+) -> None:
+    """Nudge a session stuck in prolonged exploration toward its deliverable.
+    Fires when `consecutive_no_write_turns` crosses
+    PROXY_RECON_CONVERGENCE_THRESHOLD — the model has used tools for many
+    turns without producing any write/deliverable tool call. Targets the
+    observed failure mode of an agentic recon task wandering for hundreds
+    of turns and never converging to the synthesis/write step. Two
+    escalation tiers: a firm "switch to synthesis" directive, then a hard
+    "STOP, write it now" once the streak is 2x over threshold.
+    `full_tools` is the request's tool list *before* `_narrow_tools_for_request`
+    pruned it. When the directive fires, any write/deliverable tool that
+    narrowing dropped is re-injected into `openai_body["tools"]` — narrowing
+    scores tools against the (exploration-heavy) recon prompt and runs before
+    this guardrail, so it routinely strips the very write tool the directive
+    tells the model to use, leaving the directive impossible to satisfy.
     """
     if PROXY_RECON_CONVERGENCE_THRESHOLD <= 0:
         return
-    streak = monitor.consecutive_readonly_turns
+    streak = monitor.consecutive_no_write_turns
     if streak < PROXY_RECON_CONVERGENCE_THRESHOLD:
         return
     util = monitor.get_utilization()
     if streak >= 2 * PROXY_RECON_CONVERGENCE_THRESHOLD:
         directive = (
             f"STOP exploring. You have run {streak} consecutive turns of "
-            f"read-only exploration and context is at {util * 100:.0f}%. "
-            "You will NOT finish if you keep reading files. Produce your "
-            "deliverable NOW from the information you already have — write "
-            "it to a file with the appropriate tool. Do not read anything else."
+            f"exploration without producing a deliverable and context is at "
+            f"{util * 100:.0f}%. You will NOT finish if you keep exploring. "
+            "Produce your deliverable NOW from the information you already "
+            "have — write it to a file with the appropriate tool. Do not "
+            "read or run anything else."
         )
         tier = "hard"
     else:
         directive = (
-            f"You have read files for {streak} consecutive turns without "
+            f"You have explored for {streak} consecutive turns without "
             f"producing a deliverable (context {util * 100:.0f}%). You have "
             "enough to begin. Switch from exploration to synthesis: write "
-            "your deliverable now. Read at most one more file, and only if "
-            "strictly required to write it."
+            "your deliverable now. Explore at most one more time, and only "
+            "if strictly required to write it."
         )
         tier = "firm"
     msgs = openai_body.get("messages", [])
     msgs.append({"role": "user", "content": directive})
     openai_body["messages"] = msgs
+    # Re-inject any write/deliverable tool that narrowing dropped, so the
+    # "write your deliverable" directive is actually satisfiable. Without
+    # this the model is told to write but has no write tool to call, picks
+    # another read tool, and the streak climbs unbounded.
+    restored: list[str] = []
+    if full_tools:
+        present = {
+            (t.get("function", {}).get("name", "") or "").lower()
+            for t in openai_body.get("tools", [])
+        }
+        for tool in full_tools:
+            name = (tool.get("function", {}).get("name", "") or "")
+            if name.lower() in _WRITE_TOOL_CLASS and name.lower() not in present:
+                openai_body.setdefault("tools", []).append(tool)
+                present.add(name.lower())
+                restored.append(name)
     logger.warning(
-        "RECON CONVERGENCE: injected %s directive (readonly_streak=%d, ctx=%.0f%%)",
-        tier, streak, util * 100,
+        "RECON CONVERGENCE: injected %s directive (no_write_streak=%d, ctx=%.0f%%, "
+        "restored_write_tools=%s)",
+        tier, streak, util * 100, restored or "none",
     )
@@ -3555,10 +3605,14 @@ def build_openai_request(
             )
     # Convert Anthropic tools to OpenAI function-calling tools
+    full_openai_tools: list[dict] = []
     if has_tools:
         openai_body["tools"] = _convert_anthropic_tools_to_openai(
             anthropic_body.get("tools", [])
         )
+        # Keep the full (pre-narrowing) list so the recon-convergence
+        # guardrail can restore a write tool that narrowing dropped.
+        full_openai_tools = openai_body["tools"]
         openai_body["tools"] = _narrow_tools_for_request(
             anthropic_body, openai_body["tools"]
         )
@@ -3821,9 +3875,10 @@ def build_openai_request(
         _apply_tool_call_grammar(openai_body, grammar_override=profile_grammar)
     # Recon-convergence guardrail (B1) — runs on every built request so a
-    # session wandering in read-only exploration is nudged toward its
-    # deliverable regardless of tool-turn phase.
-    _maybe_inject_recon_convergence(openai_body, monitor)
+    # session wandering in exploration without producing a write is nudged
+    # toward its deliverable regardless of tool-turn phase. Passed the full
+    # pre-narrowing toolset so it can restore a dropped write tool.
+    _maybe_inject_recon_convergence(openai_body, monitor, full_openai_tools)
     return openai_body

package/tools/agents/tests/test_anthropic_proxy_streaming.py CHANGED Viewed

@@ -5375,10 +5375,13 @@ class TestSlotSaveRestore(unittest.TestCase):
 class TestReconConvergence(unittest.TestCase):
     """Tests for the B1 recon-convergence guardrail — nudges a session
-    stuck doing read-only exploration toward producing its deliverable.
+    stuck exploring without producing a write toward its deliverable.
-    Targets the observed failure: a 664-turn agentic recon task that read
-    files for hours and never converged to the synthesis/write step."""
+    The streak is defined as write-tool ABSENCE, not read-tool presence: a
+    real recon agent explores via Bash/WebFetch/Agent, so an "all tools are
+    recognized read-only" test never accumulates. Targets the observed
+    failure: a 664-turn agentic recon task that explored for hours and
+    never converged to the synthesis/write step."""
     def setUp(self):
         self._threshold = proxy.PROXY_RECON_CONVERGENCE_THRESHOLD
@@ -5387,37 +5390,60 @@ class TestReconConvergence(unittest.TestCase):
         proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = self._threshold
     def test_readonly_turns_increment_the_streak(self):
-        """Consecutive turns using only read-only tools grow the streak."""
+        """Consecutive turns using only read tools grow the streak."""
         m = proxy.SessionMonitor(context_window=131072)
         for _ in range(5):
             m.record_tool_calls(["Read"])
-        self.assertEqual(m.consecutive_readonly_turns, 5)
+        self.assertEqual(m.consecutive_no_write_turns, 5)
         m.record_tool_calls(["Grep", "Glob"])
-        self.assertEqual(m.consecutive_readonly_turns, 6)
+        self.assertEqual(m.consecutive_no_write_turns, 6)
-    def test_non_readonly_tool_resets_the_streak(self):
+    def test_bash_and_webfetch_turns_increment_the_streak(self):
+        """The core fix: exploration via Bash/WebFetch/Agent — tools the old
+        read-only allowlist did not recognize — must grow the streak. The
+        old logic reset on every such turn, so the streak never built."""
+        m = proxy.SessionMonitor(context_window=131072)
+        m.record_tool_calls(["Bash"])
+        m.record_tool_calls(["WebFetch"])
+        m.record_tool_calls(["Agent"])
+        m.record_tool_calls(["Read", "Bash"])  # mixed exploration, no write
+        self.assertEqual(m.consecutive_no_write_turns, 4)
+    def test_write_tool_resets_the_streak(self):
         """A turn using a write/edit tool means the model converged toward
-        action — the streak resets to 0."""
+        output — the streak resets to 0."""
         m = proxy.SessionMonitor(context_window=131072)
         for _ in range(10):
-            m.record_tool_calls(["Read"])
-        self.assertEqual(m.consecutive_readonly_turns, 10)
+            m.record_tool_calls(["Bash"])
+        self.assertEqual(m.consecutive_no_write_turns, 10)
         m.record_tool_calls(["Write"])
-        self.assertEqual(m.consecutive_readonly_turns, 0)
+        self.assertEqual(m.consecutive_no_write_turns, 0)
     def test_mixed_turn_with_one_write_resets(self):
-        """A turn mixing read-only and a write tool still counts as
-        converging — any non-read-only tool resets."""
+        """A turn mixing exploration and a write tool still counts as
+        converging — any write tool resets."""
         m = proxy.SessionMonitor(context_window=131072)
         for _ in range(10):
             m.record_tool_calls(["Read"])
         m.record_tool_calls(["Read", "Edit"])
-        self.assertEqual(m.consecutive_readonly_turns, 0)
+        self.assertEqual(m.consecutive_no_write_turns, 0)
+    def test_no_tool_turn_leaves_streak_unchanged(self):
+        """A plain-text turn (no tool calls) is neither exploration nor a
+        write — it must leave the streak untouched, not reset it."""
+        m = proxy.SessionMonitor(context_window=131072)
+        for _ in range(7):
+            m.record_tool_calls(["Bash"])
+        self.assertEqual(m.consecutive_no_write_turns, 7)
+        m.record_tool_calls([])  # plain-text turn
+        self.assertEqual(m.consecutive_no_write_turns, 7)
+        m.record_tool_calls(["Read"])
+        self.assertEqual(m.consecutive_no_write_turns, 8)
     def test_no_injection_below_threshold(self):
         proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
         m = proxy.SessionMonitor(context_window=131072)
-        m.consecutive_readonly_turns = 39
+        m.consecutive_no_write_turns = 39
         body = {"messages": [{"role": "user", "content": "go"}]}
         proxy._maybe_inject_recon_convergence(body, m)
         self.assertEqual(len(body["messages"]), 1)
@@ -5425,7 +5451,7 @@ class TestReconConvergence(unittest.TestCase):
     def test_firm_directive_at_threshold(self):
         proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
         m = proxy.SessionMonitor(context_window=131072)
-        m.consecutive_readonly_turns = 45
+        m.consecutive_no_write_turns = 45
         m.last_input_tokens = 120000
         body = {"messages": [{"role": "user", "content": "go"}]}
         proxy._maybe_inject_recon_convergence(body, m)
@@ -5438,7 +5464,7 @@ class TestReconConvergence(unittest.TestCase):
         """Once the streak is 2x over threshold, escalate to a hard STOP."""
         proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
         m = proxy.SessionMonitor(context_window=131072)
-        m.consecutive_readonly_turns = 80
+        m.consecutive_no_write_turns = 80
         m.last_input_tokens = 250000  # over budget — the real-incident shape
         body = {"messages": [{"role": "user", "content": "go"}]}
         proxy._maybe_inject_recon_convergence(body, m)
@@ -5448,11 +5474,89 @@ class TestReconConvergence(unittest.TestCase):
     def test_disabled_when_threshold_zero(self):
         proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 0
         m = proxy.SessionMonitor(context_window=131072)
-        m.consecutive_readonly_turns = 500
+        m.consecutive_no_write_turns = 500
         body = {"messages": [{"role": "user", "content": "go"}]}
         proxy._maybe_inject_recon_convergence(body, m)
         self.assertEqual(len(body["messages"]), 1)
+    @staticmethod
+    def _tool(name: str) -> dict:
+        return {"type": "function", "function": {"name": name, "description": f"{name} tool"}}
+    def test_dropped_write_tool_is_restored_when_directive_fires(self):
+        """The core fix: if narrowing left no write tool in the request,
+        a firing directive re-injects it from the full pre-narrowing set."""
+        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
+        m = proxy.SessionMonitor(context_window=131072)
+        m.consecutive_no_write_turns = 45
+        # narrowed toolset — exploration tools only, no write tool
+        body = {
+            "messages": [{"role": "user", "content": "go"}],
+            "tools": [self._tool("Read"), self._tool("Grep"), self._tool("Bash")],
+        }
+        # full pre-narrowing set DID include a write tool
+        full = body["tools"] + [self._tool("Edit")]
+        proxy._maybe_inject_recon_convergence(body, m, full)
+        names = [t["function"]["name"] for t in body["tools"]]
+        self.assertIn("Edit", names)
+    def test_present_write_tool_not_duplicated(self):
+        """If a write tool already survived narrowing, it is not added twice."""
+        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
+        m = proxy.SessionMonitor(context_window=131072)
+        m.consecutive_no_write_turns = 45
+        body = {
+            "messages": [{"role": "user", "content": "go"}],
+            "tools": [self._tool("Read"), self._tool("Edit")],
+        }
+        full = list(body["tools"])
+        proxy._maybe_inject_recon_convergence(body, m, full)
+        names = [t["function"]["name"] for t in body["tools"]]
+        self.assertEqual(names.count("Edit"), 1)
+    def test_no_write_tool_anywhere_is_safe(self):
+        """A recon agent whose toolset has no write tool at all: nothing to
+        restore, no crash."""
+        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
+        m = proxy.SessionMonitor(context_window=131072)
+        m.consecutive_no_write_turns = 45
+        body = {
+            "messages": [{"role": "user", "content": "go"}],
+            "tools": [self._tool("Read"), self._tool("Bash")],
+        }
+        proxy._maybe_inject_recon_convergence(body, m, list(body["tools"]))
+        names = [t["function"]["name"] for t in body["tools"]]
+        self.assertEqual(names, ["Read", "Bash"])
+    def test_full_tools_omitted_is_safe(self):
+        """Called without full_tools (default None) — directive still fires,
+        no tool restoration attempted, no crash."""
+        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
+        m = proxy.SessionMonitor(context_window=131072)
+        m.consecutive_no_write_turns = 45
+        body = {
+            "messages": [{"role": "user", "content": "go"}],
+            "tools": [self._tool("Read")],
+        }
+        proxy._maybe_inject_recon_convergence(body, m)
+        self.assertEqual(len(body["messages"]), 2)
+        self.assertEqual([t["function"]["name"] for t in body["tools"]], ["Read"])
+    def test_no_restore_below_threshold(self):
+        """Below threshold the directive does not fire, so no write tool is
+        restored even if narrowing dropped one."""
+        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
+        m = proxy.SessionMonitor(context_window=131072)
+        m.consecutive_no_write_turns = 39
+        body = {
+            "messages": [{"role": "user", "content": "go"}],
+            "tools": [self._tool("Read")],
+        }
+        full = body["tools"] + [self._tool("Write")]
+        proxy._maybe_inject_recon_convergence(body, m, full)
+        names = [t["function"]["name"] for t in body["tools"]]
+        self.assertEqual(names, ["Read"])
 class TestPrunerRework(unittest.TestCase):
     """Tests for the reworked context pruner (B2 + B3): contiguous