npm - @miller-tech/uap - Versions diffs - 1.14.1 → 1.15.1 - Mend

@miller-tech/uap 1.14.1 → 1.15.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/tools/agents/scripts/anthropic_proxy.py +178 -21
package/tools/agents/tests/test_anthropic_proxy_streaming.py +111 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@miller-tech/uap",
-  "version": "1.14.1",
+  "version": "1.15.1",
   "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
   "type": "module",
   "main": "dist/index.js",

package/tools/agents/scripts/anthropic_proxy.py CHANGED Viewed

@@ -162,7 +162,7 @@ PROXY_MALFORMED_TOOL_GUARDRAIL = os.environ.get(
     "no",
 }
 PROXY_MALFORMED_TOOL_RETRY_MAX = int(
-    os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "1")
+    os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "2")
 )
 PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS = int(
     os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", "2048")
@@ -203,6 +203,20 @@ PROXY_SESSION_CONTAMINATION_KEEP_LAST = int(
 PROXY_AGENTIC_SUPPLEMENT_MODE = (
     os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
 )
+PROXY_ANALYSIS_ONLY_ROUTE = os.environ.get(
+    "PROXY_ANALYSIS_ONLY_ROUTE", "off"
+).lower() not in {
+    "0",
+    "false",
+    "off",
+    "no",
+}
+PROXY_ANALYSIS_ONLY_MIN_TOOLS = int(
+    os.environ.get("PROXY_ANALYSIS_ONLY_MIN_TOOLS", "12")
+)
+PROXY_ANALYSIS_ONLY_MAX_MESSAGES = int(
+    os.environ.get("PROXY_ANALYSIS_ONLY_MAX_MESSAGES", "2")
+)
 # ---------------------------------------------------------------------------
 # Logging
@@ -549,8 +563,9 @@ def estimate_total_tokens(anthropic_body: dict) -> int:
             if isinstance(block, dict) and block.get("type") == "text":
                 tokens += estimate_tokens(block.get("text", ""))
-    # Agentic supplement tokens (always injected)
-    tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
+    # Agentic supplement tokens (only when tool mode is active)
+    if _has_tool_definitions(anthropic_body):
+        tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
     # Messages
     for msg in anthropic_body.get("messages", []):
@@ -600,7 +615,8 @@ def prune_conversation(
         for block in system:
             if isinstance(block, dict) and block.get("type") == "text":
                 overhead_tokens += estimate_tokens(block.get("text", ""))
-    overhead_tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
+    if _has_tool_definitions(anthropic_body):
+        overhead_tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
     tools = anthropic_body.get("tools", [])
     if tools:
         overhead_tokens += estimate_tokens(json.dumps(tools))
@@ -768,7 +784,7 @@ async def lifespan(app: FastAPI):
         _resolve_prune_target_fraction() * 100,
     )
     logger.info(
-        "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d)",
+        "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
         PROXY_MALFORMED_TOOL_GUARDRAIL,
         PROXY_MALFORMED_TOOL_STREAM_STRICT,
         PROXY_FORCE_NON_STREAM,
@@ -776,6 +792,9 @@ async def lifespan(app: FastAPI):
         PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
         PROXY_SESSION_CONTAMINATION_BREAKER,
         PROXY_SESSION_CONTAMINATION_THRESHOLD,
+        PROXY_ANALYSIS_ONLY_ROUTE,
+        PROXY_ANALYSIS_ONLY_MIN_TOOLS,
+        PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
     )
     yield
@@ -879,6 +898,112 @@ def _extract_text(content) -> str:
     return str(content)
+def _has_tool_definitions(anthropic_body: dict) -> bool:
+    tools = anthropic_body.get("tools")
+    return isinstance(tools, list) and len(tools) > 0
+def _message_has_tool_result(content) -> bool:
+    return isinstance(content, list) and any(
+        isinstance(block, dict) and block.get("type") == "tool_result"
+        for block in content
+    )
+def _last_user_text(anthropic_body: dict) -> str:
+    for msg in reversed(anthropic_body.get("messages", [])):
+        if msg.get("role") == "user":
+            return _extract_text(msg.get("content", "")).strip().lower()
+    return ""
+def _is_analysis_only_prompt(text: str) -> bool:
+    if not text:
+        return False
+    analysis_markers = (
+        "analy",
+        "review",
+        "audit",
+        "summar",
+        "explain",
+        "plan",
+        "recommend",
+        "assess",
+        "compare",
+        "investigate",
+        "diagnose",
+    )
+    action_markers = (
+        "fix",
+        "edit",
+        "write",
+        "create",
+        "implement",
+        "patch",
+        "change",
+        "update",
+        "run ",
+        "execute",
+        "command",
+        "use tool",
+        "call tool",
+        "apply",
+        "commit",
+        "push",
+        "merge",
+        "publish",
+        "deploy",
+        "test",
+        "build",
+        "refactor",
+        "rename",
+        "delete",
+        "install",
+    )
+    has_analysis = any(marker in text for marker in analysis_markers)
+    has_action = any(marker in text for marker in action_markers)
+    return has_analysis and not has_action
+def _should_route_analysis_without_tools(anthropic_body: dict) -> bool:
+    if not PROXY_ANALYSIS_ONLY_ROUTE:
+        return False
+    tools = anthropic_body.get("tools")
+    if not isinstance(tools, list) or len(tools) < max(
+        1, PROXY_ANALYSIS_ONLY_MIN_TOOLS
+    ):
+        return False
+    messages = anthropic_body.get("messages", [])
+    if not isinstance(messages, list) or not messages:
+        return False
+    if len(messages) > max(1, PROXY_ANALYSIS_ONLY_MAX_MESSAGES):
+        return False
+    if any(msg.get("role") == "assistant" for msg in messages):
+        return False
+    if any(_message_has_tool_result(msg.get("content")) for msg in messages):
+        return False
+    return _is_analysis_only_prompt(_last_user_text(anthropic_body))
+def _maybe_route_analysis_without_tools(anthropic_body: dict) -> tuple[dict, int]:
+    if not _should_route_analysis_without_tools(anthropic_body):
+        return anthropic_body, 0
+    tools = anthropic_body.get("tools")
+    removed = len(tools) if isinstance(tools, list) else 0
+    updated = dict(anthropic_body)
+    updated.pop("tools", None)
+    return updated, removed
 _AGENTIC_SYSTEM_SUPPLEMENT_LEGACY = (
     "\n\n<agentic-protocol>\n"
     "You are operating in an agentic coding loop with tool access. Follow these rules:\n"
@@ -1076,19 +1201,24 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
         "stream": anthropic_body.get("stream", False),
     }
-    # Inject agentic protocol instructions into the system message so
-    # the model knows it must use tools to complete work, not just explain.
-    if openai_body["messages"] and openai_body["messages"][0].get("role") == "system":
-        openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
-    else:
-        # No system message from the client; inject one.
-        openai_body["messages"].insert(
-            0,
-            {
-                "role": "system",
-                "content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
-            },
-        )
+    has_tools = _has_tool_definitions(anthropic_body)
+    # Inject agentic protocol instructions only for tool-enabled turns.
+    if has_tools:
+        if (
+            openai_body["messages"]
+            and openai_body["messages"][0].get("role") == "system"
+        ):
+            openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
+        else:
+            # No system message from the client; inject one.
+            openai_body["messages"].insert(
+                0,
+                {
+                    "role": "system",
+                    "content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
+                },
+            )
     if "max_tokens" in anthropic_body:
         # Enforce configurable minimum floor for thinking mode: model needs
@@ -1137,7 +1267,7 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
         openai_body["stop"] = anthropic_body["stop_sequences"]
     # Convert Anthropic tools to OpenAI function-calling tools
-    if "tools" in anthropic_body:
+    if has_tools:
         openai_body["tools"] = _convert_anthropic_tools_to_openai(
             anthropic_body.get("tools", [])
         )
@@ -1517,6 +1647,13 @@ def _looks_malformed_tool_payload(text: str) -> bool:
         return False
     lowered = text.lower()
+    apology_markers = (
+        "i could not produce a valid tool-call format in this turn",
+        "i will issue exactly one valid tool call next",
+    )
+    if any(marker in lowered for marker in apology_markers):
+        return True
     primary_markers = ("</parameter", "<parameter", "<tool_call", "<function=")
     if any(marker in lowered for marker in primary_markers):
         return True
@@ -1575,6 +1712,18 @@ def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict
     retry_body["tool_choice"] = "required"
     retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
+    malformed_retry_instruction = {
+        "role": "user",
+        "content": (
+            "Your previous response had invalid tool-call formatting. "
+            "Respond with exactly one valid tool call using the provided tools. "
+            "Do not output prose, markdown, XML tags, or schema snippets."
+        ),
+    }
+    existing_messages = retry_body.get("messages")
+    if isinstance(existing_messages, list) and existing_messages:
+        retry_body["messages"] = [*existing_messages, malformed_retry_instruction]
     if PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS > 0:
         current_max = int(
             retry_body.get("max_tokens", PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS)
@@ -1608,8 +1757,8 @@ def _build_clean_guardrail_openai_response(openai_resp: dict) -> dict:
                 "message": {
                     "role": "assistant",
                     "content": (
-                        "I could not produce a valid tool-call format in this turn. "
-                        "Please continue; I will issue exactly one valid tool call next."
+                        "Tool-call formatting failed after automatic retries. "
+                        "Please retry the same request."
                     ),
                 },
             }
@@ -2200,6 +2349,14 @@ async def messages(request: Request):
     last_session_id = session_id
     body = _maybe_apply_session_contamination_breaker(body, monitor, session_id)
+    body, analysis_tools_removed = _maybe_route_analysis_without_tools(body)
+    if analysis_tools_removed > 0:
+        monitor.consecutive_forced_count = 0
+        monitor.no_progress_streak = 0
+        logger.info(
+            "ANALYSIS ROUTE: disabled %d tools for analysis-only prompt",
+            analysis_tools_removed,
+        )
     # Debug: log request summary
     n_messages = len(body.get("messages", []))

package/tools/agents/tests/test_anthropic_proxy_streaming.py CHANGED Viewed

@@ -164,6 +164,27 @@ class TestMalformedToolGuardrail(unittest.TestCase):
         }
         self.assertTrue(proxy._is_malformed_tool_response(openai_resp, anthropic_body))
+    def test_detects_tool_call_apology_text_as_malformed(self):
+        openai_resp = {
+            "choices": [
+                {
+                    "finish_reason": "stop",
+                    "message": {
+                        "content": (
+                            "I could not produce a valid tool-call format in this turn. "
+                            "Please continue; I will issue exactly one valid tool call next."
+                        ),
+                        "tool_calls": [],
+                    },
+                }
+            ]
+        }
+        anthropic_body = {
+            "tools": [{"name": "Read", "input_schema": {"type": "object"}}],
+            "messages": [{"role": "user", "content": "fix this"}],
+        }
+        self.assertTrue(proxy._is_malformed_tool_response(openai_resp, anthropic_body))
     def test_clean_tool_call_response_is_not_malformed(self):
         openai_resp = {
             "choices": [
@@ -385,6 +406,7 @@ class TestMalformedToolGuardrail(unittest.TestCase):
             openai_body = {
                 "model": "test",
                 "max_tokens": 4000,
+                "messages": [{"role": "user", "content": "fix the issue"}],
                 "tools": [{"type": "function", "function": {"name": "Read"}}],
             }
             anthropic_body = {
@@ -402,11 +424,24 @@ class TestMalformedToolGuardrail(unittest.TestCase):
             self.assertEqual(retry["max_tokens"], 512)
             self.assertEqual(len(retry["tools"]), 3)
             self.assertFalse(retry["enable_thinking"])
+            self.assertEqual(retry["messages"][-1]["role"], "user")
+            self.assertIn(
+                "invalid tool-call formatting",
+                retry["messages"][-1]["content"],
+            )
         finally:
             setattr(proxy, "PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", old_cap)
             setattr(proxy, "PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE", old_temp)
             setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
+    def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
+        guardrail = proxy._build_clean_guardrail_openai_response(
+            {"model": "test-model"}
+        )
+        text = guardrail["choices"][0]["message"]["content"]
+        self.assertIn("Please retry the same request", text)
+        self.assertNotIn("I will issue exactly one valid tool call next", text)
 class TestToolTurnControls(unittest.TestCase):
     def test_tool_narrowing_reduces_tool_count(self):
@@ -483,6 +518,82 @@ class TestToolTurnControls(unittest.TestCase):
         finally:
             setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
+    def test_no_tools_does_not_inject_agentic_system_message(self):
+        body = {
+            "model": "test",
+            "messages": [{"role": "user", "content": "analyze architecture"}],
+        }
+        openai = proxy.build_openai_request(
+            body, proxy.SessionMonitor(context_window=262144)
+        )
+        self.assertEqual(openai["messages"][0]["role"], "user")
+        self.assertNotIn("tools", openai)
+    def test_analysis_only_route_removes_tools(self):
+        old_route = getattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE")
+        old_min_tools = getattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS")
+        old_max_messages = getattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES")
+        try:
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE", True)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS", 4)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES", 2)
+            body = {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": "analyze lifecycle and plan options to improve performance and compliance",
+                    }
+                ],
+                "tools": [
+                    {"name": "Read", "input_schema": {"type": "object"}},
+                    {"name": "Edit", "input_schema": {"type": "object"}},
+                    {"name": "Write", "input_schema": {"type": "object"}},
+                    {"name": "Bash", "input_schema": {"type": "object"}},
+                ],
+            }
+            updated, removed = proxy._maybe_route_analysis_without_tools(body)
+            self.assertEqual(removed, 4)
+            self.assertNotIn("tools", updated)
+        finally:
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE", old_route)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS", old_min_tools)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES", old_max_messages)
+    def test_analysis_only_route_keeps_tools_for_action_prompt(self):
+        old_route = getattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE")
+        old_min_tools = getattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS")
+        old_max_messages = getattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES")
+        try:
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE", True)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS", 4)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES", 2)
+            body = {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": "analyze failing run and fix the bug",
+                    }
+                ],
+                "tools": [
+                    {"name": "Read", "input_schema": {"type": "object"}},
+                    {"name": "Edit", "input_schema": {"type": "object"}},
+                    {"name": "Write", "input_schema": {"type": "object"}},
+                    {"name": "Bash", "input_schema": {"type": "object"}},
+                ],
+            }
+            updated, removed = proxy._maybe_route_analysis_without_tools(body)
+            self.assertEqual(removed, 0)
+            self.assertIn("tools", updated)
+        finally:
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_ROUTE", old_route)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MIN_TOOLS", old_min_tools)
+            setattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES", old_max_messages)
 class TestSessionContaminationBreaker(unittest.TestCase):
     def test_contamination_breaker_trims_and_resets_streak(self):