npm - @miller-tech/uap - Versions diffs - 1.20.44 → 1.20.46 - Mend

@miller-tech/uap 1.20.44 → 1.20.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/tools/agents/scripts/anthropic_proxy.py +42 -7
package/tools/agents/tests/test_anthropic_proxy_streaming.py +100 -2

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@miller-tech/uap",
-  "version": "1.20.44",
+  "version": "1.20.46",
   "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
   "type": "module",
   "main": "dist/index.js",

package/tools/agents/scripts/anthropic_proxy.py CHANGED

@@ -1523,6 +1523,14 @@ PROXY_SLOT_SAVE_DIR = os.environ.get(
 PROXY_SLOT_CACHE_MAX_FILES = int(os.environ.get("PROXY_SLOT_CACHE_MAX_FILES", "12"))
 # llama-server slot id — always 0 under --parallel 1.
 PROXY_SLOT_ID = int(os.environ.get("PROXY_SLOT_ID", "0"))
+# HTTP timeouts for the /slots save|restore calls. A large session's KV
+# state (131k ctx) is ~1 GiB; serializing it to / loading it from disk on
+# a slower model (e.g. Qwen3.6-35B-A3B MoE) can exceed the original
+# hardcoded 60s/120s, surfacing as `SLOT SAVE/RESTORE error` with an empty
+# httpx-timeout exception. Restore is given more headroom than save since
+# it also waits on the disk read + KV reload.
+PROXY_SLOT_SAVE_TIMEOUT = float(os.environ.get("PROXY_SLOT_SAVE_TIMEOUT", "180"))
+PROXY_SLOT_RESTORE_TIMEOUT = float(os.environ.get("PROXY_SLOT_RESTORE_TIMEOUT", "300"))
 # Module state. Mutated only inside the upstream_semaphore-held section
 # (_post_with_retry), so no extra lock is needed.
@@ -1556,7 +1564,9 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
     fn = _slot_filename(session_id)
     url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=save"
     try:
-        resp = await client.post(url, json={"filename": fn}, timeout=60.0)
+        resp = await client.post(
+            url, json={"filename": fn}, timeout=PROXY_SLOT_SAVE_TIMEOUT
+        )
         if resp.status_code == 200:
             logger.info("SLOT SAVE: session=%s -> %s", session_id, fn)
             return True
@@ -1565,7 +1575,12 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
             session_id, resp.status_code, resp.text[:200],
         )
     except Exception as exc:
-        logger.warning("SLOT SAVE error: session=%s %s", session_id, exc)
+        # Include the exception TYPE — httpx timeout exceptions stringify
+        # to "" and an empty message log line is undiagnosable.
+        logger.warning(
+            "SLOT SAVE error: session=%s %s: %s",
+            session_id, type(exc).__name__, exc,
+        )
     return False
@@ -1581,7 +1596,9 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
         return False
     url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=restore"
     try:
-        resp = await client.post(url, json={"filename": fn}, timeout=120.0)
+        resp = await client.post(
+            url, json={"filename": fn}, timeout=PROXY_SLOT_RESTORE_TIMEOUT
+        )
         if resp.status_code == 200:
             logger.info("SLOT RESTORE: session=%s <- %s", session_id, fn)
             return True
@@ -1590,7 +1607,12 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
             session_id, resp.status_code, resp.text[:200],
         )
     except Exception as exc:
-        logger.warning("SLOT RESTORE error: session=%s %s", session_id, exc)
+        # Include the exception TYPE — httpx timeout exceptions stringify
+        # to "" and an empty message log line is undiagnosable.
+        logger.warning(
+            "SLOT RESTORE error: session=%s %s: %s",
+            session_id, type(exc).__name__, exc,
+        )
     return False
@@ -2472,8 +2494,21 @@ def _completion_blockers(
 def _sanitize_tool_schema_for_llama(schema):
     """Remove JSON Schema keywords that generate unsupported regex grammar.
-    llama.cpp's tool grammar generator can fail on regex-heavy schema fields
-    such as "pattern" and "patternProperties" (for example "\\w").
+    llama.cpp's tool grammar generator can fail on regex-heavy schema fields:
+    - "pattern" / "patternProperties" — regex strings (e.g. "\\w").
+    - "format" — string formats. llama.cpp's json-schema-to-grammar turns
+      "format": "date" / "date-time" / "time" / "uuid" into grammar rules
+      built from `\\d`, which its own GBNF parser then rejects with
+      `error parsing grammar: unknown escape at \\d...` → `failed to parse
+      grammar`. Observed on MCP tools with date fields (Atlassian
+      getJiraIssue, tempo bulkCreateWorklogs). "format" is an advisory
+      annotation — dropping it just leaves the field as an unconstrained
+      string in the tool-call grammar, which is correct behaviour.
+    All three are stripped only when they appear as schema *keywords*, not
+    when they are property *names* (a tool may legitimately have a parameter
+    literally called "pattern" or "format").
     """
     removed = 0
@@ -2486,7 +2521,7 @@ def _sanitize_tool_schema_for_llama(schema):
             for key, value in node.items():
                 key_is_property_name = parent_key in property_map_keys
                 if (
-                    key == "pattern"
+                    key in ("pattern", "format")
                     and isinstance(value, str)
                     and not key_is_property_name
                 ):

package/tools/agents/tests/test_anthropic_proxy_streaming.py CHANGED

@@ -284,6 +284,79 @@ class TestToolSchemaSanitization(unittest.TestCase):
         self.assertIn("pattern", params["required"])
         self.assertEqual(params["properties"]["pattern"]["type"], "string")
+    def test_convert_tools_strips_format_fields(self):
+        """A string field with "format": "date" must have format stripped.
+        llama.cpp's json-schema-to-grammar turns format:date/date-time/etc.
+        into `\\d`-based grammar rules that its own GBNF parser then rejects
+        ('unknown escape at \\d' -> 'failed to parse grammar'). Observed on
+        MCP tools like tempo bulkCreateWorklogs (a worklogEntries[].date
+        field) and Atlassian getJiraIssue."""
+        anthropic_tools = [
+            {
+                "name": "bulkCreateWorklogs",
+                "description": "test",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "worklogEntries": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "date": {
+                                        "type": "string",
+                                        "format": "date",
+                                    },
+                                    "started": {
+                                        "type": "string",
+                                        "format": "date-time",
+                                    },
+                                },
+                            },
+                        }
+                    },
+                },
+            }
+        ]
+        converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
+        item = converted[0]["function"]["parameters"]["properties"][
+            "worklogEntries"
+        ]["items"]
+        self.assertNotIn("format", item["properties"]["date"])
+        self.assertNotIn("format", item["properties"]["started"])
+        # The field itself and its type survive — only the format hint goes.
+        self.assertEqual(item["properties"]["date"]["type"], "string")
+    def test_convert_tools_keeps_property_named_format(self):
+        """A tool parameter literally named "format" (e.g. an output-format
+        selector) must NOT be stripped — only the format *keyword* is."""
+        anthropic_tools = [
+            {
+                "name": "ExportTool",
+                "description": "test",
+                "input_schema": {
+                    "type": "object",
+                    "required": ["format"],
+                    "properties": {
+                        "format": {
+                            "type": "string",
+                            "enum": ["json", "csv", "yaml"],
+                            "description": "Output format",
+                        },
+                    },
+                },
+            }
+        ]
+        converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
+        params = converted[0]["function"]["parameters"]
+        self.assertIn("format", params["required"])
+        self.assertEqual(params["properties"]["format"]["type"], "string")
+        self.assertEqual(
+            params["properties"]["format"]["enum"], ["json", "csv", "yaml"]
+        )
 class TestStreamGuardedPathSelection(unittest.TestCase):
     def test_required_tool_turn_uses_guarded_non_stream(self):
@@ -5139,14 +5212,14 @@ class TestThinkingBlockExtraction(unittest.TestCase):
 class _SlotFakeClient:
-    """Records POST calls for slot save/restore tests."""
+    """Records POST calls (incl. the timeout kwarg) for slot tests."""
     def __init__(self, status_code=200):
         self.calls = []
         self._status = status_code
     async def post(self, url, json=None, timeout=None):  # noqa: A002
-        self.calls.append({"url": url, "json": json})
+        self.calls.append({"url": url, "json": json, "timeout": timeout})
         return _FakeResponse({}, status_code=self._status)
@@ -5167,6 +5240,8 @@ class TestSlotSaveRestore(unittest.TestCase):
                 "PROXY_SLOT_SAVE_RESTORE",
                 "PROXY_SLOT_CACHE_MAX_FILES",
                 "PROXY_SLOT_ID",
+                "PROXY_SLOT_SAVE_TIMEOUT",
+                "PROXY_SLOT_RESTORE_TIMEOUT",
                 "_slot_owner_session",
             )
         }
@@ -5253,6 +5328,29 @@ class TestSlotSaveRestore(unittest.TestCase):
         self.assertIn("fp:aaaa", proxy._slot_lru)
         self.assertIn("fp:bbbb", proxy._slot_lru)
+    def test_slot_timeout_defaults_are_sane(self):
+        """Slot save/restore HTTP timeouts must be configurable and large
+        enough for a slow model's ~1 GiB KV serialization. Restore gets more
+        headroom than save (it also waits on disk read + KV reload)."""
+        self.assertIsInstance(proxy.PROXY_SLOT_SAVE_TIMEOUT, float)
+        self.assertIsInstance(proxy.PROXY_SLOT_RESTORE_TIMEOUT, float)
+        # Both above the original hardcoded 60s/120s that were too tight
+        # for the 35B-A3B (surfaced as empty-message SLOT SAVE/RESTORE errors).
+        self.assertGreaterEqual(proxy.PROXY_SLOT_SAVE_TIMEOUT, 120.0)
+        self.assertGreaterEqual(proxy.PROXY_SLOT_RESTORE_TIMEOUT, 180.0)
+        self.assertGreaterEqual(
+            proxy.PROXY_SLOT_RESTORE_TIMEOUT, proxy.PROXY_SLOT_SAVE_TIMEOUT
+        )
+    def test_save_slot_passes_configured_timeout(self):
+        """_save_slot must hand its httpx POST the configured
+        PROXY_SLOT_SAVE_TIMEOUT, not a hardcoded value."""
+        proxy.PROXY_SLOT_SAVE_TIMEOUT = 222.0
+        client = _SlotFakeClient(status_code=200)
+        asyncio.run(proxy._save_slot(client, "fp:timeoutcheck"))
+        self.assertEqual(len(client.calls), 1)
+        self.assertEqual(client.calls[0]["timeout"], 222.0)
     def test_evict_slot_files_respects_lru_cap_and_owner(self):
         """LRU eviction removes oldest entries beyond the cap but never the
         session currently owning the slot."""