@miller-tech/uap 1.20.45 → 1.20.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.45",
3
+ "version": "1.20.46",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1523,6 +1523,14 @@ PROXY_SLOT_SAVE_DIR = os.environ.get(
1523
1523
  PROXY_SLOT_CACHE_MAX_FILES = int(os.environ.get("PROXY_SLOT_CACHE_MAX_FILES", "12"))
1524
1524
  # llama-server slot id — always 0 under --parallel 1.
1525
1525
  PROXY_SLOT_ID = int(os.environ.get("PROXY_SLOT_ID", "0"))
1526
+ # HTTP timeouts for the /slots save|restore calls. A large session's KV
1527
+ # state (131k ctx) is ~1 GiB; serializing it to / loading it from disk on
1528
+ # a slower model (e.g. Qwen3.6-35B-A3B MoE) can exceed the original
1529
+ # hardcoded 60s/120s, surfacing as `SLOT SAVE/RESTORE error` with an empty
1530
+ # httpx-timeout exception. Restore is given more headroom than save since
1531
+ # it also waits on the disk read + KV reload.
1532
+ PROXY_SLOT_SAVE_TIMEOUT = float(os.environ.get("PROXY_SLOT_SAVE_TIMEOUT", "180"))
1533
+ PROXY_SLOT_RESTORE_TIMEOUT = float(os.environ.get("PROXY_SLOT_RESTORE_TIMEOUT", "300"))
1526
1534
 
1527
1535
  # Module state. Mutated only inside the upstream_semaphore-held section
1528
1536
  # (_post_with_retry), so no extra lock is needed.
@@ -1556,7 +1564,9 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1556
1564
  fn = _slot_filename(session_id)
1557
1565
  url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=save"
1558
1566
  try:
1559
- resp = await client.post(url, json={"filename": fn}, timeout=60.0)
1567
+ resp = await client.post(
1568
+ url, json={"filename": fn}, timeout=PROXY_SLOT_SAVE_TIMEOUT
1569
+ )
1560
1570
  if resp.status_code == 200:
1561
1571
  logger.info("SLOT SAVE: session=%s -> %s", session_id, fn)
1562
1572
  return True
@@ -1565,7 +1575,12 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1565
1575
  session_id, resp.status_code, resp.text[:200],
1566
1576
  )
1567
1577
  except Exception as exc:
1568
- logger.warning("SLOT SAVE error: session=%s %s", session_id, exc)
1578
+ # Include the exception TYPE — httpx timeout exceptions stringify
1579
+ # to "" and an empty-message log line is undiagnosable.
1580
+ logger.warning(
1581
+ "SLOT SAVE error: session=%s %s: %s",
1582
+ session_id, type(exc).__name__, exc,
1583
+ )
1569
1584
  return False
1570
1585
 
1571
1586
 
@@ -1581,7 +1596,9 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1581
1596
  return False
1582
1597
  url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=restore"
1583
1598
  try:
1584
- resp = await client.post(url, json={"filename": fn}, timeout=120.0)
1599
+ resp = await client.post(
1600
+ url, json={"filename": fn}, timeout=PROXY_SLOT_RESTORE_TIMEOUT
1601
+ )
1585
1602
  if resp.status_code == 200:
1586
1603
  logger.info("SLOT RESTORE: session=%s <- %s", session_id, fn)
1587
1604
  return True
@@ -1590,7 +1607,12 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1590
1607
  session_id, resp.status_code, resp.text[:200],
1591
1608
  )
1592
1609
  except Exception as exc:
1593
- logger.warning("SLOT RESTORE error: session=%s %s", session_id, exc)
1610
+ # Include the exception TYPE — httpx timeout exceptions stringify
1611
+ # to "" and an empty-message log line is undiagnosable.
1612
+ logger.warning(
1613
+ "SLOT RESTORE error: session=%s %s: %s",
1614
+ session_id, type(exc).__name__, exc,
1615
+ )
1594
1616
  return False
1595
1617
 
1596
1618
 
@@ -5212,14 +5212,14 @@ class TestThinkingBlockExtraction(unittest.TestCase):
5212
5212
 
5213
5213
 
5214
5214
  class _SlotFakeClient:
5215
- """Records POST calls for slot save/restore tests."""
5215
+ """Records POST calls (incl. the timeout kwarg) for slot tests."""
5216
5216
 
5217
5217
  def __init__(self, status_code=200):
5218
5218
  self.calls = []
5219
5219
  self._status = status_code
5220
5220
 
5221
5221
  async def post(self, url, json=None, timeout=None): # noqa: A002
5222
- self.calls.append({"url": url, "json": json})
5222
+ self.calls.append({"url": url, "json": json, "timeout": timeout})
5223
5223
  return _FakeResponse({}, status_code=self._status)
5224
5224
 
5225
5225
 
@@ -5240,6 +5240,8 @@ class TestSlotSaveRestore(unittest.TestCase):
5240
5240
  "PROXY_SLOT_SAVE_RESTORE",
5241
5241
  "PROXY_SLOT_CACHE_MAX_FILES",
5242
5242
  "PROXY_SLOT_ID",
5243
+ "PROXY_SLOT_SAVE_TIMEOUT",
5244
+ "PROXY_SLOT_RESTORE_TIMEOUT",
5243
5245
  "_slot_owner_session",
5244
5246
  )
5245
5247
  }
@@ -5326,6 +5328,29 @@ class TestSlotSaveRestore(unittest.TestCase):
5326
5328
  self.assertIn("fp:aaaa", proxy._slot_lru)
5327
5329
  self.assertIn("fp:bbbb", proxy._slot_lru)
5328
5330
 
5331
+ def test_slot_timeout_defaults_are_sane(self):
5332
+ """Slot save/restore HTTP timeouts must be configurable and large
5333
+ enough for a slow model's ~1 GiB KV serialization. Restore gets more
5334
+ headroom than save (it also waits on disk read + KV reload)."""
5335
+ self.assertIsInstance(proxy.PROXY_SLOT_SAVE_TIMEOUT, float)
5336
+ self.assertIsInstance(proxy.PROXY_SLOT_RESTORE_TIMEOUT, float)
5337
+ # Both above the original hardcoded 60s/120s that were too tight
5338
+ # for the 35B-A3B (surfaced as empty-message SLOT SAVE/RESTORE errors).
5339
+ self.assertGreaterEqual(proxy.PROXY_SLOT_SAVE_TIMEOUT, 120.0)
5340
+ self.assertGreaterEqual(proxy.PROXY_SLOT_RESTORE_TIMEOUT, 180.0)
5341
+ self.assertGreaterEqual(
5342
+ proxy.PROXY_SLOT_RESTORE_TIMEOUT, proxy.PROXY_SLOT_SAVE_TIMEOUT
5343
+ )
5344
+
5345
+ def test_save_slot_passes_configured_timeout(self):
5346
+ """_save_slot must hand its httpx POST the configured
5347
+ PROXY_SLOT_SAVE_TIMEOUT, not a hardcoded value."""
5348
+ proxy.PROXY_SLOT_SAVE_TIMEOUT = 222.0
5349
+ client = _SlotFakeClient(status_code=200)
5350
+ asyncio.run(proxy._save_slot(client, "fp:timeoutcheck"))
5351
+ self.assertEqual(len(client.calls), 1)
5352
+ self.assertEqual(client.calls[0]["timeout"], 222.0)
5353
+
5329
5354
  def test_evict_slot_files_respects_lru_cap_and_owner(self):
5330
5355
  """LRU eviction removes oldest entries beyond the cap but never the
5331
5356
  session currently owning the slot."""