@miller-tech/uap 1.20.44 → 1.20.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.44",
3
+ "version": "1.20.46",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1523,6 +1523,14 @@ PROXY_SLOT_SAVE_DIR = os.environ.get(
1523
1523
  PROXY_SLOT_CACHE_MAX_FILES = int(os.environ.get("PROXY_SLOT_CACHE_MAX_FILES", "12"))
1524
1524
  # llama-server slot id — always 0 under --parallel 1.
1525
1525
  PROXY_SLOT_ID = int(os.environ.get("PROXY_SLOT_ID", "0"))
1526
+ # HTTP timeouts for the /slots save|restore calls. A large session's KV
1527
+ # state (131k ctx) is ~1 GiB; serializing it to / loading it from disk on
1528
+ # a slower model (e.g. Qwen3.6-35B-A3B MoE) can exceed the original
1529
+ # hardcoded 60s/120s, surfacing as `SLOT SAVE/RESTORE error` with an empty
1530
+ # httpx-timeout exception. Restore is given more headroom than save since
1531
+ # it also waits on the disk read + KV reload.
1532
+ PROXY_SLOT_SAVE_TIMEOUT = float(os.environ.get("PROXY_SLOT_SAVE_TIMEOUT", "180"))
1533
+ PROXY_SLOT_RESTORE_TIMEOUT = float(os.environ.get("PROXY_SLOT_RESTORE_TIMEOUT", "300"))
1526
1534
 
1527
1535
  # Module state. Mutated only inside the upstream_semaphore-held section
1528
1536
  # (_post_with_retry), so no extra lock is needed.
@@ -1556,7 +1564,9 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1556
1564
  fn = _slot_filename(session_id)
1557
1565
  url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=save"
1558
1566
  try:
1559
- resp = await client.post(url, json={"filename": fn}, timeout=60.0)
1567
+ resp = await client.post(
1568
+ url, json={"filename": fn}, timeout=PROXY_SLOT_SAVE_TIMEOUT
1569
+ )
1560
1570
  if resp.status_code == 200:
1561
1571
  logger.info("SLOT SAVE: session=%s -> %s", session_id, fn)
1562
1572
  return True
@@ -1565,7 +1575,12 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1565
1575
  session_id, resp.status_code, resp.text[:200],
1566
1576
  )
1567
1577
  except Exception as exc:
1568
- logger.warning("SLOT SAVE error: session=%s %s", session_id, exc)
1578
+ # Include the exception TYPE: httpx timeout exceptions stringify
1579
+ # to "", and an empty-message log line is undiagnosable.
1580
+ logger.warning(
1581
+ "SLOT SAVE error: session=%s %s: %s",
1582
+ session_id, type(exc).__name__, exc,
1583
+ )
1569
1584
  return False
1570
1585
 
1571
1586
 
@@ -1581,7 +1596,9 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1581
1596
  return False
1582
1597
  url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=restore"
1583
1598
  try:
1584
- resp = await client.post(url, json={"filename": fn}, timeout=120.0)
1599
+ resp = await client.post(
1600
+ url, json={"filename": fn}, timeout=PROXY_SLOT_RESTORE_TIMEOUT
1601
+ )
1585
1602
  if resp.status_code == 200:
1586
1603
  logger.info("SLOT RESTORE: session=%s <- %s", session_id, fn)
1587
1604
  return True
@@ -1590,7 +1607,12 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
1590
1607
  session_id, resp.status_code, resp.text[:200],
1591
1608
  )
1592
1609
  except Exception as exc:
1593
- logger.warning("SLOT RESTORE error: session=%s %s", session_id, exc)
1610
+ # Include the exception TYPE: httpx timeout exceptions stringify
1611
+ # to "", and an empty-message log line is undiagnosable.
1612
+ logger.warning(
1613
+ "SLOT RESTORE error: session=%s %s: %s",
1614
+ session_id, type(exc).__name__, exc,
1615
+ )
1594
1616
  return False
1595
1617
 
1596
1618
 
@@ -2472,8 +2494,21 @@ def _completion_blockers(
2472
2494
  def _sanitize_tool_schema_for_llama(schema):
2473
2495
  """Remove JSON Schema keywords that generate unsupported regex grammar.
2474
2496
 
2475
- llama.cpp's tool grammar generator can fail on regex-heavy schema fields
2476
- such as "pattern" and "patternProperties" (for example "\\w").
2497
+ llama.cpp's tool grammar generator can fail on regex-heavy schema fields:
2498
+
2499
+ - "pattern" / "patternProperties" — regex strings (e.g. "\\w").
2500
+ - "format" — string formats. llama.cpp's json-schema-to-grammar turns
2501
+ "format": "date" / "date-time" / "time" / "uuid" into grammar rules
2502
+ built from `\\d`, which its own GBNF parser then rejects with
2503
+ `error parsing grammar: unknown escape at \\d...` → `failed to parse
2504
+ grammar`. Observed on MCP tools with date fields (Atlassian
2505
+ getJiraIssue, tempo bulkCreateWorklogs). "format" is an advisory
2506
+ annotation — dropping it just leaves the field as an unconstrained
2507
+ string in the tool-call grammar, which is correct behaviour.
2508
+
2509
+ All three are stripped only when they appear as schema *keywords*, not
2510
+ when they are property *names* (a tool may legitimately have a parameter
2511
+ literally called "pattern" or "format").
2477
2512
  """
2478
2513
 
2479
2514
  removed = 0
@@ -2486,7 +2521,7 @@ def _sanitize_tool_schema_for_llama(schema):
2486
2521
  for key, value in node.items():
2487
2522
  key_is_property_name = parent_key in property_map_keys
2488
2523
  if (
2489
- key == "pattern"
2524
+ key in ("pattern", "format")
2490
2525
  and isinstance(value, str)
2491
2526
  and not key_is_property_name
2492
2527
  ):
@@ -284,6 +284,79 @@ class TestToolSchemaSanitization(unittest.TestCase):
284
284
  self.assertIn("pattern", params["required"])
285
285
  self.assertEqual(params["properties"]["pattern"]["type"], "string")
286
286
 
287
+ def test_convert_tools_strips_format_fields(self):
288
+ """A string field with "format": "date" must have format stripped.
289
+ llama.cpp's json-schema-to-grammar turns format:date/date-time/etc.
290
+ into `\\d`-based grammar rules that its own GBNF parser then rejects
291
+ ('unknown escape at \\d' -> 'failed to parse grammar'). Observed on
292
+ MCP tools like tempo bulkCreateWorklogs (a worklogEntries[].date
293
+ field) and Atlassian getJiraIssue."""
294
+ anthropic_tools = [
295
+ {
296
+ "name": "bulkCreateWorklogs",
297
+ "description": "test",
298
+ "input_schema": {
299
+ "type": "object",
300
+ "properties": {
301
+ "worklogEntries": {
302
+ "type": "array",
303
+ "items": {
304
+ "type": "object",
305
+ "properties": {
306
+ "date": {
307
+ "type": "string",
308
+ "format": "date",
309
+ },
310
+ "started": {
311
+ "type": "string",
312
+ "format": "date-time",
313
+ },
314
+ },
315
+ },
316
+ }
317
+ },
318
+ },
319
+ }
320
+ ]
321
+
322
+ converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
323
+ item = converted[0]["function"]["parameters"]["properties"][
324
+ "worklogEntries"
325
+ ]["items"]
326
+ self.assertNotIn("format", item["properties"]["date"])
327
+ self.assertNotIn("format", item["properties"]["started"])
328
+ # The field itself and its type survive — only the format hint goes.
329
+ self.assertEqual(item["properties"]["date"]["type"], "string")
330
+
331
+ def test_convert_tools_keeps_property_named_format(self):
332
+ """A tool parameter literally named "format" (e.g. an output-format
333
+ selector) must NOT be stripped — only the format *keyword* is."""
334
+ anthropic_tools = [
335
+ {
336
+ "name": "ExportTool",
337
+ "description": "test",
338
+ "input_schema": {
339
+ "type": "object",
340
+ "required": ["format"],
341
+ "properties": {
342
+ "format": {
343
+ "type": "string",
344
+ "enum": ["json", "csv", "yaml"],
345
+ "description": "Output format",
346
+ },
347
+ },
348
+ },
349
+ }
350
+ ]
351
+
352
+ converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
353
+ params = converted[0]["function"]["parameters"]
354
+ self.assertIn("format", params["required"])
355
+ self.assertEqual(params["properties"]["format"]["type"], "string")
356
+ self.assertEqual(
357
+ params["properties"]["format"]["enum"], ["json", "csv", "yaml"]
358
+ )
359
+
287
360
 
288
361
  class TestStreamGuardedPathSelection(unittest.TestCase):
289
362
  def test_required_tool_turn_uses_guarded_non_stream(self):
@@ -5139,14 +5212,14 @@ class TestThinkingBlockExtraction(unittest.TestCase):
5139
5212
 
5140
5213
 
5141
5214
  class _SlotFakeClient:
5142
- """Records POST calls for slot save/restore tests."""
5215
+ """Records POST calls (incl. the timeout kwarg) for slot tests."""
5143
5216
 
5144
5217
  def __init__(self, status_code=200):
5145
5218
  self.calls = []
5146
5219
  self._status = status_code
5147
5220
 
5148
5221
  async def post(self, url, json=None, timeout=None): # noqa: A002
5149
- self.calls.append({"url": url, "json": json})
5222
+ self.calls.append({"url": url, "json": json, "timeout": timeout})
5150
5223
  return _FakeResponse({}, status_code=self._status)
5151
5224
 
5152
5225
 
@@ -5167,6 +5240,8 @@ class TestSlotSaveRestore(unittest.TestCase):
5167
5240
  "PROXY_SLOT_SAVE_RESTORE",
5168
5241
  "PROXY_SLOT_CACHE_MAX_FILES",
5169
5242
  "PROXY_SLOT_ID",
5243
+ "PROXY_SLOT_SAVE_TIMEOUT",
5244
+ "PROXY_SLOT_RESTORE_TIMEOUT",
5170
5245
  "_slot_owner_session",
5171
5246
  )
5172
5247
  }
@@ -5253,6 +5328,29 @@ class TestSlotSaveRestore(unittest.TestCase):
5253
5328
  self.assertIn("fp:aaaa", proxy._slot_lru)
5254
5329
  self.assertIn("fp:bbbb", proxy._slot_lru)
5255
5330
 
5331
+ def test_slot_timeout_defaults_are_sane(self):
5332
+ """Slot save/restore HTTP timeouts must be configurable and large
5333
+ enough for a slow model's ~1 GiB KV serialization. Restore gets more
5334
+ headroom than save (it also waits on disk read + KV reload)."""
5335
+ self.assertIsInstance(proxy.PROXY_SLOT_SAVE_TIMEOUT, float)
5336
+ self.assertIsInstance(proxy.PROXY_SLOT_RESTORE_TIMEOUT, float)
5337
+ # Both above the original hardcoded 60s/120s that were too tight
5338
+ # for the 35B-A3B (surfaced as empty-message SLOT SAVE/RESTORE errors).
5339
+ self.assertGreaterEqual(proxy.PROXY_SLOT_SAVE_TIMEOUT, 120.0)
5340
+ self.assertGreaterEqual(proxy.PROXY_SLOT_RESTORE_TIMEOUT, 180.0)
5341
+ self.assertGreaterEqual(
5342
+ proxy.PROXY_SLOT_RESTORE_TIMEOUT, proxy.PROXY_SLOT_SAVE_TIMEOUT
5343
+ )
5344
+
5345
+ def test_save_slot_passes_configured_timeout(self):
5346
+ """_save_slot must hand its httpx POST the configured
5347
+ PROXY_SLOT_SAVE_TIMEOUT, not a hardcoded value."""
5348
+ proxy.PROXY_SLOT_SAVE_TIMEOUT = 222.0
5349
+ client = _SlotFakeClient(status_code=200)
5350
+ asyncio.run(proxy._save_slot(client, "fp:timeoutcheck"))
5351
+ self.assertEqual(len(client.calls), 1)
5352
+ self.assertEqual(client.calls[0]["timeout"], 222.0)
5353
+
5256
5354
  def test_evict_slot_files_respects_lru_cap_and_owner(self):
5257
5355
  """LRU eviction removes oldest entries beyond the cap but never the
5258
5356
  session currently owning the slot."""