@miller-tech/uap 1.20.44 → 1.20.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1523,6 +1523,14 @@ PROXY_SLOT_SAVE_DIR = os.environ.get(
|
|
|
1523
1523
|
PROXY_SLOT_CACHE_MAX_FILES = int(os.environ.get("PROXY_SLOT_CACHE_MAX_FILES", "12"))
|
|
1524
1524
|
# llama-server slot id — always 0 under --parallel 1.
|
|
1525
1525
|
PROXY_SLOT_ID = int(os.environ.get("PROXY_SLOT_ID", "0"))
|
|
1526
|
+
# HTTP timeouts for the /slots save|restore calls. A large session's KV
|
|
1527
|
+
# state (131k ctx) is ~1 GiB; serializing it to / loading it from disk on
|
|
1528
|
+
# a slower model (e.g. Qwen3.6-35B-A3B MoE) can exceed the original
|
|
1529
|
+
# hardcoded 60s/120s, surfacing as `SLOT SAVE/RESTORE error` with an empty
|
|
1530
|
+
# httpx-timeout exception. Restore is given more headroom than save since
|
|
1531
|
+
# it also waits on the disk read + KV reload.
|
|
1532
|
+
PROXY_SLOT_SAVE_TIMEOUT = float(os.environ.get("PROXY_SLOT_SAVE_TIMEOUT", "180"))
|
|
1533
|
+
PROXY_SLOT_RESTORE_TIMEOUT = float(os.environ.get("PROXY_SLOT_RESTORE_TIMEOUT", "300"))
|
|
1526
1534
|
|
|
1527
1535
|
# Module state. Mutated only inside the upstream_semaphore-held section
|
|
1528
1536
|
# (_post_with_retry), so no extra lock is needed.
|
|
@@ -1556,7 +1564,9 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
|
|
|
1556
1564
|
fn = _slot_filename(session_id)
|
|
1557
1565
|
url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=save"
|
|
1558
1566
|
try:
|
|
1559
|
-
resp = await client.post(
|
|
1567
|
+
resp = await client.post(
|
|
1568
|
+
url, json={"filename": fn}, timeout=PROXY_SLOT_SAVE_TIMEOUT
|
|
1569
|
+
)
|
|
1560
1570
|
if resp.status_code == 200:
|
|
1561
1571
|
logger.info("SLOT SAVE: session=%s -> %s", session_id, fn)
|
|
1562
1572
|
return True
|
|
@@ -1565,7 +1575,12 @@ async def _save_slot(client: httpx.AsyncClient, session_id: str) -> bool:
|
|
|
1565
1575
|
session_id, resp.status_code, resp.text[:200],
|
|
1566
1576
|
)
|
|
1567
1577
|
except Exception as exc:
|
|
1568
|
-
|
|
1578
|
+
# Include the exception TYPE — httpx timeout exceptions stringify
|
|
1579
|
+
# to "" and an empty message log line is undiagnosable.
|
|
1580
|
+
logger.warning(
|
|
1581
|
+
"SLOT SAVE error: session=%s %s: %s",
|
|
1582
|
+
session_id, type(exc).__name__, exc,
|
|
1583
|
+
)
|
|
1569
1584
|
return False
|
|
1570
1585
|
|
|
1571
1586
|
|
|
@@ -1581,7 +1596,9 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
|
|
|
1581
1596
|
return False
|
|
1582
1597
|
url = f"{_slot_endpoint_base()}/slots/{PROXY_SLOT_ID}?action=restore"
|
|
1583
1598
|
try:
|
|
1584
|
-
resp = await client.post(
|
|
1599
|
+
resp = await client.post(
|
|
1600
|
+
url, json={"filename": fn}, timeout=PROXY_SLOT_RESTORE_TIMEOUT
|
|
1601
|
+
)
|
|
1585
1602
|
if resp.status_code == 200:
|
|
1586
1603
|
logger.info("SLOT RESTORE: session=%s <- %s", session_id, fn)
|
|
1587
1604
|
return True
|
|
@@ -1590,7 +1607,12 @@ async def _restore_slot(client: httpx.AsyncClient, session_id: str) -> bool:
|
|
|
1590
1607
|
session_id, resp.status_code, resp.text[:200],
|
|
1591
1608
|
)
|
|
1592
1609
|
except Exception as exc:
|
|
1593
|
-
|
|
1610
|
+
# Include the exception TYPE — httpx timeout exceptions stringify
|
|
1611
|
+
# to "" and an empty message log line is undiagnosable.
|
|
1612
|
+
logger.warning(
|
|
1613
|
+
"SLOT RESTORE error: session=%s %s: %s",
|
|
1614
|
+
session_id, type(exc).__name__, exc,
|
|
1615
|
+
)
|
|
1594
1616
|
return False
|
|
1595
1617
|
|
|
1596
1618
|
|
|
@@ -2472,8 +2494,21 @@ def _completion_blockers(
|
|
|
2472
2494
|
def _sanitize_tool_schema_for_llama(schema):
|
|
2473
2495
|
"""Remove JSON Schema keywords that generate unsupported regex grammar.
|
|
2474
2496
|
|
|
2475
|
-
llama.cpp's tool grammar generator can fail on regex-heavy schema fields
|
|
2476
|
-
|
|
2497
|
+
llama.cpp's tool grammar generator can fail on regex-heavy schema fields:
|
|
2498
|
+
|
|
2499
|
+
- "pattern" / "patternProperties" — regex strings (e.g. "\\w").
|
|
2500
|
+
- "format" — string formats. llama.cpp's json-schema-to-grammar turns
|
|
2501
|
+
"format": "date" / "date-time" / "time" / "uuid" into grammar rules
|
|
2502
|
+
built from `\\d`, which its own GBNF parser then rejects with
|
|
2503
|
+
`error parsing grammar: unknown escape at \\d...` → `failed to parse
|
|
2504
|
+
grammar`. Observed on MCP tools with date fields (Atlassian
|
|
2505
|
+
getJiraIssue, tempo bulkCreateWorklogs). "format" is an advisory
|
|
2506
|
+
annotation — dropping it just leaves the field as an unconstrained
|
|
2507
|
+
string in the tool-call grammar, which is correct behaviour.
|
|
2508
|
+
|
|
2509
|
+
All three are stripped only when they appear as schema *keywords*, not
|
|
2510
|
+
when they are property *names* (a tool may legitimately have a parameter
|
|
2511
|
+
literally called "pattern" or "format").
|
|
2477
2512
|
"""
|
|
2478
2513
|
|
|
2479
2514
|
removed = 0
|
|
@@ -2486,7 +2521,7 @@ def _sanitize_tool_schema_for_llama(schema):
|
|
|
2486
2521
|
for key, value in node.items():
|
|
2487
2522
|
key_is_property_name = parent_key in property_map_keys
|
|
2488
2523
|
if (
|
|
2489
|
-
key
|
|
2524
|
+
key in ("pattern", "format")
|
|
2490
2525
|
and isinstance(value, str)
|
|
2491
2526
|
and not key_is_property_name
|
|
2492
2527
|
):
|
|
@@ -284,6 +284,79 @@ class TestToolSchemaSanitization(unittest.TestCase):
|
|
|
284
284
|
self.assertIn("pattern", params["required"])
|
|
285
285
|
self.assertEqual(params["properties"]["pattern"]["type"], "string")
|
|
286
286
|
|
|
287
|
+
def test_convert_tools_strips_format_fields(self):
|
|
288
|
+
"""A string field with "format": "date" must have format stripped.
|
|
289
|
+
llama.cpp's json-schema-to-grammar turns format:date/date-time/etc.
|
|
290
|
+
into `\\d`-based grammar rules that its own GBNF parser then rejects
|
|
291
|
+
('unknown escape at \\d' -> 'failed to parse grammar'). Observed on
|
|
292
|
+
MCP tools like tempo bulkCreateWorklogs (a worklogEntries[].date
|
|
293
|
+
field) and Atlassian getJiraIssue."""
|
|
294
|
+
anthropic_tools = [
|
|
295
|
+
{
|
|
296
|
+
"name": "bulkCreateWorklogs",
|
|
297
|
+
"description": "test",
|
|
298
|
+
"input_schema": {
|
|
299
|
+
"type": "object",
|
|
300
|
+
"properties": {
|
|
301
|
+
"worklogEntries": {
|
|
302
|
+
"type": "array",
|
|
303
|
+
"items": {
|
|
304
|
+
"type": "object",
|
|
305
|
+
"properties": {
|
|
306
|
+
"date": {
|
|
307
|
+
"type": "string",
|
|
308
|
+
"format": "date",
|
|
309
|
+
},
|
|
310
|
+
"started": {
|
|
311
|
+
"type": "string",
|
|
312
|
+
"format": "date-time",
|
|
313
|
+
},
|
|
314
|
+
},
|
|
315
|
+
},
|
|
316
|
+
}
|
|
317
|
+
},
|
|
318
|
+
},
|
|
319
|
+
}
|
|
320
|
+
]
|
|
321
|
+
|
|
322
|
+
converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
|
|
323
|
+
item = converted[0]["function"]["parameters"]["properties"][
|
|
324
|
+
"worklogEntries"
|
|
325
|
+
]["items"]
|
|
326
|
+
self.assertNotIn("format", item["properties"]["date"])
|
|
327
|
+
self.assertNotIn("format", item["properties"]["started"])
|
|
328
|
+
# The field itself and its type survive — only the format hint goes.
|
|
329
|
+
self.assertEqual(item["properties"]["date"]["type"], "string")
|
|
330
|
+
|
|
331
|
+
def test_convert_tools_keeps_property_named_format(self):
|
|
332
|
+
"""A tool parameter literally named "format" (e.g. an output-format
|
|
333
|
+
selector) must NOT be stripped — only the format *keyword* is."""
|
|
334
|
+
anthropic_tools = [
|
|
335
|
+
{
|
|
336
|
+
"name": "ExportTool",
|
|
337
|
+
"description": "test",
|
|
338
|
+
"input_schema": {
|
|
339
|
+
"type": "object",
|
|
340
|
+
"required": ["format"],
|
|
341
|
+
"properties": {
|
|
342
|
+
"format": {
|
|
343
|
+
"type": "string",
|
|
344
|
+
"enum": ["json", "csv", "yaml"],
|
|
345
|
+
"description": "Output format",
|
|
346
|
+
},
|
|
347
|
+
},
|
|
348
|
+
},
|
|
349
|
+
}
|
|
350
|
+
]
|
|
351
|
+
|
|
352
|
+
converted = proxy._convert_anthropic_tools_to_openai(anthropic_tools)
|
|
353
|
+
params = converted[0]["function"]["parameters"]
|
|
354
|
+
self.assertIn("format", params["required"])
|
|
355
|
+
self.assertEqual(params["properties"]["format"]["type"], "string")
|
|
356
|
+
self.assertEqual(
|
|
357
|
+
params["properties"]["format"]["enum"], ["json", "csv", "yaml"]
|
|
358
|
+
)
|
|
359
|
+
|
|
287
360
|
|
|
288
361
|
class TestStreamGuardedPathSelection(unittest.TestCase):
|
|
289
362
|
def test_required_tool_turn_uses_guarded_non_stream(self):
|
|
@@ -5139,14 +5212,14 @@ class TestThinkingBlockExtraction(unittest.TestCase):
|
|
|
5139
5212
|
|
|
5140
5213
|
|
|
5141
5214
|
class _SlotFakeClient:
|
|
5142
|
-
"""Records POST calls for slot
|
|
5215
|
+
"""Records POST calls (incl. the timeout kwarg) for slot tests."""
|
|
5143
5216
|
|
|
5144
5217
|
def __init__(self, status_code=200):
|
|
5145
5218
|
self.calls = []
|
|
5146
5219
|
self._status = status_code
|
|
5147
5220
|
|
|
5148
5221
|
async def post(self, url, json=None, timeout=None): # noqa: A002
|
|
5149
|
-
self.calls.append({"url": url, "json": json})
|
|
5222
|
+
self.calls.append({"url": url, "json": json, "timeout": timeout})
|
|
5150
5223
|
return _FakeResponse({}, status_code=self._status)
|
|
5151
5224
|
|
|
5152
5225
|
|
|
@@ -5167,6 +5240,8 @@ class TestSlotSaveRestore(unittest.TestCase):
|
|
|
5167
5240
|
"PROXY_SLOT_SAVE_RESTORE",
|
|
5168
5241
|
"PROXY_SLOT_CACHE_MAX_FILES",
|
|
5169
5242
|
"PROXY_SLOT_ID",
|
|
5243
|
+
"PROXY_SLOT_SAVE_TIMEOUT",
|
|
5244
|
+
"PROXY_SLOT_RESTORE_TIMEOUT",
|
|
5170
5245
|
"_slot_owner_session",
|
|
5171
5246
|
)
|
|
5172
5247
|
}
|
|
@@ -5253,6 +5328,29 @@ class TestSlotSaveRestore(unittest.TestCase):
|
|
|
5253
5328
|
self.assertIn("fp:aaaa", proxy._slot_lru)
|
|
5254
5329
|
self.assertIn("fp:bbbb", proxy._slot_lru)
|
|
5255
5330
|
|
|
5331
|
+
def test_slot_timeout_defaults_are_sane(self):
|
|
5332
|
+
"""Slot save/restore HTTP timeouts must be configurable and large
|
|
5333
|
+
enough for a slow model's ~1 GiB KV serialization. Restore gets more
|
|
5334
|
+
headroom than save (it also waits on disk read + KV reload)."""
|
|
5335
|
+
self.assertIsInstance(proxy.PROXY_SLOT_SAVE_TIMEOUT, float)
|
|
5336
|
+
self.assertIsInstance(proxy.PROXY_SLOT_RESTORE_TIMEOUT, float)
|
|
5337
|
+
# Both above the original hardcoded 60s/120s that were too tight
|
|
5338
|
+
# for the 35B-A3B (surfaced as empty-message SLOT SAVE/RESTORE errors).
|
|
5339
|
+
self.assertGreaterEqual(proxy.PROXY_SLOT_SAVE_TIMEOUT, 120.0)
|
|
5340
|
+
self.assertGreaterEqual(proxy.PROXY_SLOT_RESTORE_TIMEOUT, 180.0)
|
|
5341
|
+
self.assertGreaterEqual(
|
|
5342
|
+
proxy.PROXY_SLOT_RESTORE_TIMEOUT, proxy.PROXY_SLOT_SAVE_TIMEOUT
|
|
5343
|
+
)
|
|
5344
|
+
|
|
5345
|
+
def test_save_slot_passes_configured_timeout(self):
|
|
5346
|
+
"""_save_slot must hand its httpx POST the configured
|
|
5347
|
+
PROXY_SLOT_SAVE_TIMEOUT, not a hardcoded value."""
|
|
5348
|
+
proxy.PROXY_SLOT_SAVE_TIMEOUT = 222.0
|
|
5349
|
+
client = _SlotFakeClient(status_code=200)
|
|
5350
|
+
asyncio.run(proxy._save_slot(client, "fp:timeoutcheck"))
|
|
5351
|
+
self.assertEqual(len(client.calls), 1)
|
|
5352
|
+
self.assertEqual(client.calls[0]["timeout"], 222.0)
|
|
5353
|
+
|
|
5256
5354
|
def test_evict_slot_files_respects_lru_cap_and_owner(self):
|
|
5257
5355
|
"""LRU eviction removes oldest entries beyond the cap but never the
|
|
5258
5356
|
session currently owning the slot."""
|