@miller-tech/uap 1.15.6 → 1.15.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.15.6",
3
+ "version": "1.15.7",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -254,6 +254,28 @@ PROXY_ANALYSIS_ONLY_MIN_TOOLS = int(
254
254
  PROXY_ANALYSIS_ONLY_MAX_MESSAGES = int(
255
255
  os.environ.get("PROXY_ANALYSIS_ONLY_MAX_MESSAGES", "2")
256
256
  )
257
+ PROXY_TOOL_CALL_GRAMMAR = os.environ.get(
258
+ "PROXY_TOOL_CALL_GRAMMAR", "on"
259
+ ).lower() not in {
260
+ "0",
261
+ "false",
262
+ "off",
263
+ "no",
264
+ }
265
+ PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY = os.environ.get(
266
+ "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", "on"
267
+ ).lower() not in {
268
+ "0",
269
+ "false",
270
+ "off",
271
+ "no",
272
+ }
273
+ PROXY_TOOL_CALL_GRAMMAR_PATH = os.path.abspath(
274
+ os.environ.get(
275
+ "PROXY_TOOL_CALL_GRAMMAR_PATH",
276
+ os.path.join(os.path.dirname(__file__), "..", "config", "tool-call.gbnf"),
277
+ )
278
+ )
257
279
 
258
280
  # ---------------------------------------------------------------------------
259
281
  # Logging
@@ -266,6 +288,45 @@ logging.basicConfig(
266
288
  logger = logging.getLogger("uap.anthropic_proxy")
267
289
 
268
290
 
291
+ def _load_tool_call_grammar(path: str) -> str:
292
+ if not PROXY_TOOL_CALL_GRAMMAR:
293
+ return ""
294
+
295
+ try:
296
+ with open(path, "r", encoding="utf-8") as fh:
297
+ return fh.read().strip()
298
+ except OSError as exc:
299
+ logger.warning(
300
+ "Tool-call grammar disabled: failed to read %s (%s)",
301
+ path,
302
+ exc,
303
+ )
304
+ return ""
305
+
306
+
307
+ TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
308
+
309
+
310
+ def _apply_tool_call_grammar(
311
+ request_body: dict, tool_choice: str | None = None
312
+ ) -> None:
313
+ request_body.pop("grammar", None)
314
+
315
+ if not PROXY_TOOL_CALL_GRAMMAR or not TOOL_CALL_GBNF:
316
+ return
317
+
318
+ if not request_body.get("tools"):
319
+ return
320
+
321
+ effective_tool_choice = (
322
+ tool_choice if tool_choice is not None else request_body.get("tool_choice")
323
+ )
324
+ if PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY and effective_tool_choice != "required":
325
+ return
326
+
327
+ request_body["grammar"] = TOOL_CALL_GBNF
328
+
329
+
269
330
  # ---------------------------------------------------------------------------
270
331
  # Option F: Session-level Context Window Monitor
271
332
  # ---------------------------------------------------------------------------
@@ -876,7 +937,7 @@ async def lifespan(app: FastAPI):
876
937
  _resolve_prune_target_fraction() * 100,
877
938
  )
878
939
  logger.info(
879
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
940
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
880
941
  PROXY_MALFORMED_TOOL_GUARDRAIL,
881
942
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
882
943
  PROXY_FORCE_NON_STREAM,
@@ -896,6 +957,10 @@ async def lifespan(app: FastAPI):
896
957
  PROXY_ANALYSIS_ONLY_ROUTE,
897
958
  PROXY_ANALYSIS_ONLY_MIN_TOOLS,
898
959
  PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
960
+ PROXY_TOOL_CALL_GRAMMAR,
961
+ PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
962
+ bool(TOOL_CALL_GBNF),
963
+ PROXY_TOOL_CALL_GRAMMAR_PATH,
899
964
  )
900
965
 
901
966
  yield
@@ -1445,6 +1510,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1445
1510
  "Thinking disabled for tool turn (PROXY_DISABLE_THINKING_ON_TOOL_TURNS=on)"
1446
1511
  )
1447
1512
 
1513
+ _apply_tool_call_grammar(openai_body)
1514
+
1448
1515
  return openai_body
1449
1516
 
1450
1517
 
@@ -2485,6 +2552,8 @@ def _build_malformed_retry_body(
2485
2552
  if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
2486
2553
  retry_body["enable_thinking"] = False
2487
2554
 
2555
+ _apply_tool_call_grammar(retry_body, tool_choice=tool_choice)
2556
+
2488
2557
  if retry_hint:
2489
2558
  repair_prompt = (
2490
2559
  f"[TOOL CALL REPAIR attempt {attempt}/{total_attempts}]\n"
@@ -2571,6 +2640,7 @@ async def _apply_unexpected_end_turn_guardrail(
2571
2640
  retry_body = dict(openai_body)
2572
2641
  retry_body["tool_choice"] = "required"
2573
2642
  retry_body["stream"] = False
2643
+ _apply_tool_call_grammar(retry_body, tool_choice="required")
2574
2644
 
2575
2645
  retry_resp = await client.post(
2576
2646
  f"{LLAMA_CPP_BASE}/chat/completions",
@@ -3737,6 +3807,12 @@ async def context_status(request: Request):
3737
3807
  "overflow_count": monitor.overflow_count,
3738
3808
  "prune_threshold": PROXY_CONTEXT_PRUNE_THRESHOLD,
3739
3809
  "recent_history": monitor.context_history[-10:],
3810
+ "tool_call_grammar": {
3811
+ "enabled": PROXY_TOOL_CALL_GRAMMAR,
3812
+ "required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
3813
+ "path": PROXY_TOOL_CALL_GRAMMAR_PATH,
3814
+ "loaded": bool(TOOL_CALL_GBNF),
3815
+ },
3740
3816
  # Loop protection stats
3741
3817
  "loop_protection": {
3742
3818
  "enabled": PROXY_LOOP_BREAKER,
@@ -514,6 +514,41 @@ class TestMalformedToolGuardrail(unittest.TestCase):
514
514
  self.assertEqual(proxy._retry_tool_choice_for_attempt(True, 2, 3), "auto")
515
515
  self.assertEqual(proxy._retry_tool_choice_for_attempt(False, 0, 3), "auto")
516
516
 
517
+ def test_malformed_retry_body_applies_grammar_only_for_required_tool_choice(self):
518
+ old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
519
+ old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
520
+ old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
521
+ try:
522
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
523
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
524
+ setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
525
+
526
+ openai_body = {
527
+ "model": "test",
528
+ "messages": [{"role": "user", "content": "fix"}],
529
+ }
530
+ anthropic_body = {
531
+ "tools": [{"name": "Read", "input_schema": {"type": "object"}}]
532
+ }
533
+
534
+ required_retry = proxy._build_malformed_retry_body(
535
+ openai_body,
536
+ anthropic_body,
537
+ tool_choice="required",
538
+ )
539
+ auto_retry = proxy._build_malformed_retry_body(
540
+ openai_body,
541
+ anthropic_body,
542
+ tool_choice="auto",
543
+ )
544
+
545
+ self.assertEqual(required_retry.get("grammar"), 'root ::= "<tool_call>"')
546
+ self.assertNotIn("grammar", auto_retry)
547
+ finally:
548
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
549
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
550
+ setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
551
+
517
552
  def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
518
553
  guardrail = proxy._build_clean_guardrail_openai_response(
519
554
  {"model": "test-model"}
@@ -1269,6 +1304,81 @@ class TestToolTurnControls(unittest.TestCase):
1269
1304
  setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", old_rejections)
1270
1305
  setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", old_auto_turns)
1271
1306
 
1307
+ def test_build_request_applies_grammar_when_tool_choice_required(self):
1308
+ old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
1309
+ old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
1310
+ old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
1311
+ try:
1312
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
1313
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
1314
+ setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
1315
+
1316
+ body = {
1317
+ "model": "test",
1318
+ "messages": [
1319
+ {
1320
+ "role": "assistant",
1321
+ "content": [{"type": "text", "text": "I will continue."}],
1322
+ },
1323
+ {"role": "user", "content": "continue"},
1324
+ ],
1325
+ "tools": [
1326
+ {
1327
+ "name": "Read",
1328
+ "description": "Read file",
1329
+ "input_schema": {"type": "object"},
1330
+ }
1331
+ ],
1332
+ }
1333
+
1334
+ openai = proxy.build_openai_request(
1335
+ body, proxy.SessionMonitor(context_window=262144)
1336
+ )
1337
+ self.assertEqual(openai.get("tool_choice"), "required")
1338
+ self.assertEqual(openai.get("grammar"), 'root ::= "<tool_call>"')
1339
+ finally:
1340
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
1341
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
1342
+ setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
1343
+
1344
+ def test_build_request_omits_grammar_when_tool_choice_released_to_auto(self):
1345
+ old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
1346
+ old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
1347
+ old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
1348
+ try:
1349
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
1350
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
1351
+ setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
1352
+
1353
+ monitor = proxy.SessionMonitor(context_window=262144)
1354
+ monitor.forced_auto_cooldown_turns = 1
1355
+
1356
+ body = {
1357
+ "model": "test",
1358
+ "messages": [
1359
+ {
1360
+ "role": "assistant",
1361
+ "content": [{"type": "text", "text": "I will continue."}],
1362
+ },
1363
+ {"role": "user", "content": "continue"},
1364
+ ],
1365
+ "tools": [
1366
+ {
1367
+ "name": "Read",
1368
+ "description": "Read file",
1369
+ "input_schema": {"type": "object"},
1370
+ }
1371
+ ],
1372
+ }
1373
+
1374
+ openai = proxy.build_openai_request(body, monitor)
1375
+ self.assertEqual(openai.get("tool_choice"), "auto")
1376
+ self.assertNotIn("grammar", openai)
1377
+ finally:
1378
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
1379
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
1380
+ setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
1381
+
1272
1382
  def test_no_tools_does_not_inject_agentic_system_message(self):
1273
1383
  body = {
1274
1384
  "model": "test",