@miller-tech/uap 1.14.1 → 1.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.14.1",
3
+ "version": "1.15.1",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -162,7 +162,7 @@ PROXY_MALFORMED_TOOL_GUARDRAIL = os.environ.get(
162
162
  "no",
163
163
  }
164
164
  PROXY_MALFORMED_TOOL_RETRY_MAX = int(
165
- os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "1")
165
+ os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "2")
166
166
  )
167
167
  PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS = int(
168
168
  os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", "2048")
@@ -203,6 +203,20 @@ PROXY_SESSION_CONTAMINATION_KEEP_LAST = int(
203
203
  PROXY_AGENTIC_SUPPLEMENT_MODE = (
204
204
  os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
205
205
  )
206
# Opt-in switch for the analysis-only route (default "off"). Any value other
# than the recognised "off" spellings below enables it. The value is stripped
# before comparison so that padded settings such as "off " (for example from
# an env file) are still treated as off instead of silently enabling the route.
PROXY_ANALYSIS_ONLY_ROUTE = os.environ.get(
    "PROXY_ANALYSIS_ONLY_ROUTE", "off"
).strip().lower() not in {
    "0",
    "false",
    "off",
    "no",
}
# Minimum number of tool definitions a request must carry before the
# analysis-only route is considered.
PROXY_ANALYSIS_ONLY_MIN_TOOLS = int(
    os.environ.get("PROXY_ANALYSIS_ONLY_MIN_TOOLS", "12")
)
# Maximum number of messages a request may contain for the analysis-only
# route to be considered.
PROXY_ANALYSIS_ONLY_MAX_MESSAGES = int(
    os.environ.get("PROXY_ANALYSIS_ONLY_MAX_MESSAGES", "2")
)
206
220
 
207
221
  # ---------------------------------------------------------------------------
208
222
  # Logging
@@ -549,8 +563,9 @@ def estimate_total_tokens(anthropic_body: dict) -> int:
549
563
  if isinstance(block, dict) and block.get("type") == "text":
550
564
  tokens += estimate_tokens(block.get("text", ""))
551
565
 
552
- # Agentic supplement tokens (always injected)
553
- tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
566
+ # Agentic supplement tokens (only when tool mode is active)
567
+ if _has_tool_definitions(anthropic_body):
568
+ tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
554
569
 
555
570
  # Messages
556
571
  for msg in anthropic_body.get("messages", []):
@@ -600,7 +615,8 @@ def prune_conversation(
600
615
  for block in system:
601
616
  if isinstance(block, dict) and block.get("type") == "text":
602
617
  overhead_tokens += estimate_tokens(block.get("text", ""))
603
- overhead_tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
618
+ if _has_tool_definitions(anthropic_body):
619
+ overhead_tokens += estimate_tokens(_AGENTIC_SYSTEM_SUPPLEMENT)
604
620
  tools = anthropic_body.get("tools", [])
605
621
  if tools:
606
622
  overhead_tokens += estimate_tokens(json.dumps(tools))
@@ -768,7 +784,7 @@ async def lifespan(app: FastAPI):
768
784
  _resolve_prune_target_fraction() * 100,
769
785
  )
770
786
  logger.info(
771
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d)",
787
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
772
788
  PROXY_MALFORMED_TOOL_GUARDRAIL,
773
789
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
774
790
  PROXY_FORCE_NON_STREAM,
@@ -776,6 +792,9 @@ async def lifespan(app: FastAPI):
776
792
  PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
777
793
  PROXY_SESSION_CONTAMINATION_BREAKER,
778
794
  PROXY_SESSION_CONTAMINATION_THRESHOLD,
795
+ PROXY_ANALYSIS_ONLY_ROUTE,
796
+ PROXY_ANALYSIS_ONLY_MIN_TOOLS,
797
+ PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
779
798
  )
780
799
 
781
800
  yield
@@ -879,6 +898,112 @@ def _extract_text(content) -> str:
879
898
  return str(content)
880
899
 
881
900
 
901
+ def _has_tool_definitions(anthropic_body: dict) -> bool:
902
+ tools = anthropic_body.get("tools")
903
+ return isinstance(tools, list) and len(tools) > 0
904
+
905
+
906
+ def _message_has_tool_result(content) -> bool:
907
+ return isinstance(content, list) and any(
908
+ isinstance(block, dict) and block.get("type") == "tool_result"
909
+ for block in content
910
+ )
911
+
912
+
913
def _last_user_text(anthropic_body: dict) -> str:
    """Return the newest user message as stripped, lower-cased text.

    Messages are scanned from last to first; the content of the first one
    whose role is ``"user"`` is converted with ``_extract_text``. An empty
    string is returned when no user message exists.
    """
    history = anthropic_body.get("messages", [])
    newest_user = next(
        (entry for entry in reversed(history) if entry.get("role") == "user"),
        None,
    )
    if newest_user is None:
        return ""
    raw_text = _extract_text(newest_user.get("content", ""))
    return raw_text.strip().lower()
918
+
919
+
920
+ def _is_analysis_only_prompt(text: str) -> bool:
921
+ if not text:
922
+ return False
923
+
924
+ analysis_markers = (
925
+ "analy",
926
+ "review",
927
+ "audit",
928
+ "summar",
929
+ "explain",
930
+ "plan",
931
+ "recommend",
932
+ "assess",
933
+ "compare",
934
+ "investigate",
935
+ "diagnose",
936
+ )
937
+ action_markers = (
938
+ "fix",
939
+ "edit",
940
+ "write",
941
+ "create",
942
+ "implement",
943
+ "patch",
944
+ "change",
945
+ "update",
946
+ "run ",
947
+ "execute",
948
+ "command",
949
+ "use tool",
950
+ "call tool",
951
+ "apply",
952
+ "commit",
953
+ "push",
954
+ "merge",
955
+ "publish",
956
+ "deploy",
957
+ "test",
958
+ "build",
959
+ "refactor",
960
+ "rename",
961
+ "delete",
962
+ "install",
963
+ )
964
+
965
+ has_analysis = any(marker in text for marker in analysis_markers)
966
+ has_action = any(marker in text for marker in action_markers)
967
+ return has_analysis and not has_action
968
+
969
+
970
def _should_route_analysis_without_tools(anthropic_body: dict) -> bool:
    """Decide whether a request qualifies for the tool-free analysis route.

    All of the following must hold:
      * the route is enabled via ``PROXY_ANALYSIS_ONLY_ROUTE``;
      * ``tools`` is a list with at least
        ``max(1, PROXY_ANALYSIS_ONLY_MIN_TOOLS)`` entries;
      * ``messages`` is a non-empty list with at most
        ``max(1, PROXY_ANALYSIS_ONLY_MAX_MESSAGES)`` entries;
      * no message has the ``assistant`` role or carries a tool result;
      * the latest user text is classified as analysis-only.
    """
    if not PROXY_ANALYSIS_ONLY_ROUTE:
        return False

    tool_defs = anthropic_body.get("tools")
    required_tools = max(1, PROXY_ANALYSIS_ONLY_MIN_TOOLS)
    if not isinstance(tool_defs, list) or len(tool_defs) < required_tools:
        return False

    history = anthropic_body.get("messages", [])
    if not isinstance(history, list) or not history:
        return False
    if len(history) > max(1, PROXY_ANALYSIS_ONLY_MAX_MESSAGES):
        return False

    # Any assistant turn means the conversation is already under way.
    for entry in history:
        if entry.get("role") == "assistant":
            return False

    # Tool results mean the tool loop has already been engaged.
    for entry in history:
        if _message_has_tool_result(entry.get("content")):
            return False

    return _is_analysis_only_prompt(_last_user_text(anthropic_body))
994
+
995
+
996
def _maybe_route_analysis_without_tools(anthropic_body: dict) -> tuple[dict, int]:
    """Strip tool definitions from analysis-only requests.

    Args:
        anthropic_body: The incoming Anthropic-style request body. It is never
            mutated.

    Returns:
        A ``(body, removed)`` pair. When the request does not qualify for the
        analysis-only route, the original body object is returned with
        ``removed == 0``. Otherwise a shallow copy without the tool fields is
        returned together with the number of tool definitions dropped.
    """
    if not _should_route_analysis_without_tools(anthropic_body):
        return anthropic_body, 0

    tools = anthropic_body.get("tools")
    removed = len(tools) if isinstance(tools, list) else 0
    updated = dict(anthropic_body)
    updated.pop("tools", None)
    # A leftover "tool_choice" would refer to tools that no longer exist, so
    # it is dropped together with them to keep the request self-consistent.
    # NOTE(review): how tool_choice is forwarded downstream is not visible
    # from this block -- confirm against build_openai_request.
    updated.pop("tool_choice", None)
    return updated, removed
1005
+
1006
+
882
1007
  _AGENTIC_SYSTEM_SUPPLEMENT_LEGACY = (
883
1008
  "\n\n<agentic-protocol>\n"
884
1009
  "You are operating in an agentic coding loop with tool access. Follow these rules:\n"
@@ -1076,19 +1201,24 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1076
1201
  "stream": anthropic_body.get("stream", False),
1077
1202
  }
1078
1203
 
1079
- # Inject agentic protocol instructions into the system message so
1080
- # the model knows it must use tools to complete work, not just explain.
1081
- if openai_body["messages"] and openai_body["messages"][0].get("role") == "system":
1082
- openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
1083
- else:
1084
- # No system message from the client; inject one.
1085
- openai_body["messages"].insert(
1086
- 0,
1087
- {
1088
- "role": "system",
1089
- "content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
1090
- },
1091
- )
1204
+ has_tools = _has_tool_definitions(anthropic_body)
1205
+
1206
+ # Inject agentic protocol instructions only for tool-enabled turns.
1207
+ if has_tools:
1208
+ if (
1209
+ openai_body["messages"]
1210
+ and openai_body["messages"][0].get("role") == "system"
1211
+ ):
1212
+ openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
1213
+ else:
1214
+ # No system message from the client; inject one.
1215
+ openai_body["messages"].insert(
1216
+ 0,
1217
+ {
1218
+ "role": "system",
1219
+ "content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
1220
+ },
1221
+ )
1092
1222
 
1093
1223
  if "max_tokens" in anthropic_body:
1094
1224
  # Enforce configurable minimum floor for thinking mode: model needs
@@ -1137,7 +1267,7 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1137
1267
  openai_body["stop"] = anthropic_body["stop_sequences"]
1138
1268
 
1139
1269
  # Convert Anthropic tools to OpenAI function-calling tools
1140
- if "tools" in anthropic_body:
1270
+ if has_tools:
1141
1271
  openai_body["tools"] = _convert_anthropic_tools_to_openai(
1142
1272
  anthropic_body.get("tools", [])
1143
1273
  )
@@ -1517,6 +1647,13 @@ def _looks_malformed_tool_payload(text: str) -> bool:
1517
1647
  return False
1518
1648
 
1519
1649
  lowered = text.lower()
1650
+ apology_markers = (
1651
+ "i could not produce a valid tool-call format in this turn",
1652
+ "i will issue exactly one valid tool call next",
1653
+ )
1654
+ if any(marker in lowered for marker in apology_markers):
1655
+ return True
1656
+
1520
1657
  primary_markers = ("</parameter", "<parameter", "<tool_call", "<function=")
1521
1658
  if any(marker in lowered for marker in primary_markers):
1522
1659
  return True
@@ -1575,6 +1712,18 @@ def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict
1575
1712
  retry_body["tool_choice"] = "required"
1576
1713
  retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
1577
1714
 
1715
+ malformed_retry_instruction = {
1716
+ "role": "user",
1717
+ "content": (
1718
+ "Your previous response had invalid tool-call formatting. "
1719
+ "Respond with exactly one valid tool call using the provided tools. "
1720
+ "Do not output prose, markdown, XML tags, or schema snippets."
1721
+ ),
1722
+ }
1723
+ existing_messages = retry_body.get("messages")
1724
+ if isinstance(existing_messages, list) and existing_messages:
1725
+ retry_body["messages"] = [*existing_messages, malformed_retry_instruction]
1726
+
1578
1727
  if PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS > 0:
1579
1728
  current_max = int(
1580
1729
  retry_body.get("max_tokens", PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS)
@@ -1608,8 +1757,8 @@ def _build_clean_guardrail_openai_response(openai_resp: dict) -> dict:
1608
1757
  "message": {
1609
1758
  "role": "assistant",
1610
1759
  "content": (
1611
- "I could not produce a valid tool-call format in this turn. "
1612
- "Please continue; I will issue exactly one valid tool call next."
1760
+ "Tool-call formatting failed after automatic retries. "
1761
+ "Please retry the same request."
1613
1762
  ),
1614
1763
  },
1615
1764
  }
@@ -2200,6 +2349,14 @@ async def messages(request: Request):
2200
2349
  last_session_id = session_id
2201
2350
 
2202
2351
  body = _maybe_apply_session_contamination_breaker(body, monitor, session_id)
2352
+ body, analysis_tools_removed = _maybe_route_analysis_without_tools(body)
2353
+ if analysis_tools_removed > 0:
2354
+ monitor.consecutive_forced_count = 0
2355
+ monitor.no_progress_streak = 0
2356
+ logger.info(
2357
+ "ANALYSIS ROUTE: disabled %d tools for analysis-only prompt",
2358
+ analysis_tools_removed,
2359
+ )
2203
2360
 
2204
2361
  # Debug: log request summary
2205
2362
  n_messages = len(body.get("messages", []))
@@ -164,6 +164,27 @@ class TestMalformedToolGuardrail(unittest.TestCase):
164
164
  }
165
165
  self.assertTrue(proxy._is_malformed_tool_response(openai_resp, anthropic_body))
166
166
 
167
def test_detects_tool_call_apology_text_as_malformed(self):
    """An apology about tool-call formatting is itself flagged as malformed."""
    apology = (
        "I could not produce a valid tool-call format in this turn. "
        "Please continue; I will issue exactly one valid tool call next."
    )
    assistant_message = {"content": apology, "tool_calls": []}
    openai_resp = {
        "choices": [{"finish_reason": "stop", "message": assistant_message}]
    }
    anthropic_body = {
        "tools": [{"name": "Read", "input_schema": {"type": "object"}}],
        "messages": [{"role": "user", "content": "fix this"}],
    }

    verdict = proxy._is_malformed_tool_response(openai_resp, anthropic_body)

    self.assertTrue(verdict)
187
+
167
188
  def test_clean_tool_call_response_is_not_malformed(self):
168
189
  openai_resp = {
169
190
  "choices": [
@@ -385,6 +406,7 @@ class TestMalformedToolGuardrail(unittest.TestCase):
385
406
  openai_body = {
386
407
  "model": "test",
387
408
  "max_tokens": 4000,
409
+ "messages": [{"role": "user", "content": "fix the issue"}],
388
410
  "tools": [{"type": "function", "function": {"name": "Read"}}],
389
411
  }
390
412
  anthropic_body = {
@@ -402,11 +424,24 @@ class TestMalformedToolGuardrail(unittest.TestCase):
402
424
  self.assertEqual(retry["max_tokens"], 512)
403
425
  self.assertEqual(len(retry["tools"]), 3)
404
426
  self.assertFalse(retry["enable_thinking"])
427
+ self.assertEqual(retry["messages"][-1]["role"], "user")
428
+ self.assertIn(
429
+ "invalid tool-call formatting",
430
+ retry["messages"][-1]["content"],
431
+ )
405
432
  finally:
406
433
  setattr(proxy, "PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", old_cap)
407
434
  setattr(proxy, "PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE", old_temp)
408
435
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
409
436
 
437
def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
    """The guardrail fallback asks for a retry and makes no tool-call promise."""
    response = proxy._build_clean_guardrail_openai_response({"model": "test-model"})
    first_choice = response["choices"][0]
    content = first_choice["message"]["content"]

    self.assertIn("Please retry the same request", content)
    self.assertNotIn("I will issue exactly one valid tool call next", content)
444
+
410
445
 
411
446
  class TestToolTurnControls(unittest.TestCase):
412
447
  def test_tool_narrowing_reduces_tool_count(self):
@@ -483,6 +518,82 @@ class TestToolTurnControls(unittest.TestCase):
483
518
  finally:
484
519
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
485
520
 
521
def test_no_tools_does_not_inject_agentic_system_message(self):
    """Without tools, no system message is prepended and no tools are sent."""
    request = {
        "model": "test",
        "messages": [{"role": "user", "content": "analyze architecture"}],
    }
    monitor = proxy.SessionMonitor(context_window=262144)

    converted = proxy.build_openai_request(request, monitor)

    leading_message = converted["messages"][0]
    self.assertEqual(leading_message["role"], "user")
    self.assertNotIn("tools", converted)
532
+
533
def test_analysis_only_route_removes_tools(self):
    """A pure analysis prompt with enough tools has its tools stripped."""
    overrides = {
        "PROXY_ANALYSIS_ONLY_ROUTE": True,
        "PROXY_ANALYSIS_ONLY_MIN_TOOLS": 4,
        "PROXY_ANALYSIS_ONLY_MAX_MESSAGES": 2,
    }
    # Remember the module settings so they can be restored afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        tool_names = ("Read", "Edit", "Write", "Bash")
        body = {
            "messages": [
                {
                    "role": "user",
                    "content": "analyze lifecycle and plan options to improve performance and compliance",
                }
            ],
            "tools": [
                {"name": tool_name, "input_schema": {"type": "object"}}
                for tool_name in tool_names
            ],
        }

        updated, removed = proxy._maybe_route_analysis_without_tools(body)
        self.assertEqual(removed, 4)
        self.assertNotIn("tools", updated)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
564
+
565
def test_analysis_only_route_keeps_tools_for_action_prompt(self):
    """A prompt that also requests an action keeps its tools."""
    overrides = {
        "PROXY_ANALYSIS_ONLY_ROUTE": True,
        "PROXY_ANALYSIS_ONLY_MIN_TOOLS": 4,
        "PROXY_ANALYSIS_ONLY_MAX_MESSAGES": 2,
    }
    # Remember the module settings so they can be restored afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        tool_names = ("Read", "Edit", "Write", "Bash")
        body = {
            "messages": [
                {
                    "role": "user",
                    "content": "analyze failing run and fix the bug",
                }
            ],
            "tools": [
                {"name": tool_name, "input_schema": {"type": "object"}}
                for tool_name in tool_names
            ],
        }

        updated, removed = proxy._maybe_route_analysis_without_tools(body)
        self.assertEqual(removed, 0)
        self.assertIn("tools", updated)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
596
+
486
597
 
487
598
  class TestSessionContaminationBreaker(unittest.TestCase):
488
599
  def test_contamination_breaker_trims_and_resets_streak(self):