@miller-tech/uap 1.20.34 → 1.20.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,18 @@
2
2
  /**
3
3
  * Layer 1: Intelligent Agent Execution Proxy (v1.0.0)
4
4
  *
5
+ * DEPRECATED: This OpenAI-only shim is superseded by anthropic_proxy.py's
6
+ * /v1/chat/completions route, which provides the same OpenAI Chat Completions
7
+ * surface AND runs through the canonical guarded Anthropic pipeline (loop
8
+ * detection, tool narrowing, malformed-payload retry, context pruning).
9
+ *
10
+ * New deployments should point clients at the anthropic-proxy port (default
11
+ * 4000) and use either /v1/messages (Anthropic) or /v1/chat/completions
12
+ * (OpenAI passthrough). This script is retained for backward compatibility
13
+ * with installations that still reference it via `uap tool-calls` tooling
14
+ * (see src/cli/tool-calls.ts) and the in-container Qwen benchmark
15
+ * (scripts/benchmarks/run-tbench-qwen35-quick.sh).
16
+ *
5
17
  * Model-agnostic proxy that sits between any OpenAI-compatible client and
6
18
  * any OpenAI-compatible inference server. Implements:
7
19
  *
@@ -2140,8 +2140,12 @@ class TestToolTurnControls(unittest.TestCase):
2140
2140
  }
2141
2141
 
2142
2142
  openai = proxy.build_openai_request(body, monitor)
2143
- self.assertNotIn("tools", openai)
2144
- self.assertNotIn("tool_choice", openai)
2143
+ # Finalize turn keeps tools available but switches tool_choice to
2144
+ # 'auto' so the model can complete with a tool call or summarise.
2145
+ # Earlier behaviour stripped tools entirely, which caused Anthropic
2146
+ # clients to see end_turn with no action and halt.
2147
+ self.assertIn("tools", openai)
2148
+ self.assertEqual(openai.get("tool_choice"), "auto")
2145
2149
  self.assertEqual(monitor.tool_turn_phase, "finalize")
2146
2150
  self.assertTrue(monitor.finalize_turn_active)
2147
2151
  finally:
@@ -2229,7 +2233,7 @@ class TestToolTurnControls(unittest.TestCase):
2229
2233
  finally:
2230
2234
  setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
2231
2235
 
2232
- def test_state_machine_finalize_temporarily_disables_tools(self):
2236
+ def test_state_machine_finalize_keeps_tools_with_auto_choice(self):
2233
2237
  old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
2234
2238
  old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
2235
2239
  old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
@@ -2293,8 +2297,10 @@ class TestToolTurnControls(unittest.TestCase):
2293
2297
  }
2294
2298
 
2295
2299
  openai = proxy.build_openai_request(body, monitor)
2296
- self.assertNotIn("tools", openai)
2297
- self.assertNotIn("tool_choice", openai)
2300
+ # Finalize keeps tools + tool_choice=auto so the model can either
2301
+ # complete with a tool call or emit a plain-text summary.
2302
+ self.assertIn("tools", openai)
2303
+ self.assertEqual(openai.get("tool_choice"), "auto")
2298
2304
  finally:
2299
2305
  setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
2300
2306
  setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
@@ -3512,28 +3518,78 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3512
3518
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
3513
3519
 
3514
3520
  def test_max_tokens_floor_applied_when_thinking_active(self):
3515
- """max_tokens floor should apply when tools present and thinking enabled."""
3521
+ """Floor applies on non-preflight tool turns with thinking enabled."""
3516
3522
  old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
3517
3523
  old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
3518
3524
  try:
3519
3525
  setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
3520
3526
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
3521
3527
 
3528
+ # max_tokens=1536 is above SMALL_PREFLIGHT_THRESHOLD (1024), so the
3529
+ # request does NOT take the preflight carveout and the regular
3530
+ # floor path applies. Small-preflight bypass is covered separately
3531
+ # in test_max_tokens_floor_bypassed_for_small_preflight.
3522
3532
  body = {
3523
3533
  "model": "test",
3524
- "max_tokens": 512,
3534
+ "max_tokens": 1536,
3525
3535
  "messages": [{"role": "user", "content": "run command"}],
3526
3536
  "tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
3527
3537
  }
3528
3538
  openai = proxy.build_openai_request(
3529
3539
  body, proxy.SessionMonitor(context_window=0)
3530
3540
  )
3531
- # Tools + thinking enabled = floor applied
3532
3541
  self.assertEqual(openai.get("max_tokens"), 4096)
3533
3542
  finally:
3534
3543
  setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
3535
3544
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
3536
3545
 
3546
+ def test_max_tokens_floor_bypassed_for_small_preflight(self):
3547
+ """Small preflight requests (max_tokens <= SMALL_PREFLIGHT_THRESHOLD)
3548
+ bypass the big floor and instead get THINKING_MIN_FOR_TOOLS=2048
3549
+ bump so Qwen's mandatory thinking has room before the tool call."""
3550
+ old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
3551
+ old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
3552
+ try:
3553
+ setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
3554
+ setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
3555
+
3556
+ body = {
3557
+ "model": "test",
3558
+ "max_tokens": 512,
3559
+ "messages": [{"role": "user", "content": "run command"}],
3560
+ "tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
3561
+ }
3562
+ openai = proxy.build_openai_request(
3563
+ body, proxy.SessionMonitor(context_window=0)
3564
+ )
3565
+ self.assertEqual(openai.get("max_tokens"), 2048)
3566
+ finally:
3567
+ setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
3568
+ setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
3569
+
3570
+ def test_max_tokens_true_preflight_left_alone(self):
3571
+ """True preflight requests (max_tokens <= 16) are not inflated, even
3572
+ with tools present, so plan-generation latency stays low."""
3573
+ old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
3574
+ old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
3575
+ try:
3576
+ setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
3577
+ setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
3578
+
3579
+ body = {
3580
+ "model": "test",
3581
+ "max_tokens": 1,
3582
+ "messages": [{"role": "user", "content": "ping"}],
3583
+ "tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
3584
+ }
3585
+ openai = proxy.build_openai_request(
3586
+ body, proxy.SessionMonitor(context_window=0)
3587
+ )
3588
+ self.assertEqual(openai.get("max_tokens"), 1)
3589
+ finally:
3590
+ setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
3591
+ setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
3592
+
3537
3593
 
3538
3594
  class TestGenerationHangRecovery(unittest.TestCase):
3539
3595
  """Tests for generation hang recovery: timeouts, slot hang detection."""
@@ -4818,3 +4874,132 @@ class TestUpstream503Resilience(unittest.TestCase):
4818
4874
  """Does not match 200 even with loading text."""
4819
4875
  resp = httpx.Response(200, text='{"status":"loading model"}')
4820
4876
  self.assertFalse(proxy._is_loading_model_503(resp))
4877
+
4878
+
4879
+ class TestOpenAIPassthroughConversion(unittest.TestCase):
4880
+ """Tests for the /v1/chat/completions OpenAI passthrough route.
4881
+
4882
+ The route converts OpenAI Chat Completions requests to Anthropic
4883
+ Messages, runs the full guarded Anthropic pipeline, then converts the
4884
+ response back to OpenAI shape. This exercises the pure conversion
4885
+ helpers (openai_to_anthropic_request, anthropic_to_openai_response) in
4886
+ isolation so a regression in the dual-interface surface is caught
4887
+ without needing a live FastAPI client."""
4888
+
4889
+ def test_openai_to_anthropic_request_preserves_user_and_assistant_text(self):
4890
+ """User and assistant text messages survive the OpenAI->Anthropic
4891
+ conversion with the expected role + content shape."""
4892
+ openai_body = {
4893
+ "model": "qwen35",
4894
+ "max_tokens": 1024,
4895
+ "messages": [
4896
+ {"role": "system", "content": "you are helpful"},
4897
+ {"role": "user", "content": "hello"},
4898
+ {"role": "assistant", "content": "hi there"},
4899
+ {"role": "user", "content": "thanks"},
4900
+ ],
4901
+ }
4902
+ anthropic_body = proxy.openai_to_anthropic_request(openai_body)
4903
+
4904
+ self.assertEqual(anthropic_body.get("model"), "qwen35")
4905
+ self.assertEqual(anthropic_body.get("max_tokens"), 1024)
4906
+ # System collapses into a top-level 'system' field
4907
+ self.assertIn("system", anthropic_body)
4908
+ # Non-system messages preserved in order
4909
+ msgs = anthropic_body.get("messages", [])
4910
+ self.assertEqual(len(msgs), 3)
4911
+ self.assertEqual(msgs[0]["role"], "user")
4912
+ self.assertEqual(msgs[1]["role"], "assistant")
4913
+ self.assertEqual(msgs[2]["role"], "user")
4914
+
4915
+ def test_openai_to_anthropic_request_converts_tool_response(self):
4916
+ """OpenAI 'role: tool' messages become Anthropic user messages with
4917
+ a tool_result content block — required so the guarded pipeline can
4918
+ track tool history across turns."""
4919
+ openai_body = {
4920
+ "model": "test",
4921
+ "messages": [
4922
+ {"role": "user", "content": "run pwd"},
4923
+ {
4924
+ "role": "assistant",
4925
+ "content": None,
4926
+ "tool_calls": [
4927
+ {
4928
+ "id": "call_1",
4929
+ "type": "function",
4930
+ "function": {"name": "Bash", "arguments": '{"command": "pwd"}'},
4931
+ }
4932
+ ],
4933
+ },
4934
+ {"role": "tool", "tool_call_id": "call_1", "content": "/home/user"},
4935
+ ],
4936
+ }
4937
+ anthropic_body = proxy.openai_to_anthropic_request(openai_body)
4938
+ msgs = anthropic_body.get("messages", [])
4939
+
4940
+ # Last message is the tool result, encoded as Anthropic user/tool_result
4941
+ tool_result_msg = msgs[-1]
4942
+ self.assertEqual(tool_result_msg["role"], "user")
4943
+ blocks = tool_result_msg["content"]
4944
+ self.assertEqual(len(blocks), 1)
4945
+ self.assertEqual(blocks[0]["type"], "tool_result")
4946
+ self.assertEqual(blocks[0]["tool_use_id"], "call_1")
4947
+ self.assertEqual(blocks[0]["content"], "/home/user")
4948
+
4949
+ def test_anthropic_to_openai_response_text_only(self):
4950
+ """A plain-text Anthropic response becomes OpenAI choices[0] with
4951
+ finish_reason='stop' and a string content body."""
4952
+ anthropic_resp = {
4953
+ "id": "msg_test_1",
4954
+ "model": "qwen35",
4955
+ "content": [{"type": "text", "text": "the answer is 42"}],
4956
+ "stop_reason": "end_turn",
4957
+ "usage": {"input_tokens": 10, "output_tokens": 5},
4958
+ }
4959
+ openai_resp = proxy.anthropic_to_openai_response(anthropic_resp)
4960
+
4961
+ self.assertEqual(openai_resp["object"], "chat.completion")
4962
+ self.assertEqual(openai_resp["model"], "qwen35")
4963
+ self.assertEqual(len(openai_resp["choices"]), 1)
4964
+ choice = openai_resp["choices"][0]
4965
+ self.assertEqual(choice["finish_reason"], "stop")
4966
+ self.assertEqual(choice["message"]["role"], "assistant")
4967
+ self.assertEqual(choice["message"]["content"], "the answer is 42")
4968
+ self.assertNotIn("tool_calls", choice["message"])
4969
+ # Usage is re-shaped to OpenAI conventions
4970
+ self.assertEqual(openai_resp["usage"]["prompt_tokens"], 10)
4971
+ self.assertEqual(openai_resp["usage"]["completion_tokens"], 5)
4972
+ self.assertEqual(openai_resp["usage"]["total_tokens"], 15)
4973
+
4974
+ def test_anthropic_to_openai_response_tool_use_yields_tool_calls(self):
4975
+ """An Anthropic response with a tool_use content block becomes an
4976
+ OpenAI choice with finish_reason='tool_calls' and a tool_calls array
4977
+ carrying the JSON-stringified arguments — the canonical OpenAI shape
4978
+ clients like Forge expect."""
4979
+ anthropic_resp = {
4980
+ "id": "msg_tool_1",
4981
+ "model": "qwen35",
4982
+ "content": [
4983
+ {
4984
+ "type": "tool_use",
4985
+ "id": "toolu_xyz",
4986
+ "name": "Bash",
4987
+ "input": {"command": "pwd"},
4988
+ }
4989
+ ],
4990
+ "stop_reason": "tool_use",
4991
+ "usage": {"input_tokens": 20, "output_tokens": 8},
4992
+ }
4993
+ openai_resp = proxy.anthropic_to_openai_response(anthropic_resp)
4994
+
4995
+ choice = openai_resp["choices"][0]
4996
+ self.assertEqual(choice["finish_reason"], "tool_calls")
4997
+ msg = choice["message"]
4998
+ self.assertIsNone(msg["content"]) # No text emitted
4999
+ self.assertEqual(len(msg["tool_calls"]), 1)
5000
+ tc = msg["tool_calls"][0]
5001
+ self.assertEqual(tc["type"], "function")
5002
+ self.assertEqual(tc["id"], "toolu_xyz")
5003
+ self.assertEqual(tc["function"]["name"], "Bash")
5004
+ # Arguments are JSON-stringified per OpenAI spec
5005
+ self.assertEqual(json.loads(tc["function"]["arguments"]), {"command": "pwd"})