@miller-tech/uap 1.20.35 → 1.20.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -77,6 +77,7 @@ uap setup -p all
77
77
  | CLI | 25 commands | Full system management with rich dashboard visualization |
78
78
  | Benchmarks | 9 modules | Terminal-Bench adapter, Harbor integration, A/B comparison |
79
79
  | LLM Optimization | 5 tools | Qwen3.5 tool call fixes, llama.cpp optimizer, LoRA training |
80
+ | Local LLM Proxy | 1 service | Anthropic Messages API default; OpenAI Chat Completions retained as an option |
80
81
  | RTK | 1 module | 60-90% token savings on command outputs |
81
82
  | Platforms | 9 integrations | Claude, Factory, OpenCode, ForgeCode, VSCode, Beads, Codex, Pipeline, OMP |
82
83
 
@@ -269,12 +269,21 @@ export ANTHROPIC_BASE_URL=http://localhost:4000
269
269
 
270
270
  ### Endpoints
271
271
 
272
- | Path | Method | Description |
273
- | ------------------------ | ------ | ------------------------------------------ |
274
- | `/v1/messages` | POST | Anthropic Messages API (streaming + sync) |
275
- | `/anthropic/v1/messages` | POST | Alternative path (some clients use this) |
276
- | `/v1/models` | GET | Lists spoofed Anthropic model IDs |
277
- | `/health` | GET | Health check (checks upstream reachability) |
272
+ The proxy speaks **Anthropic Messages API as its canonical interface** and
273
+ keeps an **OpenAI Chat Completions passthrough** for clients that require the
274
+ OpenAI shape. Both paths run through the same guarded pipeline (loop
275
+ detection, tool narrowing, malformed-payload retry, context pruning, etc.);
276
+ the OpenAI route converts the request to Anthropic, runs the pipeline, and
277
+ re-shapes the final response back to OpenAI.
278
+
279
+ | Path | Method | Shape | Description |
280
+ | ------------------------ | ------ | --------- | --------------------------------------------------------------- |
281
+ | `/v1/messages` | POST | Anthropic | Anthropic Messages API — default/canonical (streaming + sync) |
282
+ | `/anthropic/v1/messages` | POST | Anthropic | Alias for `/v1/messages` (some Claude Code configs use this) |
283
+ | `/v1/chat/completions` | POST | OpenAI | OpenAI Chat Completions passthrough (e.g. Forge, OpenCode) |
284
+ | `/v1/models` | GET | Anthropic | Lists spoofed Anthropic model IDs |
285
+ | `/health` | GET | — | Health check (verifies upstream reachability) |
286
+ | `/v1/context` | GET | — | Current session context usage and pruning state |
278
287
 
279
288
  ### Running as a Service (systemd)
280
289
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.35",
3
+ "version": "1.20.36",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -2,6 +2,18 @@
2
2
  /**
3
3
  * Layer 1: Intelligent Agent Execution Proxy (v1.0.0)
4
4
  *
5
+ * DEPRECATED: This OpenAI-only shim is superseded by anthropic_proxy.py's
6
+ * /v1/chat/completions route, which provides the same OpenAI Chat Completions
7
+ * surface AND runs through the canonical guarded Anthropic pipeline (loop
8
+ * detection, tool narrowing, malformed-payload retry, context pruning).
9
+ *
10
+ * New deployments should point clients at the anthropic-proxy port (default
11
+ * 4000) and use either /v1/messages (Anthropic) or /v1/chat/completions
12
+ * (OpenAI passthrough). This script is retained for backward compatibility
13
+ * with installations that still reference it via `uap tool-calls` tooling
14
+ * (see src/cli/tool-calls.ts) and the in-container Qwen benchmark
15
+ * (scripts/benchmarks/run-tbench-qwen35-quick.sh).
16
+ *
5
17
  * Model-agnostic proxy that sits between any OpenAI-compatible client and
6
18
  * any OpenAI-compatible inference server. Implements:
7
19
  *
@@ -4874,3 +4874,132 @@ class TestUpstream503Resilience(unittest.TestCase):
4874
4874
  """Does not match 200 even with loading text."""
4875
4875
  resp = httpx.Response(200, text='{"status":"loading model"}')
4876
4876
  self.assertFalse(proxy._is_loading_model_503(resp))
4877
+
4878
+
4879
+ class TestOpenAIPassthroughConversion(unittest.TestCase):
4880
+ """Tests for the /v1/chat/completions OpenAI passthrough route.
4881
+
4882
+ The route converts OpenAI Chat Completions requests to Anthropic
4883
+ Messages, runs the full guarded Anthropic pipeline, then converts the
4884
+ response back to OpenAI shape. This exercises the pure conversion
4885
+ helpers (openai_to_anthropic_request, anthropic_to_openai_response) in
4886
+ isolation so a regression in the dual-interface surface is caught
4887
+ without needing a live FastAPI client."""
4888
+
4889
+ def test_openai_to_anthropic_request_preserves_user_and_assistant_text(self):
4890
+ """User and assistant text messages survive the OpenAI->Anthropic
4891
+ conversion with the expected role + content shape."""
4892
+ openai_body = {
4893
+ "model": "qwen35",
4894
+ "max_tokens": 1024,
4895
+ "messages": [
4896
+ {"role": "system", "content": "you are helpful"},
4897
+ {"role": "user", "content": "hello"},
4898
+ {"role": "assistant", "content": "hi there"},
4899
+ {"role": "user", "content": "thanks"},
4900
+ ],
4901
+ }
4902
+ anthropic_body = proxy.openai_to_anthropic_request(openai_body)
4903
+
4904
+ self.assertEqual(anthropic_body.get("model"), "qwen35")
4905
+ self.assertEqual(anthropic_body.get("max_tokens"), 1024)
4906
+ # System collapses into a top-level 'system' field
4907
+ self.assertIn("system", anthropic_body)
4908
+ # Non-system messages preserved in order
4909
+ msgs = anthropic_body.get("messages", [])
4910
+ self.assertEqual(len(msgs), 3)
4911
+ self.assertEqual(msgs[0]["role"], "user")
4912
+ self.assertEqual(msgs[1]["role"], "assistant")
4913
+ self.assertEqual(msgs[2]["role"], "user")
4914
+
4915
+ def test_openai_to_anthropic_request_converts_tool_response(self):
4916
+ """OpenAI 'role: tool' messages become Anthropic user messages with
4917
+ a tool_result content block — required so the guarded pipeline can
4918
+ track tool history across turns."""
4919
+ openai_body = {
4920
+ "model": "test",
4921
+ "messages": [
4922
+ {"role": "user", "content": "run pwd"},
4923
+ {
4924
+ "role": "assistant",
4925
+ "content": None,
4926
+ "tool_calls": [
4927
+ {
4928
+ "id": "call_1",
4929
+ "type": "function",
4930
+ "function": {"name": "Bash", "arguments": '{"command": "pwd"}'},
4931
+ }
4932
+ ],
4933
+ },
4934
+ {"role": "tool", "tool_call_id": "call_1", "content": "/home/user"},
4935
+ ],
4936
+ }
4937
+ anthropic_body = proxy.openai_to_anthropic_request(openai_body)
4938
+ msgs = anthropic_body.get("messages", [])
4939
+
4940
+ # Last message is the tool result, encoded as Anthropic user/tool_result
4941
+ tool_result_msg = msgs[-1]
4942
+ self.assertEqual(tool_result_msg["role"], "user")
4943
+ blocks = tool_result_msg["content"]
4944
+ self.assertEqual(len(blocks), 1)
4945
+ self.assertEqual(blocks[0]["type"], "tool_result")
4946
+ self.assertEqual(blocks[0]["tool_use_id"], "call_1")
4947
+ self.assertEqual(blocks[0]["content"], "/home/user")
4948
+
4949
+ def test_anthropic_to_openai_response_text_only(self):
4950
+ """A plain-text Anthropic response becomes OpenAI choices[0] with
4951
+ finish_reason='stop' and a string content body."""
4952
+ anthropic_resp = {
4953
+ "id": "msg_test_1",
4954
+ "model": "qwen35",
4955
+ "content": [{"type": "text", "text": "the answer is 42"}],
4956
+ "stop_reason": "end_turn",
4957
+ "usage": {"input_tokens": 10, "output_tokens": 5},
4958
+ }
4959
+ openai_resp = proxy.anthropic_to_openai_response(anthropic_resp)
4960
+
4961
+ self.assertEqual(openai_resp["object"], "chat.completion")
4962
+ self.assertEqual(openai_resp["model"], "qwen35")
4963
+ self.assertEqual(len(openai_resp["choices"]), 1)
4964
+ choice = openai_resp["choices"][0]
4965
+ self.assertEqual(choice["finish_reason"], "stop")
4966
+ self.assertEqual(choice["message"]["role"], "assistant")
4967
+ self.assertEqual(choice["message"]["content"], "the answer is 42")
4968
+ self.assertNotIn("tool_calls", choice["message"])
4969
+ # Usage is re-shaped to OpenAI conventions
4970
+ self.assertEqual(openai_resp["usage"]["prompt_tokens"], 10)
4971
+ self.assertEqual(openai_resp["usage"]["completion_tokens"], 5)
4972
+ self.assertEqual(openai_resp["usage"]["total_tokens"], 15)
4973
+
4974
+ def test_anthropic_to_openai_response_tool_use_yields_tool_calls(self):
4975
+ """An Anthropic response with a tool_use content block becomes an
4976
+ OpenAI choice with finish_reason='tool_calls' and a tool_calls array
4977
+ carrying the JSON-stringified arguments — the canonical OpenAI shape
4978
+ clients like Forge expect."""
4979
+ anthropic_resp = {
4980
+ "id": "msg_tool_1",
4981
+ "model": "qwen35",
4982
+ "content": [
4983
+ {
4984
+ "type": "tool_use",
4985
+ "id": "toolu_xyz",
4986
+ "name": "Bash",
4987
+ "input": {"command": "pwd"},
4988
+ }
4989
+ ],
4990
+ "stop_reason": "tool_use",
4991
+ "usage": {"input_tokens": 20, "output_tokens": 8},
4992
+ }
4993
+ openai_resp = proxy.anthropic_to_openai_response(anthropic_resp)
4994
+
4995
+ choice = openai_resp["choices"][0]
4996
+ self.assertEqual(choice["finish_reason"], "tool_calls")
4997
+ msg = choice["message"]
4998
+ self.assertIsNone(msg["content"]) # No text emitted
4999
+ self.assertEqual(len(msg["tool_calls"]), 1)
5000
+ tc = msg["tool_calls"][0]
5001
+ self.assertEqual(tc["type"], "function")
5002
+ self.assertEqual(tc["id"], "toolu_xyz")
5003
+ self.assertEqual(tc["function"]["name"], "Bash")
5004
+ # Arguments are JSON-stringified per OpenAI spec
5005
+ self.assertEqual(json.loads(tc["function"]["arguments"]), {"command": "pwd"})