local-openai2anthropic 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/PKG-INFO +1 -1
  2. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/pyproject.toml +1 -1
  3. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/converter.py +0 -4
  4. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/router.py +287 -140
  5. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/.github/workflows/publish.yml +0 -0
  6. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/.gitignore +0 -0
  7. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/LICENSE +0 -0
  8. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/README.md +0 -0
  9. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/README_zh.md +0 -0
  10. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/basic_chat.py +0 -0
  11. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/streaming.py +0 -0
  12. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/thinking_mode.py +0 -0
  13. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/tool_calling.py +0 -0
  14. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/vision.py +0 -0
  15. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/examples/web_search.py +0 -0
  16. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/__init__.py +0 -0
  17. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/__main__.py +0 -0
  18. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/config.py +0 -0
  19. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/daemon.py +0 -0
  20. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/daemon_runner.py +0 -0
  21. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/main.py +0 -0
  22. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/openai_types.py +0 -0
  23. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/protocol.py +0 -0
  24. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/server_tools/__init__.py +0 -0
  25. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/server_tools/base.py +0 -0
  26. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/server_tools/web_search.py +0 -0
  27. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/src/local_openai2anthropic/tavily_client.py +0 -0
  28. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/tests/__init__.py +0 -0
  29. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/tests/test_converter.py +0 -0
  30. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/tests/test_integration.py +0 -0
  31. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/tests/test_router.py +0 -0
  32. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/tests/test_upstream.sh +0 -0
  33. {local_openai2anthropic-0.3.0 → local_openai2anthropic-0.3.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: local-openai2anthropic
- Version: 0.3.0
+ Version: 0.3.2
  Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
  Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
  Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
@@ -1,6 +1,6 @@
  [project]
  name = "local-openai2anthropic"
- version = "0.3.0"
+ version = "0.3.2"
  description = "A lightweight proxy server that converts Anthropic Messages API to OpenAI API"
  readme = "README.md"
  license = { text = "Apache-2.0" }
@@ -404,10 +404,6 @@ def convert_openai_to_anthropic(
  # Convert tool calls
  if message.tool_calls:
      for tc in message.tool_calls:
-         # Handle case where function might be None
-         if not tc.function:
-             continue
-
          tool_input: dict[str, Any] = {}
          try:
              tool_input = json.loads(tc.function.arguments)
@@ -8,7 +8,7 @@ import logging
  import secrets
  import string
  from http import HTTPStatus
- from typing import Any, AsyncGenerator
+ from typing import Any, AsyncGenerator, cast

  import httpx
  from fastapi import APIRouter, Depends, HTTPException, Request
@@ -51,6 +51,87 @@ def _generate_server_tool_id() -> str:
      return f"srvtoolu_{random_part}"


+ def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, Any] | None:
+     if not isinstance(usage, dict):
+         return usage
+     allowed_keys = {
+         "input_tokens",
+         "output_tokens",
+         "cache_creation_input_tokens",
+         "cache_read_input_tokens",
+         "server_tool_use",
+     }
+     normalized = {k: v for k, v in usage.items() if k in allowed_keys}
+     return normalized or None
+
+
+ def _count_tokens(text: str) -> int:
+     try:
+         import tiktoken  # type: ignore[import-not-found]
+     except Exception:
+         return 0
+
+     encoding = tiktoken.get_encoding("cl100k_base")
+     return len(encoding.encode(text))
+
+
+ def _chunk_text(text: str, chunk_size: int = 200) -> list[str]:
+     if not text:
+         return []
+     return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
+
+
+ def _estimate_input_tokens(openai_params: dict[str, Any]) -> int:
+     try:
+         import tiktoken  # type: ignore[import-not-found]
+     except Exception:
+         return 0
+
+     encoding = tiktoken.get_encoding("cl100k_base")
+     total_tokens = 0
+
+     system = openai_params.get("system")
+     if isinstance(system, str):
+         total_tokens += len(encoding.encode(system))
+
+     messages = openai_params.get("messages", [])
+     if isinstance(messages, list):
+         for msg in messages:
+             if not isinstance(msg, dict):
+                 continue
+             content = msg.get("content", "")
+             if isinstance(content, str):
+                 total_tokens += len(encoding.encode(content))
+             elif isinstance(content, list):
+                 for block in content:
+                     if not isinstance(block, dict):
+                         total_tokens += len(encoding.encode(str(block)))
+                         continue
+                     block_type = block.get("type")
+                     if block_type == "text":
+                         total_tokens += len(encoding.encode(block.get("text", "")))
+                     elif block_type == "image_url":
+                         total_tokens += 85
+
+             tool_calls = msg.get("tool_calls")
+             if isinstance(tool_calls, list) and tool_calls:
+                 total_tokens += len(encoding.encode(json.dumps(tool_calls)))
+
+     tools = openai_params.get("tools")
+     if isinstance(tools, list) and tools:
+         total_tokens += len(encoding.encode(json.dumps(tools)))
+
+     tool_choice = openai_params.get("tool_choice")
+     if tool_choice is not None:
+         total_tokens += len(encoding.encode(json.dumps(tool_choice)))
+
+     response_format = openai_params.get("response_format")
+     if response_format is not None:
+         total_tokens += len(encoding.encode(json.dumps(response_format)))
+
+     return total_tokens
+
+
  async def _stream_response(
      client: httpx.AsyncClient,
      url: str,
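
For orientation, a minimal usage sketch of the helpers added above (hypothetical calls, not part of the package; the token helpers fall back to 0 when tiktoken is not installed):

    # Hypothetical usage of the 0.3.2 helpers; import path assumed from this repo layout.
    from local_openai2anthropic.router import _chunk_text, _normalize_usage

    # _chunk_text slices long reasoning text into 200-char pieces for SSE deltas.
    assert _chunk_text("a" * 450) == ["a" * 200, "a" * 200, "a" * 50]

    # _normalize_usage keeps only Anthropic-shaped usage keys and drops the rest.
    assert _normalize_usage({"input_tokens": 5, "prompt_tokens": 5}) == {"input_tokens": 5}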
@@ -67,13 +148,17 @@ async def _stream_response(
  ) as response:
      if response.status_code != 200:
          error_body = await response.aread()
+         error_text = error_body.decode("utf-8", errors="replace").strip()
          try:
-             error_json = json.loads(error_body.decode())
-             error_msg = error_json.get("error", {}).get(
-                 "message", error_body.decode()
-             )
+             error_json = json.loads(error_text) if error_text else {}
+             error_msg = error_json.get("error", {}).get("message") or error_text
          except json.JSONDecodeError:
-             error_msg = error_body.decode()
+             error_msg = error_text
+         if not error_msg:
+             error_msg = (
+                 response.reason_phrase
+                 or f"Upstream API error ({response.status_code})"
+             )

          error_event = AnthropicErrorResponse(
              error=AnthropicError(type="api_error", message=error_msg)
@@ -87,10 +172,13 @@ async def _stream_response(
  content_block_started = False
  content_block_index = 0
  current_block_type = None  # 'thinking', 'text', or 'tool_use'
+ current_tool_call_index = None
+ tool_call_buffers: dict[int, str] = {}
  finish_reason = None
- input_tokens = 0
+ input_tokens = _estimate_input_tokens(json_data)
  output_tokens = 0
  message_id = None
+ sent_message_delta = False

  async for line in response.aiter_lines():
      if not line.startswith("data: "):
@@ -98,6 +186,30 @@ async def _stream_response(

  data = line[6:]
  if data == "[DONE]":
+     if not sent_message_delta:
+         stop_reason_map = {
+             "stop": "end_turn",
+             "length": "max_tokens",
+             "tool_calls": "tool_use",
+         }
+         delta_event = {
+             "type": "message_delta",
+             "delta": {
+                 "stop_reason": stop_reason_map.get(
+                     finish_reason or "stop", "end_turn"
+                 )
+             },
+             "usage": {
+                 "input_tokens": input_tokens,
+                 "output_tokens": output_tokens,
+                 "cache_creation_input_tokens": None,
+                 "cache_read_input_tokens": None,
+             },
+         }
+         logger.debug(
+             f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
+         )
+         yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
      break

  try:
@@ -112,7 +224,7 @@ async def _stream_response(
  if first_chunk:
      message_id = chunk.get("id", "")
      usage = chunk.get("usage") or {}
-     input_tokens = usage.get("prompt_tokens", 0)
+     input_tokens = usage.get("prompt_tokens", input_tokens)

      start_event = {
          "type": "message_start",
@@ -143,6 +255,8 @@ async def _stream_response(
  if not chunk.get("choices"):
      usage = chunk.get("usage") or {}
      if usage:
+         input_tokens = usage.get("prompt_tokens", input_tokens)
+         output_tokens = usage.get("completion_tokens", output_tokens)
          if content_block_started:
              yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
              content_block_started = False
@@ -160,7 +274,9 @@ async def _stream_response(
          )
      },
      "usage": {
-         "input_tokens": usage.get("prompt_tokens", 0),
+         "input_tokens": usage.get(
+             "prompt_tokens", input_tokens
+         ),
          "output_tokens": usage.get("completion_tokens", 0),
          "cache_creation_input_tokens": None,
          "cache_read_input_tokens": None,
@@ -170,6 +286,7 @@ async def _stream_response(
      f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
  )
  yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
+ sent_message_delta = True
  continue

  choice = chunk["choices"][0]
@@ -179,19 +296,6 @@ async def _stream_response(
  if choice.get("finish_reason"):
      finish_reason = choice["finish_reason"]

- # When finish_reason is tool_calls, we need to close the current block
- # and prepare to send message_delta
- if finish_reason == "tool_calls" and content_block_started:
-     stop_block = {
-         "type": "content_block_stop",
-         "index": content_block_index,
-     }
-     logger.debug(
-         f"[Anthropic Stream Event] content_block_stop (tool_calls): {json.dumps(stop_block, ensure_ascii=False)}"
-     )
-     yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-     content_block_started = False
-
  # Handle reasoning content (thinking)
  if delta.get("reasoning_content"):
      reasoning = delta["reasoning_content"]
@@ -211,7 +315,11 @@ async def _stream_response(
  start_block = {
      "type": "content_block_start",
      "index": content_block_index,
-     "content_block": {"type": "thinking", "thinking": ""},
+     "content_block": {
+         "type": "thinking",
+         "thinking": "",
+         "signature": "",
+     },
  }
  logger.debug(
      f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}"
@@ -220,12 +328,13 @@ async def _stream_response(
  content_block_started = True
  current_block_type = "thinking"

- delta_block = {
-     "type": "content_block_delta",
-     "index": content_block_index,
-     "delta": {"type": "thinking_delta", "thinking": reasoning},
- }
- yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
+ for chunk in _chunk_text(reasoning):
+     delta_block = {
+         "type": "content_block_delta",
+         "index": content_block_index,
+         "delta": {"type": "thinking_delta", "thinking": chunk},
+     }
+     yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
  continue

  # Handle content
@@ -254,6 +363,7 @@ async def _stream_response(
  content_block_started = True
  current_block_type = "text"

+ output_tokens += _count_tokens(delta["content"])
  delta_block = {
      "type": "content_block_delta",
      "index": content_block_index,
@@ -262,28 +372,50 @@ async def _stream_response(
  yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"

  # Handle tool calls
- tool_calls = delta.get("tool_calls", [])
- if tool_calls:
-     tool_call = tool_calls[0]
-
-     # Handle new tool call (with id) - use separate if, not elif
-     # because a chunk may have both id AND arguments
-     if tool_call.get("id"):
-         if content_block_started:
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-             content_block_started = False
-         content_block_index += 1
-
-         func = tool_call.get("function") or {}
-         yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-         content_block_started = True
-         current_block_type = "tool_use"
-
-     # Handle tool call arguments - always check separately
-     # Note: This is intentionally NOT elif, as a single chunk may contain both
-     if (tool_call.get("function") or {}).get("arguments"):
-         args = (tool_call.get("function") or {}).get("arguments", "")
-         yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
+ if delta.get("tool_calls"):
+     for tool_call in delta["tool_calls"]:
+         tool_call_idx = tool_call.get("index", 0)
+
+         if tool_call.get("id"):
+             if content_block_started and (
+                 current_block_type != "tool_use"
+                 or current_tool_call_index != tool_call_idx
+             ):
+                 yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
+                 content_block_started = False
+                 content_block_index += 1
+
+             if not content_block_started:
+                 func = tool_call.get("function") or {}
+                 yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
+                 content_block_started = True
+                 current_block_type = "tool_use"
+                 current_tool_call_index = tool_call_idx
+                 tool_call_buffers.setdefault(tool_call_idx, "")
+
+         if (tool_call.get("function") or {}).get("arguments"):
+             args = (tool_call.get("function") or {}).get(
+                 "arguments", ""
+             )
+             if (
+                 not content_block_started
+                 or current_block_type != "tool_use"
+                 or current_tool_call_index != tool_call_idx
+             ):
+                 if content_block_started:
+                     yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
+                 content_block_index += 1
+                 func = tool_call.get("function") or {}
+                 tool_id = tool_call.get("id", "")
+                 yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_id, 'name': func.get('name', ''), 'input': {}}})}\n\n"
+                 content_block_started = True
+                 current_block_type = "tool_use"
+                 current_tool_call_index = tool_call_idx
+                 tool_call_buffers.setdefault(tool_call_idx, "")
+             tool_call_buffers[tool_call_idx] = (
+                 tool_call_buffers.get(tool_call_idx, "") + args
+             )
+             yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': tool_call_buffers[tool_call_idx]}})}\n\n"

  # Close final content block
  if content_block_started:
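
The rework above iterates every entry in delta["tool_calls"] and keys argument fragments by the OpenAI `index` field, so parallel tool calls no longer collapse into a single content block. A minimal sketch of that accumulation step (hypothetical fragments; shapes follow the OpenAI streaming schema):

    # Fragments as they might arrive across chunks for two parallel tool calls.
    tool_call_buffers: dict[int, str] = {}
    fragments = [
        {"index": 0, "function": {"arguments": '{"city": '}},
        {"index": 1, "function": {"arguments": '{"query": "news"}'}},
        {"index": 0, "function": {"arguments": '"Paris"}'}},
    ]
    for tc in fragments:
        idx = tc.get("index", 0)
        tool_call_buffers[idx] = tool_call_buffers.get(idx, "") + tc["function"]["arguments"]

    assert tool_call_buffers == {0: '{"city": "Paris"}', 1: '{"query": "news"}'}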
@@ -321,7 +453,7 @@ async def _convert_result_to_stream(
  """Convert a JSONResponse to streaming SSE format."""
  import time

- body = json.loads(result.body)
+ body = json.loads(bytes(result.body).decode("utf-8"))
  message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
  content = body.get("content", [])
  usage = body.get("usage", {})
@@ -368,6 +500,10 @@ async def _convert_result_to_stream(

  elif block_type == "tool_use":
      yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
+     tool_input = block.get("input", {})
+     if tool_input:
+         input_json = json.dumps(tool_input, ensure_ascii=False)
+         yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'input_json_delta', 'partial_json': input_json}})}\n\n"
      yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"

  elif block_type == "server_tool_use":
@@ -377,17 +513,14 @@ async def _convert_result_to_stream(

  elif block_type == "web_search_tool_result":
      # Stream the tool result as its own content block.
-     # Some clients expect `results`, others expect `content`; include both when possible.
      tool_result_block = dict(block)
-     if "content" not in tool_result_block and "results" in tool_result_block:
-         tool_result_block["content"] = tool_result_block["results"]
-
      yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
      yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"

  elif block_type == "thinking":
      # Handle thinking blocks (BetaThinkingBlock)
-     yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
+     signature = block.get("signature", "")
+     yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': '', 'signature': signature}})}\n\n"
      thinking_text = block.get("thinking", "")
      if thinking_text:
          yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
@@ -440,6 +573,7 @@ class ServerToolHandler:
  """
  func_name = tool_call.get("function", {}).get("name")
  call_id = tool_call.get("id", "")
+ openai_call_id = tool_call.get("openai_id", call_id)

  tool_class = self.server_tools[func_name]
  config = self.configs.get(tool_class.tool_type, {})
@@ -460,7 +594,9 @@ class ServerToolHandler:
  content_blocks = tool_class.build_content_blocks(call_id, args, result)

  # Build tool result message for OpenAI
- tool_result_msg = tool_class.build_tool_result_message(call_id, args, result)
+ tool_result_msg = tool_class.build_tool_result_message(
+     openai_call_id, args, result
+ )

  return content_blocks, tool_result_msg

@@ -503,8 +639,23 @@ async def _handle_with_server_tools(
  logger.error(
      f"OpenAI API error: {response.status_code} - {response.text}"
  )
+ raw_text = response.text
+ try:
+     if not raw_text:
+         raw_text = response.content.decode(
+             "utf-8", errors="replace"
+         )
+ except Exception:
+     raw_text = ""
+ if not raw_text:
+     raw_text = response.reason_phrase or ""
+ error_message = (raw_text or "").strip()
  error_response = AnthropicErrorResponse(
-     error=AnthropicError(type="api_error", message=response.text)
+     error=AnthropicError(
+         type="api_error",
+         message=error_message
+         or f"Upstream API error ({response.status_code})",
+     )
  )
  return JSONResponse(
      status_code=response.status_code,
@@ -512,9 +663,8 @@ async def _handle_with_server_tools(
  )

  completion_data = response.json()
- # Log raw OpenAI response for server tools
- logger.info(
-     f"[OpenAI Response (Server Tools)] {json.dumps(completion_data, ensure_ascii=False, indent=2)[:2000]}"
+ logger.debug(
+     f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}..."
  )
  from openai.types.chat import ChatCompletion

@@ -531,13 +681,9 @@ async def _handle_with_server_tools(

  if tool_calls:
      for tc in tool_calls:
-         func_name = tc.function.name if tc.function else ""
-         func_args = tc.function.arguments if tc.function else "{}"
+         func = getattr(tc, "function", None)
+         func_name = func.name if func else ""
          logger.info(f" Tool call: {func_name}")
-         logger.info(f" Tool ID: {tc.id}")
-         logger.info(
-             f" Arguments: {func_args[:200]}"
-         )  # Log first 200 chars

          # Generate Anthropic-style ID for server tools
          is_server = handler.is_server_tool_call(
@@ -548,18 +694,21 @@ async def _handle_with_server_tools(
  )

  # Use Anthropic-style ID for server tools, original ID otherwise
- tool_id = _generate_server_tool_id() if is_server else tc.id
+ client_tool_id = (
+     _generate_server_tool_id() if is_server else tc.id
+ )

  tc_dict = {
-     "id": tool_id,
+     "id": client_tool_id,
+     "openai_id": tc.id,
      "function": {
          "name": func_name,
-         "arguments": tc.function.arguments
-         if tc.function
-         else "{}",
+         "arguments": func.arguments if func else "{}",
      },
  }
- logger.info(f" Is server tool: {is_server}, ID: {tool_id}")
+ logger.info(
+     f" Is server tool: {is_server}, ID: {client_tool_id}"
+ )
  if is_server:
      server_tool_calls.append(tc_dict)
  else:
@@ -580,6 +729,9 @@ async def _handle_with_server_tools(

  if message_dict.get("usage"):
      message_dict["usage"]["server_tool_use"] = handler.usage
+ message_dict["usage"] = _normalize_usage(
+     message_dict.get("usage")
+ )

  # Log full response for debugging
  logger.info(
@@ -590,7 +742,9 @@ async def _handle_with_server_tools(

      return JSONResponse(content=message_dict)

- return JSONResponse(content=message.model_dump())
+ message_dict = message.model_dump()
+ message_dict["usage"] = _normalize_usage(message_dict.get("usage"))
+ return JSONResponse(content=message_dict)

  # Check max_uses limit
  if total_tool_calls >= max_uses:
@@ -615,9 +769,23 @@ async def _handle_with_server_tools(
  accumulated_content.extend(error_blocks)

  # Continue with modified messages
+ assistant_tool_calls = []
+ for call in server_tool_calls:
+     assistant_tool_calls.append(
+         {
+             "id": call.get("openai_id", call.get("id", "")),
+             "type": "function",
+             "function": {
+                 "name": call.get("function", {}).get("name", ""),
+                 "arguments": call.get("function", {}).get(
+                     "arguments", "{}"
+                 ),
+             },
+         }
+     )
  messages = params.get("messages", [])
  messages = _add_tool_results_to_messages(
-     messages, server_tool_calls, handler, is_error=True
+     messages, assistant_tool_calls, handler, is_error=True
  )
  params["messages"] = messages
  continue
@@ -635,7 +803,7 @@ async def _handle_with_server_tools(
  # Track for assistant message
  assistant_tool_calls.append(
      {
-         "id": call["id"],
+         "id": call.get("openai_id", call.get("id", "")),
          "type": "function",
          "function": {
              "name": call["function"]["name"],
@@ -657,17 +825,17 @@ async def _handle_with_server_tools(
              type="timeout_error", message="Request timed out"
          )
      )
-     raise HTTPException(
+     return JSONResponse(
          status_code=HTTPStatus.GATEWAY_TIMEOUT,
-         detail=error_response.model_dump(),
+         content=error_response.model_dump(),
      )
  except httpx.RequestError as e:
      error_response = AnthropicErrorResponse(
          error=AnthropicError(type="connection_error", message=str(e))
      )
-     raise HTTPException(
+     return JSONResponse(
          status_code=HTTPStatus.BAD_GATEWAY,
-         detail=error_response.model_dump(),
+         content=error_response.model_dump(),
      )

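A plausible reading of the HTTPException → JSONResponse swap (here and again in create_message below), inferred rather than stated in the diff: FastAPI's default exception handler wraps HTTPException.detail under a top-level "detail" key, while returning a JSONResponse sends the Anthropic error envelope verbatim. A sketch of the two shapes a client would see, assuming FastAPI's default handler:

    from fastapi.responses import JSONResponse

    body = {"type": "error", "error": {"type": "timeout_error", "message": "Request timed out"}}
    # raise HTTPException(status_code=504, detail=body)  -> {"detail": {...}} on the wire
    resp = JSONResponse(status_code=504, content=body)   # -> the envelope itself on the wire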
@@ -693,10 +861,11 @@ def _add_tool_results_to_messages(
  # Add tool results
  if is_error:
      for call in tool_calls:
+         tool_call_id = call.get("openai_id", call.get("id", ""))
          messages.append(
              {
                  "role": "tool",
-                 "tool_call_id": call["id"],
+                 "tool_call_id": tool_call_id,
                  "content": json.dumps(
                      {
                          "error": "max_uses_exceeded",
@@ -743,7 +912,7 @@ async def create_message(
              type="invalid_request_error", message=f"Invalid JSON: {e}"
          )
      )
-     return JSONResponse(status_code=422, content=error_response.model_dump())
+     return JSONResponse(status_code=400, content=error_response.model_dump())
  except Exception as e:
      logger.error(f"Failed to parse request body: {e}")
      error_response = AnthropicErrorResponse(
@@ -759,7 +928,7 @@ async def create_message(
          message="Request body must be a JSON object",
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  model_value = anthropic_params.get("model")
  if not isinstance(model_value, str) or not model_value.strip():
@@ -768,7 +937,7 @@ async def create_message(
          type="invalid_request_error", message="Model must be a non-empty string"
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  messages_value = anthropic_params.get("messages")
  if not isinstance(messages_value, list) or len(messages_value) == 0:
@@ -778,7 +947,7 @@ async def create_message(
          message="Messages must be a non-empty list",
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  max_tokens_value = anthropic_params.get("max_tokens")
  if not isinstance(max_tokens_value, int):
@@ -787,7 +956,7 @@ async def create_message(
          type="invalid_request_error", message="max_tokens is required"
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  # Check for server tools
  tools = anthropic_params.get("tools", [])
@@ -799,7 +968,7 @@ async def create_message(

  # Convert Anthropic params to OpenAI params
  openai_params_obj = convert_anthropic_to_openai(
-     anthropic_params,
+     cast(MessageCreateParams, anthropic_params),
      enabled_server_tools=enabled_server_tools if has_server_tools else None,
  )
  openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
@@ -856,8 +1025,23 @@ async def create_message(
  response = await client.post(url, headers=headers, json=openai_params)

  if response.status_code != 200:
+     raw_text = response.text
+     try:
+         if not raw_text:
+             raw_text = response.content.decode(
+                 "utf-8", errors="replace"
+             )
+     except Exception:
+         raw_text = ""
+     if not raw_text:
+         raw_text = response.reason_phrase or ""
+     error_message = (raw_text or "").strip()
      error_response = AnthropicErrorResponse(
-         error=AnthropicError(type="api_error", message=response.text)
+         error=AnthropicError(
+             type="api_error",
+             message=error_message
+             or f"Upstream API error ({response.status_code})",
+         )
      )
      return JSONResponse(
          status_code=response.status_code,
@@ -865,60 +1049,23 @@ async def create_message(
      )

      openai_completion = response.json()
-     # Log raw OpenAI response
-     logger.info(
-         f"[OpenAI Raw Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)[:2000]}"
+     logger.debug(
+         f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}"
      )

-     # Log response details
-     if openai_completion.get("choices"):
-         choice = openai_completion["choices"][0]
-         message = choice.get("message", {})
-         finish_reason = choice.get("finish_reason")
-         content_preview = (
-             message.get("content", "")[:100]
-             if message.get("content")
-             else ""
-         )
-         tool_calls_count = (
-             len(message.get("tool_calls", []))
-             if message.get("tool_calls")
-             else 0
-         )
-         logger.info(
-             f"[OpenAI Response Details] finish_reason={finish_reason}, "
-             f"content_length={len(message.get('content', ''))}, "
-             f"tool_calls={tool_calls_count}, "
-             f"content_preview={content_preview[:50]!r}"
-         )
-
      from openai.types.chat import ChatCompletion

      completion = ChatCompletion.model_validate(openai_completion)
      anthropic_message = convert_openai_to_anthropic(completion, model)

      anthropic_response = anthropic_message.model_dump()
-     # Log converted Anthropic response
-     logger.info(
-         f"[Anthropic Converted Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)[:2000]}"
+     anthropic_response["usage"] = _normalize_usage(
+         anthropic_response.get("usage")
      )
-
-     # Log Anthropic response details
-     content_blocks = anthropic_response.get("content", [])
-     stop_reason = anthropic_response.get("stop_reason")
-     usage = anthropic_response.get("usage", {})
-     logger.info(
-         f"[Anthropic Response Details] stop_reason={stop_reason}, "
-         f"content_blocks={len(content_blocks)}, "
-         f"input_tokens={usage.get('input_tokens')}, "
-         f"output_tokens={usage.get('output_tokens')}"
+     logger.debug(
+         f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}"
      )

-     # Log content block types
-     if content_blocks:
-         block_types = [block.get("type") for block in content_blocks]
-         logger.info(f"[Anthropic Content Blocks] types={block_types}")
-
      return JSONResponse(content=anthropic_response)

  except httpx.TimeoutException:
@@ -927,17 +1074,17 @@ async def create_message(
              type="timeout_error", message="Request timed out"
          )
      )
-     raise HTTPException(
+     return JSONResponse(
          status_code=HTTPStatus.GATEWAY_TIMEOUT,
-         detail=error_response.model_dump(),
+         content=error_response.model_dump(),
      )
  except httpx.RequestError as e:
      error_response = AnthropicErrorResponse(
          error=AnthropicError(type="connection_error", message=str(e))
      )
-     raise HTTPException(
+     return JSONResponse(
          status_code=HTTPStatus.BAD_GATEWAY,
-         detail=error_response.model_dump(),
+         content=error_response.model_dump(),
      )

@@ -991,7 +1138,7 @@ async def count_tokens(
              type="invalid_request_error", message=f"Invalid JSON: {e}"
          )
      )
-     return JSONResponse(status_code=422, content=error_response.model_dump())
+     return JSONResponse(status_code=400, content=error_response.model_dump())
  except Exception as e:
      error_response = AnthropicErrorResponse(
          error=AnthropicError(type="invalid_request_error", message=str(e))
@@ -1006,7 +1153,7 @@ async def count_tokens(
          message="Request body must be a JSON object",
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  messages = body_json.get("messages", [])
  if not isinstance(messages, list):
@@ -1015,7 +1162,7 @@ async def count_tokens(
          type="invalid_request_error", message="messages must be a list"
      )
  )
- return JSONResponse(status_code=422, content=error_response.model_dump())
+ return JSONResponse(status_code=400, content=error_response.model_dump())

  model = body_json.get("model", "")
  system = body_json.get("system")
@@ -1023,7 +1170,7 @@ async def count_tokens(

  try:
      # Use tiktoken for token counting
-     import tiktoken
+     import tiktoken  # type: ignore[import-not-found]

      # Map model names to tiktoken encoding
      # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
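
A short sketch of the approximation this comment describes (cl100k_base is an OpenAI tokenizer, so counts for Claude-family models are estimates, not exact):

    import tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")  # stand-in encoding for Claude models
    estimated_tokens = len(encoding.encode("Hello, Claude"))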