local-openai2anthropic 0.1.0-py3-none-any.whl → 0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any, AsyncGenerator
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
@@ -43,504 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = ''.join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream("POST", url, headers=headers, json=json_data) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                try:
-                    error_json = json.loads(error_body.decode())
-                    error_msg = error_json.get("error", {}).get("message", error_body.decode())
-                except json.JSONDecodeError:
-                    error_msg = error_body.decode()
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            finish_reason = None
-            input_tokens = 0
-            output_tokens = 0
-            message_id = None
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage", {})
-                    input_tokens = usage.get("prompt_tokens", 0)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage", {})
-                    if usage:
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-                        yield f"event: message_delta\ndata: {json.dumps({'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}})}\n\n"
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-                    continue
-
-                # Handle content
-                if delta.get("content"):
-                    if not content_block_started:
-                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-                        content_block_started = True
-
-                    yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}})}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    tool_call = delta["tool_calls"][0]
-
-                    if tool_call.get("id"):
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-                            content_block_index += 1
-
-                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': tool_call.get('function', {}).get('name', ''), 'input': {}}})}\n\n"
-                        content_block_started = True
-
-                    elif tool_call.get("function", {}).get("arguments"):
-                        args = tool_call["function"]["arguments"]
-                        yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-
-            # Message stop
-            yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-            yield "data: [DONE]\n\n"
-
-    except Exception as e:
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-        yield "data: [DONE]\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(result.body)
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {"end_turn": "stop", "max_tokens": "length", "tool_use": "tool_calls"}
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            # Some clients expect `results`, others expect `content`; include both when possible.
-            tool_result_block = dict(block)
-            if "content" not in tool_result_block and "results" in tool_result_block:
-                tool_result_block["content"] = tool_result_block["results"]
-
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-    yield "data: [DONE]\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(call_id, args, result)
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.info(f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}")
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(type="api_error", message=response.text)
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.info(f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}...")
-                from openai.types.chat import ChatCompletion
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}")
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func_name = tc.function.name if tc.function else ""
-                        logger.info(f" Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call({
-                            "id": tc.id,
-                            "function": {"name": func_name, "arguments": ""},
-                        })
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        tool_id = _generate_server_tool_id() if is_server else tc.id
-
-                        tc_dict = {
-                            "id": tool_id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": tc.function.arguments if tc.function else "{}",
-                            },
-                        }
-                        logger.info(f" Is server tool: {is_server}, ID: {tool_id}")
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}")
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = accumulated_content + message_dict.get("content", [])
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-
-                        # Log full response for debugging
-                        logger.info(f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}")
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    return JSONResponse(content=message.model_dump())
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, server_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append({
-                        "id": call["id"],
-                        "type": "function",
-                        "function": {
-                            "name": call["function"]["name"],
-                            "arguments": call["function"]["arguments"],
-                        },
-                    })
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="timeout_error", message="Request timed out")
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    detail=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    detail=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            messages.append({
-                "role": "tool",
-                "tool_call_id": call["id"],
-                "content": json.dumps({
-                    "error": "max_uses_exceeded",
-                    "message": "Maximum tool uses exceeded.",
-                }),
-            })
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
 @router.post(
     "/v1/messages",
     response_model=Message,
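
Everything deleted in the hunk above still exists; it moved into the local_openai2anthropic.streaming, .tools, and .utils modules and is re-imported here, with the __all__ block keeping old import paths alive for tests. A minimal sketch of what that preserves (the router module's own path is not visible in this diff, so local_openai2anthropic.api below is an assumed, hypothetical name):

    # Hypothetical: "local_openai2anthropic.api" stands in for this router
    # module, whose real import path is not shown in the diff.
    from local_openai2anthropic.api import _stream_response        # old path, via re-export
    from local_openai2anthropic.streaming import _stream_response as new_impl

    assert _stream_response is new_impl  # same object, just a new home
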
@@ -562,13 +96,18 @@ async def create_message(
     try:
         body_bytes = await request.body()
         body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(
+            f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
         anthropic_params = body_json
     except json.JSONDecodeError as e:
         logger.error(f"Invalid JSON in request body: {e}")
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="invalid_request_error", message=f"Invalid JSON: {e}")
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
         )
-        return JSONResponse(status_code=422, content=error_response.model_dump())
+        return JSONResponse(status_code=400, content=error_response.model_dump())
     except Exception as e:
         logger.error(f"Failed to parse request body: {e}")
         error_response = AnthropicErrorResponse(
@@ -579,30 +118,40 @@ async def create_message(
     # Validate request shape early (avoid making upstream calls for obviously invalid requests)
     if not isinstance(anthropic_params, dict):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="invalid_request_error", message="Request body must be a JSON object")
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
         )
-        return JSONResponse(status_code=422, content=error_response.model_dump())
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     model_value = anthropic_params.get("model")
     if not isinstance(model_value, str) or not model_value.strip():
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="invalid_request_error", message="Model must be a non-empty string")
+            error=AnthropicError(
+                type="invalid_request_error", message="Model must be a non-empty string"
+            )
         )
-        return JSONResponse(status_code=422, content=error_response.model_dump())
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     messages_value = anthropic_params.get("messages")
     if not isinstance(messages_value, list) or len(messages_value) == 0:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="invalid_request_error", message="Messages must be a non-empty list")
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Messages must be a non-empty list",
+            )
         )
-        return JSONResponse(status_code=422, content=error_response.model_dump())
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     max_tokens_value = anthropic_params.get("max_tokens")
     if not isinstance(max_tokens_value, int):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="invalid_request_error", message="max_tokens is required")
+            error=AnthropicError(
+                type="invalid_request_error", message="max_tokens is required"
+            )
         )
-        return JSONResponse(status_code=422, content=error_response.model_dump())
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     # Check for server tools
     tools = anthropic_params.get("tools", [])
@@ -614,11 +163,17 @@ async def create_message(
 
     # Convert Anthropic params to OpenAI params
     openai_params_obj = convert_anthropic_to_openai(
-        anthropic_params,
+        cast(MessageCreateParams, anthropic_params),
        enabled_server_tools=enabled_server_tools if has_server_tools else None,
     )
     openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
 
+    # Log converted OpenAI request (remove internal fields)
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith("_")}
+    logger.debug(
+        f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}"
+    )
+
     stream = openai_params.get("stream", False)
     model = openai_params.get("model", "")
 
@@ -644,7 +199,7 @@ async def create_message(
         result = await _handle_with_server_tools(
             openai_params, url, headers, settings, tool_classes, model
         )
-
+
         # If original request was streaming, convert result to streaming format
         if stream:
             return StreamingResponse(
@@ -665,8 +220,23 @@ async def create_message(
             response = await client.post(url, headers=headers, json=openai_params)
 
             if response.status_code != 200:
+                raw_text = response.text
+                try:
+                    if not raw_text:
+                        raw_text = response.content.decode(
+                            "utf-8", errors="replace"
+                        )
+                except Exception:
+                    raw_text = ""
+                if not raw_text:
+                    raw_text = response.reason_phrase or ""
+                error_message = (raw_text or "").strip()
                 error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=response.text)
+                    error=AnthropicError(
+                        type="api_error",
+                        message=error_message
+                        or f"Upstream API error ({response.status_code})",
+                    )
                 )
                 return JSONResponse(
                     status_code=response.status_code,
@@ -674,27 +244,42 @@ async def create_message(
                 )
 
             openai_completion = response.json()
+            logger.debug(
+                f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}"
+            )
+
             from openai.types.chat import ChatCompletion
+
             completion = ChatCompletion.model_validate(openai_completion)
             anthropic_message = convert_openai_to_anthropic(completion, model)
 
-            return JSONResponse(content=anthropic_message.model_dump())
+            anthropic_response = anthropic_message.model_dump()
+            anthropic_response["usage"] = _normalize_usage(
+                anthropic_response.get("usage")
+            )
+            logger.debug(
+                f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}"
+            )
+
+            return JSONResponse(content=anthropic_response)
 
     except httpx.TimeoutException:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(type="timeout_error", message="Request timed out")
+            error=AnthropicError(
+                type="timeout_error", message="Request timed out"
+            )
         )
-        raise HTTPException(
+        return JSONResponse(
             status_code=HTTPStatus.GATEWAY_TIMEOUT,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
     except httpx.RequestError as e:
         error_response = AnthropicErrorResponse(
             error=AnthropicError(type="connection_error", message=str(e))
         )
-        raise HTTPException(
+        return JSONResponse(
             status_code=HTTPStatus.BAD_GATEWAY,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
 
 
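The last two hunks change the error paths from raise HTTPException(..., detail=...) to return JSONResponse(..., content=...). The practical difference for callers: upstream timeouts and connection failures now arrive as a bare Anthropic-style error body instead of being wrapped in FastAPI's {"detail": ...} envelope. A client-side sketch, assuming the proxy listens on localhost:8000 (address is an assumption, not from the diff):

    import httpx

    resp = httpx.post(
        "http://localhost:8000/v1/messages",  # assumed proxy address
        json={
            "model": "my-model",
            "max_tokens": 16,
            "messages": [{"role": "user", "content": "hi"}],
        },
    )
    if resp.status_code != 200:
        err = resp.json()["error"]          # e.g. {"type": "timeout_error", ...}
        print(err["type"], err["message"])  # no {"detail": ...} wrapper to unwrap
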
@@ -727,6 +312,112 @@ async def list_models(
     )
 
 
+@router.post("/v1/messages/count_tokens")
+async def count_tokens(
+    request: Request,
+    settings: Settings = Depends(get_request_settings),
+) -> JSONResponse:
+    """
+    Count tokens in messages without creating a message.
+    Uses tiktoken for local token counting.
+    """
+    try:
+        body_bytes = await request.body()
+        body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(
+            f"[Count Tokens Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
+    except json.JSONDecodeError as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+    except Exception as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message=str(e))
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    # Validate required fields
+    if not isinstance(body_json, dict):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    messages = body_json.get("messages", [])
+    if not isinstance(messages, list):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message="messages must be a list"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    model = body_json.get("model", "")
+    system = body_json.get("system")
+    tools = body_json.get("tools", [])
+
+    try:
+        # Use tiktoken for token counting
+        import tiktoken  # type: ignore[import-not-found]
+
+        # Map model names to tiktoken encoding
+        # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
+        encoding = tiktoken.get_encoding("cl100k_base")
+
+        total_tokens = 0
+
+        # Count system prompt tokens if present
+        if system:
+            if isinstance(system, str):
+                total_tokens += len(encoding.encode(system))
+            elif isinstance(system, list):
+                for block in system:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        total_tokens += len(encoding.encode(block.get("text", "")))
+
+        # Count message tokens
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total_tokens += len(encoding.encode(content))
+            elif isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            total_tokens += len(encoding.encode(block.get("text", "")))
+                        elif block.get("type") == "image":
+                            # Images are typically counted as a fixed number of tokens
+                            # This is an approximation
+                            total_tokens += 85  # Standard approximation for images
+
+        # Count tool definitions tokens
+        if tools:
+            for tool in tools:
+                tool_def = tool if isinstance(tool, dict) else tool.model_dump()
+                # Rough approximation for tool definitions
+                total_tokens += len(encoding.encode(json.dumps(tool_def)))
+
+        logger.debug(f"[Count Tokens Response] input_tokens: {total_tokens}")
+
+        return JSONResponse(content={"input_tokens": total_tokens})
+
+    except Exception as e:
+        logger.error(f"Token counting error: {e}")
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="internal_error", message=f"Failed to count tokens: {str(e)}"
+            )
+        )
+        return JSONResponse(status_code=500, content=error_response.model_dump())
+
+
 @router.get("/health")
 async def health_check() -> dict[str, str]:
     """Health check endpoint."""