local-openai2anthropic 0.2.3__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
  import json
  import logging
- import secrets
- import string
  from http import HTTPStatus
- from typing import Any, AsyncGenerator
+ from typing import Any, cast
 
  import httpx
  from fastapi import APIRouter, Depends, HTTPException, Request
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
      MessageCreateParams,
  )
  from local_openai2anthropic.server_tools import ServerToolRegistry
+ from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+ from local_openai2anthropic.tools import (
+     ServerToolHandler,
+     _add_tool_results_to_messages,
+     _handle_with_server_tools,
+ )
+ from local_openai2anthropic.utils import (
+     _chunk_text,
+     _count_tokens,
+     _estimate_input_tokens,
+     _generate_server_tool_id,
+     _normalize_usage,
+ )
 
  logger = logging.getLogger(__name__)
  router = APIRouter()
 
+ # Backward compatibility: re-export functions used by tests
+ __all__ = [
+     "router",
+     "get_request_settings",
+     "create_message",
+     "list_models",
+     "count_tokens",
+     "health_check",
+     # Backward compatibility exports
+     "_stream_response",
+     "_convert_result_to_stream",
+     "ServerToolHandler",
+     "_handle_with_server_tools",
+     "_add_tool_results_to_messages",
+     "_generate_server_tool_id",
+     "_normalize_usage",
+     "_count_tokens",
+     "_chunk_text",
+     "_estimate_input_tokens",
+ ]
+
 
  def get_request_settings(request: Request) -> Settings:
      """Resolve Settings from the running app when available.
@@ -43,553 +75,6 @@ def get_request_settings(request: Request) -> Settings:
      return get_settings()
 
 
- def _generate_server_tool_id() -> str:
-     """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-     # Generate 24 random alphanumeric characters
-     chars = string.ascii_lowercase + string.digits
-     random_part = ''.join(secrets.choice(chars) for _ in range(24))
-     return f"srvtoolu_{random_part}"
-
-
- async def _stream_response(
-     client: httpx.AsyncClient,
-     url: str,
-     headers: dict,
-     json_data: dict,
-     model: str,
- ) -> AsyncGenerator[str, None]:
-     """
-     Stream response from OpenAI and convert to Anthropic format.
-     """
-     try:
-         async with client.stream("POST", url, headers=headers, json=json_data) as response:
-             if response.status_code != 200:
-                 error_body = await response.aread()
-                 try:
-                     error_json = json.loads(error_body.decode())
-                     error_msg = error_json.get("error", {}).get("message", error_body.decode())
-                 except json.JSONDecodeError:
-                     error_msg = error_body.decode()
-
-                 error_event = AnthropicErrorResponse(
-                     error=AnthropicError(type="api_error", message=error_msg)
-                 )
-                 yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                 yield "data: [DONE]\n\n"
-                 return
-
-             # Process SSE stream
-             first_chunk = True
-             content_block_started = False
-             content_block_index = 0
-             current_block_type = None  # 'thinking', 'text', or 'tool_use'
-             finish_reason = None
-             input_tokens = 0
-             output_tokens = 0
-             message_id = None
-
-             async for line in response.aiter_lines():
-                 if not line.startswith("data: "):
-                     continue
-
-                 data = line[6:]
-                 if data == "[DONE]":
-                     break
-
-                 try:
-                     chunk = json.loads(data)
-                     logger.debug(f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}")
-                 except json.JSONDecodeError:
-                     continue
-
-                 # First chunk: message_start
-                 if first_chunk:
-                     message_id = chunk.get("id", "")
-                     usage = chunk.get("usage") or {}
-                     input_tokens = usage.get("prompt_tokens", 0)
-
-                     start_event = {
-                         "type": "message_start",
-                         "message": {
-                             "id": message_id,
-                             "type": "message",
-                             "role": "assistant",
-                             "content": [],
-                             "model": model,
-                             "stop_reason": None,
-                             "stop_sequence": None,
-                             "usage": {
-                                 "input_tokens": input_tokens,
-                                 "output_tokens": 0,
-                                 "cache_creation_input_tokens": None,
-                                 "cache_read_input_tokens": None,
-                             },
-                         },
-                     }
-                     logger.debug(f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}")
-                     yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                     first_chunk = False
-                     continue
-
-                 # Handle usage-only chunks
-                 if not chunk.get("choices"):
-                     usage = chunk.get("usage") or {}
-                     if usage:
-                         if content_block_started:
-                             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                             content_block_started = False
-
-                         stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-                         delta_event = {'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}}
-                         logger.debug(f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}")
-                         yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                     continue
-
-                 choice = chunk["choices"][0]
-                 delta = choice.get("delta", {})
-
-                 # Track finish reason (but don't skip - content may also be present)
-                 if choice.get("finish_reason"):
-                     finish_reason = choice["finish_reason"]
-
-                 # Handle reasoning content (thinking)
-                 if delta.get("reasoning_content"):
-                     reasoning = delta["reasoning_content"]
-                     # Start thinking content block if not already started
-                     if not content_block_started or current_block_type != 'thinking':
-                         # Close previous block if exists
-                         if content_block_started:
-                             stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                             logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
-                             yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                             content_block_index += 1
-                         start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'thinking', 'thinking': ''}}
-                         logger.debug(f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}")
-                         yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                         content_block_started = True
-                         current_block_type = 'thinking'
-
-                     delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'thinking_delta', 'thinking': reasoning}}
-                     yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-                     continue
-
-                 # Handle content
-                 if delta.get("content"):
-                     if not content_block_started or current_block_type != 'text':
-                         # Close previous block if exists
-                         if content_block_started:
-                             stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                             logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
-                             yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                             content_block_index += 1
-                         start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}}
-                         logger.debug(f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}")
-                         yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                         content_block_started = True
-                         current_block_type = 'text'
-
-                     delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}}
-                     yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-
-                 # Handle tool calls
-                 if delta.get("tool_calls"):
-                     tool_call = delta["tool_calls"][0]
-
-                     if tool_call.get("id"):
-                         if content_block_started:
-                             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                             content_block_started = False
-                             content_block_index += 1
-
-                         func = tool_call.get('function') or {}
-                         yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                         content_block_started = True
-                         current_block_type = 'tool_use'
-
-                     elif (tool_call.get('function') or {}).get("arguments"):
-                         args = (tool_call.get('function') or {}).get("arguments", "")
-                         yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-             # Close final content block
-             if content_block_started:
-                 stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                 logger.debug(f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}")
-                 yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-
-             # Message stop
-             stop_event = {'type': 'message_stop'}
-             logger.debug(f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}")
-             yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
-
-     except Exception as e:
-         import traceback
-         error_msg = f"{str(e)}\n{traceback.format_exc()}"
-         logger.error(f"Stream error: {error_msg}")
-         error_event = AnthropicErrorResponse(
-             error=AnthropicError(type="internal_error", message=str(e))
-         )
-         yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-
-
- async def _convert_result_to_stream(
-     result: JSONResponse,
-     model: str,
- ) -> AsyncGenerator[str, None]:
-     """Convert a JSONResponse to streaming SSE format."""
-     import time
-
-     body = json.loads(result.body)
-     message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-     content = body.get("content", [])
-     usage = body.get("usage", {})
-     stop_reason = body.get("stop_reason", "end_turn")
-
-     # Map stop_reason
-     stop_reason_map = {"end_turn": "stop", "max_tokens": "length", "tool_use": "tool_calls"}
-     openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-     # 1. message_start event
-     start_event = {
-         "type": "message_start",
-         "message": {
-             "id": message_id,
-             "type": "message",
-             "role": "assistant",
-             "content": [],
-             "model": model,
-             "stop_reason": None,
-             "stop_sequence": None,
-             "usage": {
-                 "input_tokens": usage.get("input_tokens", 0),
-                 "output_tokens": 0,
-                 "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                 "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-             },
-         },
-     }
-     yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-     # 2. Process content blocks
-     for i, block in enumerate(content):
-         block_type = block.get("type")
-
-         if block_type == "text":
-             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-             text = block.get("text", "")
-             yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-         elif block_type == "tool_use":
-             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-         elif block_type == "server_tool_use":
-             # Preserve official Anthropic block type so clients can count server tool uses.
-             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-         elif block_type == "web_search_tool_result":
-             # Stream the tool result as its own content block.
-             # Some clients expect `results`, others expect `content`; include both when possible.
-             tool_result_block = dict(block)
-             if "content" not in tool_result_block and "results" in tool_result_block:
-                 tool_result_block["content"] = tool_result_block["results"]
-
-             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-         elif block_type == "thinking":
-             # Handle thinking blocks (BetaThinkingBlock)
-             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
-             thinking_text = block.get("thinking", "")
-             if thinking_text:
-                 yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
-             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-     # 3. message_delta with final usage
-     delta_event = {
-         "type": "message_delta",
-         "delta": {"stop_reason": stop_reason},
-         "usage": {
-             "input_tokens": usage.get("input_tokens", 0),
-             "output_tokens": usage.get("output_tokens", 0),
-             "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-             "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-             "server_tool_use": usage.get("server_tool_use"),
-         },
-     }
-     yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-     # 4. message_stop
-     yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-
-
- class ServerToolHandler:
-     """Handles server tool execution for non-streaming requests."""
-
-     def __init__(
-         self,
-         server_tools: list[type],
-         configs: dict[str, dict[str, Any]],
-         settings: Settings,
-     ):
-         self.server_tools = {t.tool_name: t for t in server_tools}
-         self.configs = configs
-         self.settings = settings
-         self.usage: dict[str, int] = {}
-
-     def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-         """Check if a tool call is for a server tool."""
-         func_name = tool_call.get("function", {}).get("name")
-         return func_name in self.server_tools
-
-     async def execute_tool(
-         self,
-         tool_call: dict[str, Any],
-     ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-         """
-         Execute a server tool and return content blocks + tool result message.
-         Returns: (content_blocks, tool_result_message)
-         """
-         func_name = tool_call.get("function", {}).get("name")
-         call_id = tool_call.get("id", "")
-
-         tool_class = self.server_tools[func_name]
-         config = self.configs.get(tool_class.tool_type, {})
-
-         # Extract call arguments
-         args = tool_class.extract_call_args(tool_call)
-         if args is None:
-             args = {}
-
-         # Execute the tool
-         result = await tool_class.execute(call_id, args, config, self.settings)
-
-         # Update usage
-         for key, value in result.usage_increment.items():
-             self.usage[key] = self.usage.get(key, 0) + value
-
-         # Build content blocks
-         content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-         # Build tool result message for OpenAI
-         tool_result_msg = tool_class.build_tool_result_message(call_id, args, result)
-
-         return content_blocks, tool_result_msg
-
-
- async def _handle_with_server_tools(
-     openai_params: dict[str, Any],
-     url: str,
-     headers: dict[str, str],
-     settings: Settings,
-     server_tools: list[type],
-     model: str,
- ) -> JSONResponse:
-     """Handle request with server tool execution loop."""
-     params = dict(openai_params)
-     configs = params.pop("_server_tools_config", {})
-
-     handler = ServerToolHandler(server_tools, configs, settings)
-     accumulated_content: list[dict[str, Any]] = []
-
-     # Get max_uses from configs (default to settings or 5)
-     max_uses = settings.websearch_max_uses
-     for config in configs.values():
-         if config.get("max_uses"):
-             max_uses = config["max_uses"]
-             break
-
-     total_tool_calls = 0
-
-     while True:
-         async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-             try:
-                 # Log full request for debugging
-                 logger.info(f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}")
-
-                 response = await client.post(url, headers=headers, json=params)
-
-                 if response.status_code != 200:
-                     logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
-                     error_response = AnthropicErrorResponse(
-                         error=AnthropicError(type="api_error", message=response.text)
-                     )
-                     return JSONResponse(
-                         status_code=response.status_code,
-                         content=error_response.model_dump(),
-                     )
-
-                 completion_data = response.json()
-                 logger.info(f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}...")
-                 from openai.types.chat import ChatCompletion
-                 completion = ChatCompletion.model_validate(completion_data)
-
-                 # Check for server tool calls
-                 server_tool_calls = []
-                 other_tool_calls = []
-
-                 tool_calls = completion.choices[0].message.tool_calls
-                 logger.info(f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}")
-
-                 if tool_calls:
-                     for tc in tool_calls:
-                         func_name = tc.function.name if tc.function else ""
-                         logger.info(f" Tool call: {func_name}")
-
-                         # Generate Anthropic-style ID for server tools
-                         is_server = handler.is_server_tool_call({
-                             "id": tc.id,
-                             "function": {"name": func_name, "arguments": ""},
-                         })
-
-                         # Use Anthropic-style ID for server tools, original ID otherwise
-                         tool_id = _generate_server_tool_id() if is_server else tc.id
-
-                         tc_dict = {
-                             "id": tool_id,
-                             "function": {
-                                 "name": func_name,
-                                 "arguments": tc.function.arguments if tc.function else "{}",
-                             },
-                         }
-                         logger.info(f" Is server tool: {is_server}, ID: {tool_id}")
-                         if is_server:
-                             server_tool_calls.append(tc_dict)
-                         else:
-                             other_tool_calls.append(tc)
-
-                 # No server tool calls - we're done
-                 logger.info(f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}")
-                 if not server_tool_calls:
-                     message = convert_openai_to_anthropic(completion, model)
-
-                     if accumulated_content:
-                         message_dict = message.model_dump()
-                         message_dict["content"] = accumulated_content + message_dict.get("content", [])
-
-                         if message_dict.get("usage"):
-                             message_dict["usage"]["server_tool_use"] = handler.usage
-
-                         # Log full response for debugging
-                         logger.info(f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}")
-                         logger.info(f"Response usage: {message_dict.get('usage')}")
-                         logger.info(f"Server tool use count: {handler.usage}")
-
-                         return JSONResponse(content=message_dict)
-
-                     return JSONResponse(content=message.model_dump())
-
-                 # Check max_uses limit
-                 if total_tool_calls >= max_uses:
-                     logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                     # Return error for each call
-                     for call in server_tool_calls:
-                         func_name = call.get("function", {}).get("name", "")
-                         tool_class = handler.server_tools.get(func_name)
-                         if tool_class:
-                             from local_openai2anthropic.server_tools import ToolResult
-                             error_result = ToolResult(
-                                 success=False,
-                                 content=[],
-                                 error_code="max_uses_exceeded",
-                             )
-                             error_blocks = tool_class.build_content_blocks(
-                                 call["id"],
-                                 {},
-                                 error_result,
-                             )
-                             accumulated_content.extend(error_blocks)
-
-                     # Continue with modified messages
-                     messages = params.get("messages", [])
-                     messages = _add_tool_results_to_messages(
-                         messages, server_tool_calls, handler, is_error=True
-                     )
-                     params["messages"] = messages
-                     continue
-
-                 # Execute server tools
-                 messages = params.get("messages", [])
-                 assistant_tool_calls = []
-                 tool_results = []
-
-                 for call in server_tool_calls:
-                     total_tool_calls += 1
-                     content_blocks, tool_result = await handler.execute_tool(call)
-                     accumulated_content.extend(content_blocks)
-
-                     # Track for assistant message
-                     assistant_tool_calls.append({
-                         "id": call["id"],
-                         "type": "function",
-                         "function": {
-                             "name": call["function"]["name"],
-                             "arguments": call["function"]["arguments"],
-                         },
-                     })
-                     tool_results.append(tool_result)
-
-                 # Add to messages for next iteration
-                 messages = _add_tool_results_to_messages(
-                     messages, assistant_tool_calls, handler, tool_results=tool_results
-                 )
-                 params["messages"] = messages
-
-             except httpx.TimeoutException:
-                 error_response = AnthropicErrorResponse(
-                     error=AnthropicError(type="timeout_error", message="Request timed out")
-                 )
-                 raise HTTPException(
-                     status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                     detail=error_response.model_dump(),
-                 )
-             except httpx.RequestError as e:
-                 error_response = AnthropicErrorResponse(
-                     error=AnthropicError(type="connection_error", message=str(e))
-                 )
-                 raise HTTPException(
-                     status_code=HTTPStatus.BAD_GATEWAY,
-                     detail=error_response.model_dump(),
-                 )
-
-
- def _add_tool_results_to_messages(
-     messages: list[dict[str, Any]],
-     tool_calls: list[dict[str, Any]],
-     handler: ServerToolHandler,
-     tool_results: list[dict[str, Any]] | None = None,
-     is_error: bool = False,
- ) -> list[dict[str, Any]]:
-     """Add assistant tool call and results to messages."""
-     messages = list(messages)
-
-     # Add assistant message with tool calls
-     # SGLang requires content to be a string, not None
-     assistant_msg: dict[str, Any] = {
-         "role": "assistant",
-         "content": "",  # Empty string instead of None for SGLang compatibility
-         "tool_calls": tool_calls,
-     }
-     messages.append(assistant_msg)
-
-     # Add tool results
-     if is_error:
-         for call in tool_calls:
-             messages.append({
-                 "role": "tool",
-                 "tool_call_id": call["id"],
-                 "content": json.dumps({
-                     "error": "max_uses_exceeded",
-                     "message": "Maximum tool uses exceeded.",
-                 }),
-             })
-     elif tool_results:
-         messages.extend(tool_results)
-
-     return messages
-
-
  @router.post(
      "/v1/messages",
      response_model=Message,
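The relocated `_stream_response` emits Anthropic-style SSE in a fixed order: `message_start`, then paired `content_block_start`/`content_block_delta`/`content_block_stop` events per block, a final `message_delta` carrying usage and `stop_reason`, then `message_stop`. A sketch of a client consuming that stream; the URL and model name are illustrative placeholders, not part of this package:

    # Illustrative consumer of the proxy's Anthropic-style SSE stream.
    import json

    import httpx

    def collect_text(raw: str) -> str:
        """Concatenate text_delta payloads from raw SSE data lines."""
        parts = []
        for line in raw.splitlines():
            if not line.startswith("data: ") or line == "data: [DONE]":
                continue
            event = json.loads(line[6:])
            if event.get("type") == "content_block_delta":
                delta = event.get("delta", {})
                if delta.get("type") == "text_delta":
                    parts.append(delta.get("text", ""))
        return "".join(parts)

    with httpx.stream(
        "POST",
        "http://localhost:8000/v1/messages",  # assumed local deployment
        json={
            "model": "my-model",  # placeholder model name
            "max_tokens": 128,
            "stream": True,
            "messages": [{"role": "user", "content": "Hello"}],
        },
        timeout=60,
    ) as resp:
        print(collect_text(resp.read().decode("utf-8")))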
@@ -611,14 +96,18 @@ async def create_message(
      try:
          body_bytes = await request.body()
          body_json = json.loads(body_bytes.decode("utf-8"))
-         logger.debug(f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}")
+         logger.debug(
+             f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+         )
          anthropic_params = body_json
      except json.JSONDecodeError as e:
          logger.error(f"Invalid JSON in request body: {e}")
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="invalid_request_error", message=f"Invalid JSON: {e}")
+             error=AnthropicError(
+                 type="invalid_request_error", message=f"Invalid JSON: {e}"
+             )
          )
-         return JSONResponse(status_code=422, content=error_response.model_dump())
+         return JSONResponse(status_code=400, content=error_response.model_dump())
      except Exception as e:
          logger.error(f"Failed to parse request body: {e}")
          error_response = AnthropicErrorResponse(
@@ -629,30 +118,40 @@ async def create_message(
      # Validate request shape early (avoid making upstream calls for obviously invalid requests)
      if not isinstance(anthropic_params, dict):
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="invalid_request_error", message="Request body must be a JSON object")
+             error=AnthropicError(
+                 type="invalid_request_error",
+                 message="Request body must be a JSON object",
+             )
          )
-         return JSONResponse(status_code=422, content=error_response.model_dump())
+         return JSONResponse(status_code=400, content=error_response.model_dump())
 
      model_value = anthropic_params.get("model")
      if not isinstance(model_value, str) or not model_value.strip():
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="invalid_request_error", message="Model must be a non-empty string")
+             error=AnthropicError(
+                 type="invalid_request_error", message="Model must be a non-empty string"
+             )
          )
-         return JSONResponse(status_code=422, content=error_response.model_dump())
+         return JSONResponse(status_code=400, content=error_response.model_dump())
 
      messages_value = anthropic_params.get("messages")
      if not isinstance(messages_value, list) or len(messages_value) == 0:
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="invalid_request_error", message="Messages must be a non-empty list")
+             error=AnthropicError(
+                 type="invalid_request_error",
+                 message="Messages must be a non-empty list",
+             )
          )
-         return JSONResponse(status_code=422, content=error_response.model_dump())
+         return JSONResponse(status_code=400, content=error_response.model_dump())
 
      max_tokens_value = anthropic_params.get("max_tokens")
      if not isinstance(max_tokens_value, int):
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="invalid_request_error", message="max_tokens is required")
+             error=AnthropicError(
+                 type="invalid_request_error", message="max_tokens is required"
+             )
          )
-         return JSONResponse(status_code=422, content=error_response.model_dump())
+         return JSONResponse(status_code=400, content=error_response.model_dump())
 
      # Check for server tools
      tools = anthropic_params.get("tools", [])
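One observable effect of the two hunks above: structurally invalid requests are now rejected locally with HTTP 400 (0.2.x used 422) and an Anthropic-shaped error body, before any upstream call is made. An illustrative check, assuming a local deployment on port 8000:

    # Illustrative: a request missing `messages` and `max_tokens` is rejected
    # locally with 400 and an Anthropic-shaped error body (0.2.x returned 422).
    import httpx

    resp = httpx.post(
        "http://localhost:8000/v1/messages",  # assumed local deployment
        json={"model": "my-model"},  # placeholder model name
    )
    assert resp.status_code == 400
    assert resp.json()["error"]["type"] == "invalid_request_error"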
@@ -664,14 +163,16 @@ async def create_message(
 
      # Convert Anthropic params to OpenAI params
      openai_params_obj = convert_anthropic_to_openai(
-         anthropic_params,
+         cast(MessageCreateParams, anthropic_params),
          enabled_server_tools=enabled_server_tools if has_server_tools else None,
      )
      openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
-
+
      # Log converted OpenAI request (remove internal fields)
-     log_params = {k: v for k, v in openai_params.items() if not k.startswith('_')}
-     logger.debug(f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}")
+     log_params = {k: v for k, v in openai_params.items() if not k.startswith("_")}
+     logger.debug(
+         f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}"
+     )
 
      stream = openai_params.get("stream", False)
      model = openai_params.get("model", "")
@@ -698,7 +199,7 @@ async def create_message(
          result = await _handle_with_server_tools(
              openai_params, url, headers, settings, tool_classes, model
          )
-
+
          # If original request was streaming, convert result to streaming format
          if stream:
              return StreamingResponse(
@@ -719,8 +220,23 @@ async def create_message(
              response = await client.post(url, headers=headers, json=openai_params)
 
              if response.status_code != 200:
+                 raw_text = response.text
+                 try:
+                     if not raw_text:
+                         raw_text = response.content.decode(
+                             "utf-8", errors="replace"
+                         )
+                 except Exception:
+                     raw_text = ""
+                 if not raw_text:
+                     raw_text = response.reason_phrase or ""
+                 error_message = (raw_text or "").strip()
                  error_response = AnthropicErrorResponse(
-                     error=AnthropicError(type="api_error", message=response.text)
+                     error=AnthropicError(
+                         type="api_error",
+                         message=error_message
+                         or f"Upstream API error ({response.status_code})",
+                     )
                  )
                  return JSONResponse(
                      status_code=response.status_code,
@@ -728,32 +244,42 @@ async def create_message(
                  )
 
              openai_completion = response.json()
-             logger.debug(f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}")
-
+             logger.debug(
+                 f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}"
+             )
+
              from openai.types.chat import ChatCompletion
+
              completion = ChatCompletion.model_validate(openai_completion)
              anthropic_message = convert_openai_to_anthropic(completion, model)
-
+
              anthropic_response = anthropic_message.model_dump()
-             logger.debug(f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}")
+             anthropic_response["usage"] = _normalize_usage(
+                 anthropic_response.get("usage")
+             )
+             logger.debug(
+                 f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}"
+             )
 
              return JSONResponse(content=anthropic_response)
 
      except httpx.TimeoutException:
          error_response = AnthropicErrorResponse(
-             error=AnthropicError(type="timeout_error", message="Request timed out")
+             error=AnthropicError(
+                 type="timeout_error", message="Request timed out"
+             )
          )
-         raise HTTPException(
+         return JSONResponse(
              status_code=HTTPStatus.GATEWAY_TIMEOUT,
-             detail=error_response.model_dump(),
+             content=error_response.model_dump(),
          )
      except httpx.RequestError as e:
          error_response = AnthropicErrorResponse(
              error=AnthropicError(type="connection_error", message=str(e))
          )
-         raise HTTPException(
+         return JSONResponse(
              status_code=HTTPStatus.BAD_GATEWAY,
-             detail=error_response.model_dump(),
+             content=error_response.model_dump(),
          )
 
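After this hunk, upstream failures surface as JSON responses rather than raised `HTTPException`s: timeouts map to 504 with `timeout_error`, connection failures to 502 with `connection_error`. A client-side sketch; the deployment URL and model name are placeholders:

    # Illustrative: gateway failures now carry an Anthropic-shaped JSON body
    # instead of an HTTPException detail.
    import httpx

    resp = httpx.post(
        "http://localhost:8000/v1/messages",  # assumed local deployment
        json={
            "model": "my-model",  # placeholder model name
            "max_tokens": 64,
            "messages": [{"role": "user", "content": "ping"}],
        },
        timeout=120,
    )
    if resp.status_code in (502, 504):
        err = resp.json()["error"]
        print(f"upstream failure: {err['type']}: {err['message']}")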
 
@@ -786,6 +312,112 @@ async def list_models(
      )
 
 
+ @router.post("/v1/messages/count_tokens")
+ async def count_tokens(
+     request: Request,
+     settings: Settings = Depends(get_request_settings),
+ ) -> JSONResponse:
+     """
+     Count tokens in messages without creating a message.
+     Uses tiktoken for local token counting.
+     """
+     try:
+         body_bytes = await request.body()
+         body_json = json.loads(body_bytes.decode("utf-8"))
+         logger.debug(
+             f"[Count Tokens Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+         )
+     except json.JSONDecodeError as e:
+         error_response = AnthropicErrorResponse(
+             error=AnthropicError(
+                 type="invalid_request_error", message=f"Invalid JSON: {e}"
+             )
+         )
+         return JSONResponse(status_code=400, content=error_response.model_dump())
+     except Exception as e:
+         error_response = AnthropicErrorResponse(
+             error=AnthropicError(type="invalid_request_error", message=str(e))
+         )
+         return JSONResponse(status_code=400, content=error_response.model_dump())
+
+     # Validate required fields
+     if not isinstance(body_json, dict):
+         error_response = AnthropicErrorResponse(
+             error=AnthropicError(
+                 type="invalid_request_error",
+                 message="Request body must be a JSON object",
+             )
+         )
+         return JSONResponse(status_code=400, content=error_response.model_dump())
+
+     messages = body_json.get("messages", [])
+     if not isinstance(messages, list):
+         error_response = AnthropicErrorResponse(
+             error=AnthropicError(
+                 type="invalid_request_error", message="messages must be a list"
+             )
+         )
+         return JSONResponse(status_code=400, content=error_response.model_dump())
+
+     model = body_json.get("model", "")
+     system = body_json.get("system")
+     tools = body_json.get("tools", [])
+
+     try:
+         # Use tiktoken for token counting
+         import tiktoken  # type: ignore[import-not-found]
+
+         # Map model names to tiktoken encoding
+         # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+         total_tokens = 0
+
+         # Count system prompt tokens if present
+         if system:
+             if isinstance(system, str):
+                 total_tokens += len(encoding.encode(system))
+             elif isinstance(system, list):
+                 for block in system:
+                     if isinstance(block, dict) and block.get("type") == "text":
+                         total_tokens += len(encoding.encode(block.get("text", "")))
+
+         # Count message tokens
+         for msg in messages:
+             content = msg.get("content", "")
+             if isinstance(content, str):
+                 total_tokens += len(encoding.encode(content))
+             elif isinstance(content, list):
+                 for block in content:
+                     if isinstance(block, dict):
+                         if block.get("type") == "text":
+                             total_tokens += len(encoding.encode(block.get("text", "")))
+                         elif block.get("type") == "image":
+                             # Images are typically counted as a fixed number of tokens
+                             # This is an approximation
+                             total_tokens += 85  # Standard approximation for images
+
+         # Count tool definitions tokens
+         if tools:
+             for tool in tools:
+                 tool_def = tool if isinstance(tool, dict) else tool.model_dump()
+                 # Rough approximation for tool definitions
+                 total_tokens += len(encoding.encode(json.dumps(tool_def)))
+
+         logger.debug(f"[Count Tokens Response] input_tokens: {total_tokens}")
+
+         return JSONResponse(content={"input_tokens": total_tokens})
+
+     except Exception as e:
+         logger.error(f"Token counting error: {e}")
+         error_response = AnthropicErrorResponse(
+             error=AnthropicError(
+                 type="internal_error", message=f"Failed to count tokens: {str(e)}"
+             )
+         )
+         return JSONResponse(status_code=500, content=error_response.model_dump())
+
+
  @router.get("/health")
  async def health_check() -> dict[str, str]:
      """Health check endpoint."""