local-openai2anthropic 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any, AsyncGenerator, cast
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
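
Note: the secrets, string, and AsyncGenerator imports leave router.py because the only code that used them (the ID generator and the streaming generators removed below) moves into the new utils and streaming modules. A minimal sketch of the relocated ID helper's contract, assuming local_openai2anthropic.utils carries _generate_server_tool_id over unchanged:

    from local_openai2anthropic.utils import _generate_server_tool_id

    tool_id = _generate_server_tool_id()
    # Per the removed implementation: "srvtoolu_" plus 24 random
    # lowercase-alphanumeric characters drawn via secrets.choice().
    assert tool_id.startswith("srvtoolu_")
    assert len(tool_id) == len("srvtoolu_") + 24
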
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
@@ -43,843 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = "".join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, Any] | None:
-    if not isinstance(usage, dict):
-        return usage
-    allowed_keys = {
-        "input_tokens",
-        "output_tokens",
-        "cache_creation_input_tokens",
-        "cache_read_input_tokens",
-        "server_tool_use",
-    }
-    normalized = {k: v for k, v in usage.items() if k in allowed_keys}
-    return normalized or None
-
-
-def _count_tokens(text: str) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    return len(encoding.encode(text))
-
-
-def _chunk_text(text: str, chunk_size: int = 200) -> list[str]:
-    if not text:
-        return []
-    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
-
-
-def _estimate_input_tokens(openai_params: dict[str, Any]) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    total_tokens = 0
-
-    system = openai_params.get("system")
-    if isinstance(system, str):
-        total_tokens += len(encoding.encode(system))
-
-    messages = openai_params.get("messages", [])
-    if isinstance(messages, list):
-        for msg in messages:
-            if not isinstance(msg, dict):
-                continue
-            content = msg.get("content", "")
-            if isinstance(content, str):
-                total_tokens += len(encoding.encode(content))
-            elif isinstance(content, list):
-                for block in content:
-                    if not isinstance(block, dict):
-                        total_tokens += len(encoding.encode(str(block)))
-                        continue
-                    block_type = block.get("type")
-                    if block_type == "text":
-                        total_tokens += len(encoding.encode(block.get("text", "")))
-                    elif block_type == "image_url":
-                        total_tokens += 85
-
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list) and tool_calls:
-                total_tokens += len(encoding.encode(json.dumps(tool_calls)))
-
-    tools = openai_params.get("tools")
-    if isinstance(tools, list) and tools:
-        total_tokens += len(encoding.encode(json.dumps(tools)))
-
-    tool_choice = openai_params.get("tool_choice")
-    if tool_choice is not None:
-        total_tokens += len(encoding.encode(json.dumps(tool_choice)))
-
-    response_format = openai_params.get("response_format")
-    if response_format is not None:
-        total_tokens += len(encoding.encode(json.dumps(response_format)))
-
-    return total_tokens
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream(
-            "POST", url, headers=headers, json=json_data
-        ) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                error_text = error_body.decode("utf-8", errors="replace").strip()
-                try:
-                    error_json = json.loads(error_text) if error_text else {}
-                    error_msg = error_json.get("error", {}).get("message") or error_text
-                except json.JSONDecodeError:
-                    error_msg = error_text
-                if not error_msg:
-                    error_msg = (
-                        response.reason_phrase
-                        or f"Upstream API error ({response.status_code})"
-                    )
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            current_block_type = None  # 'thinking', 'text', or 'tool_use'
-            current_tool_call_index = None
-            tool_call_buffers: dict[int, str] = {}
-            finish_reason = None
-            input_tokens = _estimate_input_tokens(json_data)
-            output_tokens = 0
-            message_id = None
-            sent_message_delta = False
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    if not sent_message_delta:
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": output_tokens,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                    logger.debug(
-                        f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}"
-                    )
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage") or {}
-                    input_tokens = usage.get("prompt_tokens", input_tokens)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    logger.debug(
-                        f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}"
-                    )
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage") or {}
-                    if usage:
-                        input_tokens = usage.get("prompt_tokens", input_tokens)
-                        output_tokens = usage.get("completion_tokens", output_tokens)
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": usage.get(
-                                    "prompt_tokens", input_tokens
-                                ),
-                                "output_tokens": usage.get("completion_tokens", 0),
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                        sent_message_delta = True
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason (but don't skip - content may also be present)
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
-                # Handle reasoning content (thinking)
-                if delta.get("reasoning_content"):
-                    reasoning = delta["reasoning_content"]
-                    # Start thinking content block if not already started
-                    if not content_block_started or current_block_type != "thinking":
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {
-                                "type": "thinking",
-                                "thinking": "",
-                                "signature": "",
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "thinking"
-
-                    for chunk in _chunk_text(reasoning):
-                        delta_block = {
-                            "type": "content_block_delta",
-                            "index": content_block_index,
-                            "delta": {"type": "thinking_delta", "thinking": chunk},
-                        }
-                        yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-                    continue
-
-                # Handle content
-                if delta.get("content"):
-                    if not content_block_started or current_block_type != "text":
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {"type": "text", "text": ""},
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "text"
-
-                    output_tokens += _count_tokens(delta["content"])
-                    delta_block = {
-                        "type": "content_block_delta",
-                        "index": content_block_index,
-                        "delta": {"type": "text_delta", "text": delta["content"]},
-                    }
-                    yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    for tool_call in delta["tool_calls"]:
-                        tool_call_idx = tool_call.get("index", 0)
-
-                        if tool_call.get("id"):
-                            if content_block_started and (
-                                current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                content_block_started = False
-                                content_block_index += 1
-
-                            if not content_block_started:
-                                func = tool_call.get("function") or {}
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-
-                        if (tool_call.get("function") or {}).get("arguments"):
-                            args = (tool_call.get("function") or {}).get(
-                                "arguments", ""
-                            )
-                            if (
-                                not content_block_started
-                                or current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                if content_block_started:
-                                    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                    content_block_index += 1
-                                func = tool_call.get("function") or {}
-                                tool_id = tool_call.get("id", "")
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_id, 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-                            tool_call_buffers[tool_call_idx] = (
-                                tool_call_buffers.get(tool_call_idx, "") + args
-                            )
-                            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                stop_block = {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-                logger.debug(
-                    f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}"
-                )
-                yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-
-            # Message stop
-            stop_event = {"type": "message_stop"}
-            logger.debug(
-                f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}"
-            )
-            yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
-
-    except Exception as e:
-        import traceback
-
-        error_msg = f"{str(e)}\n{traceback.format_exc()}"
-        logger.error(f"Stream error: {error_msg}")
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(bytes(result.body).decode("utf-8"))
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {
-        "end_turn": "stop",
-        "max_tokens": "length",
-        "tool_use": "tool_calls",
-    }
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            tool_input = block.get("input", {})
-            if tool_input:
-                input_json = json.dumps(tool_input, ensure_ascii=False)
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'input_json_delta', 'partial_json': input_json}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            tool_result_block = dict(block)
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "thinking":
-            # Handle thinking blocks (BetaThinkingBlock)
-            signature = block.get("signature", "")
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': '', 'signature': signature}})}\n\n"
-            thinking_text = block.get("thinking", "")
-            if thinking_text:
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-        openai_call_id = tool_call.get("openai_id", call_id)
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(
-            openai_call_id, args, result
-        )
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.debug(
-                    f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}"
-                )
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(
-                        f"OpenAI API error: {response.status_code} - {response.text}"
-                    )
-                    raw_text = response.text
-                    try:
-                        if not raw_text:
-                            raw_text = response.content.decode(
-                                "utf-8", errors="replace"
-                            )
-                    except Exception:
-                        raw_text = ""
-                    if not raw_text:
-                        raw_text = response.reason_phrase or ""
-                    error_message = (raw_text or "").strip()
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(
-                            type="api_error",
-                            message=error_message
-                            or f"Upstream API error ({response.status_code})",
-                        )
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.debug(
-                    f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}..."
-                )
-                from openai.types.chat import ChatCompletion
-
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(
-                    f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}"
-                )
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func = getattr(tc, "function", None)
-                        func_name = func.name if func else ""
-                        logger.info(f" Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call(
-                            {
-                                "id": tc.id,
-                                "function": {"name": func_name, "arguments": ""},
-                            }
-                        )
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        client_tool_id = (
-                            _generate_server_tool_id() if is_server else tc.id
-                        )
-
-                        tc_dict = {
-                            "id": client_tool_id,
-                            "openai_id": tc.id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": func.arguments if func else "{}",
-                            },
-                        }
-                        logger.info(
-                            f" Is server tool: {is_server}, ID: {client_tool_id}"
-                        )
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(
-                    f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}"
-                )
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = (
-                            accumulated_content + message_dict.get("content", [])
-                        )
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-                        message_dict["usage"] = _normalize_usage(
-                            message_dict.get("usage")
-                        )
-
-                        # Log full response for debugging
-                        logger.info(
-                            f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}"
-                        )
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    message_dict = message.model_dump()
-                    message_dict["usage"] = _normalize_usage(message_dict.get("usage"))
-                    return JSONResponse(content=message_dict)
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    assistant_tool_calls = []
-                    for call in server_tool_calls:
-                        assistant_tool_calls.append(
-                            {
-                                "id": call.get("openai_id", call.get("id", "")),
-                                "type": "function",
-                                "function": {
-                                    "name": call.get("function", {}).get("name", ""),
-                                    "arguments": call.get("function", {}).get(
-                                        "arguments", "{}"
-                                    ),
-                                },
-                            }
-                        )
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, assistant_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append(
-                        {
-                            "id": call.get("openai_id", call.get("id", "")),
-                            "type": "function",
-                            "function": {
-                                "name": call["function"]["name"],
-                                "arguments": call["function"]["arguments"],
-                            },
-                        }
-                    )
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(
-                        type="timeout_error", message="Request timed out"
-                    )
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    content=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    content=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            tool_call_id = call.get("openai_id", call.get("id", ""))
-            messages.append(
-                {
-                    "role": "tool",
-                    "tool_call_id": tool_call_id,
-                    "content": json.dumps(
-                        {
-                            "error": "max_uses_exceeded",
-                            "message": "Maximum tool uses exceeded.",
-                        }
-                    ),
-                }
-            )
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
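
Given the new streaming, tools, and utils imports in the hunk above, this large removal appears to be a relocation rather than a deletion: every helper deleted here is re-imported into router.py. For reference, the Anthropic-style SSE sequence the two streaming generators emit, reconstructed from the removed code with payloads abbreviated:

    event: message_start
    data: {"type": "message_start", "message": {...}}

    event: content_block_start
    data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}}

    event: content_block_delta
    data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "..."}}

    event: content_block_stop
    data: {"type": "content_block_stop", "index": 0}

    event: message_delta
    data: {"type": "message_delta", "delta": {"stop_reason": "end_turn"}, "usage": {...}}

    event: message_stop
    data: {"type": "message_stop"}
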
 @router.post(
     "/v1/messages",
     response_model=Message,