local-openai2anthropic 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their public registries.
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any, AsyncGenerator, cast
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
@@ -43,858 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = "".join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, Any] | None:
-    if not isinstance(usage, dict):
-        return usage
-    allowed_keys = {
-        "input_tokens",
-        "output_tokens",
-        "cache_creation_input_tokens",
-        "cache_read_input_tokens",
-        "server_tool_use",
-    }
-    normalized = {k: v for k, v in usage.items() if k in allowed_keys}
-    return normalized or None
-
-
-def _count_tokens(text: str) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    return len(encoding.encode(text))
-
-
-def _chunk_text(text: str, chunk_size: int = 200) -> list[str]:
-    if not text:
-        return []
-    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
-
-
-def _estimate_input_tokens(openai_params: dict[str, Any]) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    total_tokens = 0
-
-    system = openai_params.get("system")
-    if isinstance(system, str):
-        total_tokens += len(encoding.encode(system))
-
-    messages = openai_params.get("messages", [])
-    if isinstance(messages, list):
-        for msg in messages:
-            if not isinstance(msg, dict):
-                continue
-            content = msg.get("content", "")
-            if isinstance(content, str):
-                total_tokens += len(encoding.encode(content))
-            elif isinstance(content, list):
-                for block in content:
-                    if not isinstance(block, dict):
-                        total_tokens += len(encoding.encode(str(block)))
-                        continue
-                    block_type = block.get("type")
-                    if block_type == "text":
-                        total_tokens += len(encoding.encode(block.get("text", "")))
-                    elif block_type == "image_url":
-                        total_tokens += 85
-
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list) and tool_calls:
-                total_tokens += len(encoding.encode(json.dumps(tool_calls)))
-
-    tools = openai_params.get("tools")
-    if isinstance(tools, list) and tools:
-        total_tokens += len(encoding.encode(json.dumps(tools)))
-
-    tool_choice = openai_params.get("tool_choice")
-    if tool_choice is not None:
-        total_tokens += len(encoding.encode(json.dumps(tool_choice)))
-
-    response_format = openai_params.get("response_format")
-    if response_format is not None:
-        total_tokens += len(encoding.encode(json.dumps(response_format)))
-
-    return total_tokens
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream(
-            "POST", url, headers=headers, json=json_data
-        ) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                error_text = error_body.decode("utf-8", errors="replace").strip()
-                try:
-                    error_json = json.loads(error_text) if error_text else {}
-                    error_msg = error_json.get("error", {}).get("message") or error_text
-                except json.JSONDecodeError:
-                    error_msg = error_text
-                if not error_msg:
-                    error_msg = (
-                        response.reason_phrase
-                        or f"Upstream API error ({response.status_code})"
-                    )
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            current_block_type = None  # 'thinking', 'text', or 'tool_use'
-            current_tool_call_index = None
-            tool_call_buffers: dict[int, str] = {}
-            finish_reason = None
-            input_tokens = _estimate_input_tokens(json_data)
-            output_tokens = 0
-            message_id = None
-            sent_message_delta = False
-            pending_text_prefix = ""
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    if not sent_message_delta:
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": output_tokens,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                    logger.debug(
-                        f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}"
-                    )
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage") or {}
-                    input_tokens = usage.get("prompt_tokens", input_tokens)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    logger.debug(
-                        f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}"
-                    )
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage") or {}
-                    if usage:
-                        input_tokens = usage.get("prompt_tokens", input_tokens)
-                        output_tokens = usage.get("completion_tokens", output_tokens)
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": usage.get(
-                                    "prompt_tokens", input_tokens
-                                ),
-                                "output_tokens": usage.get("completion_tokens", 0),
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                        sent_message_delta = True
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason (but don't skip - content may also be present)
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
-                # Handle reasoning content (thinking)
-                if delta.get("reasoning_content"):
-                    reasoning = delta["reasoning_content"]
-                    pending_text_prefix = ""
-                    # Start thinking content block if not already started
-                    if not content_block_started or current_block_type != "thinking":
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {
-                                "type": "thinking",
-                                "thinking": "",
-                                "signature": "",
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "thinking"
-
-                    for chunk in _chunk_text(reasoning):
-                        delta_block = {
-                            "type": "content_block_delta",
-                            "index": content_block_index,
-                            "delta": {"type": "thinking_delta", "thinking": chunk},
-                        }
-                        yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-                    continue
-
-                # Handle content
-                if isinstance(delta.get("content"), str):
-                    content_text = delta.get("content", "")
-                    if not content_text:
-                        continue
-                    if content_text.strip() == "(no content)":
-                        continue
-                    if not content_block_started or current_block_type != "text":
-                        if not content_text.strip():
-                            pending_text_prefix += content_text
-                            continue
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {"type": "text", "text": ""},
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "text"
-
-                    if pending_text_prefix:
-                        content_text = pending_text_prefix + content_text
-                        pending_text_prefix = ""
-
-                    output_tokens += _count_tokens(content_text)
-                    delta_block = {
-                        "type": "content_block_delta",
-                        "index": content_block_index,
-                        "delta": {"type": "text_delta", "text": content_text},
-                    }
-                    yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    pending_text_prefix = ""
-                    for tool_call in delta["tool_calls"]:
-                        tool_call_idx = tool_call.get("index", 0)
-
-                        if tool_call.get("id"):
-                            if content_block_started and (
-                                current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                content_block_started = False
-                                content_block_index += 1
-
-                            if not content_block_started:
-                                func = tool_call.get("function") or {}
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-
-                        if (tool_call.get("function") or {}).get("arguments"):
-                            args = (tool_call.get("function") or {}).get(
-                                "arguments", ""
-                            )
-                            if (
-                                not content_block_started
-                                or current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                if content_block_started:
-                                    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                    content_block_index += 1
-                                func = tool_call.get("function") or {}
-                                tool_id = tool_call.get("id", "")
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_id, 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-                            tool_call_buffers[tool_call_idx] = (
-                                tool_call_buffers.get(tool_call_idx, "") + args
-                            )
-                            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                stop_block = {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-                logger.debug(
-                    f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}"
-                )
-                yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-
-            # Message stop
-            stop_event = {"type": "message_stop"}
-            logger.debug(
-                f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}"
-            )
-            yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
-
-    except Exception as e:
-        import traceback
-
-        error_msg = f"{str(e)}\n{traceback.format_exc()}"
-        logger.error(f"Stream error: {error_msg}")
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(bytes(result.body).decode("utf-8"))
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {
-        "end_turn": "stop",
-        "max_tokens": "length",
-        "tool_use": "tool_calls",
-    }
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            tool_input = block.get("input", {})
-            if tool_input:
-                input_json = json.dumps(tool_input, ensure_ascii=False)
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'input_json_delta', 'partial_json': input_json}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            tool_result_block = dict(block)
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "thinking":
-            # Handle thinking blocks (BetaThinkingBlock)
-            signature = block.get("signature", "")
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': '', 'signature': signature}})}\n\n"
-            thinking_text = block.get("thinking", "")
-            if thinking_text:
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-        openai_call_id = tool_call.get("openai_id", call_id)
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(
-            openai_call_id, args, result
-        )
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.debug(
-                    f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}"
-                )
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(
-                        f"OpenAI API error: {response.status_code} - {response.text}"
-                    )
-                    raw_text = response.text
-                    try:
-                        if not raw_text:
-                            raw_text = response.content.decode(
-                                "utf-8", errors="replace"
-                            )
-                    except Exception:
-                        raw_text = ""
-                    if not raw_text:
-                        raw_text = response.reason_phrase or ""
-                    error_message = (raw_text or "").strip()
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(
-                            type="api_error",
-                            message=error_message
-                            or f"Upstream API error ({response.status_code})",
-                        )
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.debug(
-                    f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}..."
-                )
-                from openai.types.chat import ChatCompletion
-
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(
-                    f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}"
-                )
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func = getattr(tc, "function", None)
-                        func_name = func.name if func else ""
-                        logger.info(f" Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call(
-                            {
-                                "id": tc.id,
-                                "function": {"name": func_name, "arguments": ""},
-                            }
-                        )
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        client_tool_id = (
-                            _generate_server_tool_id() if is_server else tc.id
-                        )
-
-                        tc_dict = {
-                            "id": client_tool_id,
-                            "openai_id": tc.id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": func.arguments if func else "{}",
-                            },
-                        }
-                        logger.info(
-                            f" Is server tool: {is_server}, ID: {client_tool_id}"
-                        )
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(
-                    f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}"
-                )
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = (
-                            accumulated_content + message_dict.get("content", [])
-                        )
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-                        message_dict["usage"] = _normalize_usage(
-                            message_dict.get("usage")
-                        )
-
-                        # Log full response for debugging
-                        logger.info(
-                            f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}"
-                        )
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    message_dict = message.model_dump()
-                    message_dict["usage"] = _normalize_usage(message_dict.get("usage"))
-                    return JSONResponse(content=message_dict)
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    assistant_tool_calls = []
-                    for call in server_tool_calls:
-                        assistant_tool_calls.append(
-                            {
-                                "id": call.get("openai_id", call.get("id", "")),
-                                "type": "function",
-                                "function": {
-                                    "name": call.get("function", {}).get("name", ""),
-                                    "arguments": call.get("function", {}).get(
-                                        "arguments", "{}"
-                                    ),
-                                },
-                            }
-                        )
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, assistant_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append(
-                        {
-                            "id": call.get("openai_id", call.get("id", "")),
-                            "type": "function",
-                            "function": {
-                                "name": call["function"]["name"],
-                                "arguments": call["function"]["arguments"],
-                            },
-                        }
-                    )
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(
-                        type="timeout_error", message="Request timed out"
-                    )
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    content=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    content=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            tool_call_id = call.get("openai_id", call.get("id", ""))
-            messages.append(
-                {
-                    "role": "tool",
-                    "tool_call_id": tool_call_id,
-                    "content": json.dumps(
-                        {
-                            "error": "max_uses_exceeded",
-                            "message": "Maximum tool uses exceeded.",
-                        }
-                    ),
-                }
-            )
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
 @router.post(
     "/v1/messages",
     response_model=Message,
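
The change set above is a pure refactor: the streaming converters (_stream_response, _convert_result_to_stream), the server-tool machinery (ServerToolHandler, _handle_with_server_tools, _add_tool_results_to_messages), and the token/usage helpers (_generate_server_tool_id, _normalize_usage, _count_tokens, _chunk_text, _estimate_input_tokens) move out of the router module into local_openai2anthropic.streaming, local_openai2anthropic.tools, and local_openai2anthropic.utils. The new import statements re-bind the moved names in their old namespace, and __all__ declares them as intentionally public, which is what keeps callers (per the diff's own comment, the test suite) importing from the old location without modification. A minimal sketch of what that means in practice, assuming the 0.3.5 layout shown above; the diffed file's path is not shown in this excerpt, so the "router" module name in the commented import is an assumption:

# Hypothetical usage sketch against local-openai2anthropic 0.3.5.
from local_openai2anthropic.utils import _chunk_text, _normalize_usage

# Legacy imports via the original module keep working thanks to the
# re-binding imports and the __all__ re-export list, e.g. (module name assumed):
# from local_openai2anthropic.router import _chunk_text, _normalize_usage

# Behavior is unchanged from the 0.3.4 code removed above:
assert _chunk_text("a" * 450) == ["a" * 200, "a" * 200, "a" * 50]  # 200-char chunks
assert _normalize_usage({"input_tokens": 10, "unknown": 1}) == {"input_tokens": 10}
assert _normalize_usage({"unknown": 1}) is None  # an all-filtered dict collapses to None

New code can depend on the split modules directly, while anything pinned to the 0.3.4 import paths continues to work through the re-exports.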