local-openai2anthropic 0.2.3__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/config.py +132 -18
- local_openai2anthropic/converter.py +82 -60
- local_openai2anthropic/main.py +83 -12
- local_openai2anthropic/protocol.py +1 -1
- local_openai2anthropic/router.py +208 -576
- local_openai2anthropic/streaming/__init__.py +6 -0
- local_openai2anthropic/streaming/handler.py +444 -0
- local_openai2anthropic/tools/__init__.py +14 -0
- local_openai2anthropic/tools/handler.py +357 -0
- local_openai2anthropic/utils/__init__.py +18 -0
- local_openai2anthropic/utils/tokens.py +96 -0
- {local_openai2anthropic-0.2.3.dist-info → local_openai2anthropic-0.3.6.dist-info}/METADATA +51 -28
- local_openai2anthropic-0.3.6.dist-info/RECORD +25 -0
- local_openai2anthropic-0.2.3.dist-info/RECORD +0 -19
- {local_openai2anthropic-0.2.3.dist-info → local_openai2anthropic-0.3.6.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.2.3.dist-info → local_openai2anthropic-0.3.6.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.2.3.dist-info → local_openai2anthropic-0.3.6.dist-info}/licenses/LICENSE +0 -0
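The headline change is a refactor: router.py shrinks by roughly 570 lines as the streaming converter, the server-tool loop, and the token helpers move into dedicated subpackages. Based on the RECORD entries above and the import hunk below, the split presumably lands like this (file-to-function mapping inferred, not stated in the diff):

# local_openai2anthropic/streaming/handler.py - SSE conversion
#     (_stream_response, _convert_result_to_stream)
# local_openai2anthropic/tools/handler.py     - server tool execution loop
#     (ServerToolHandler, _handle_with_server_tools, _add_tool_results_to_messages)
# local_openai2anthropic/utils/tokens.py      - token helpers
#     (_count_tokens, _estimate_input_tokens, _chunk_text, _normalize_usage,
#      _generate_server_tool_id)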
local_openai2anthropic/router.py
CHANGED
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any,
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
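The `secrets` and `string` imports go away because `_generate_server_tool_id` now lives in `local_openai2anthropic.utils` (re-imported in the next hunk). Per the removed body further down, its output format is unchanged; a quick sketch:

from local_openai2anthropic.utils import _generate_server_tool_id

tool_id = _generate_server_tool_id()
# e.g. "srvtoolu_k3f9x2m7a1q8z5w0b4n6c2d8" - "srvtoolu_" followed by
# 24 lowercase alphanumeric characters drawn with secrets.choice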
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
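Because the relocated helpers are re-imported into the router and listed in `__all__`, code that imported them from the old location (including the package's own tests, per the comment) keeps working. A minimal sketch, assuming 0.3.x is installed:

from local_openai2anthropic import router, streaming

# Pre-0.3 import path and new module path resolve to the same object.
assert router._stream_response is streaming._stream_response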
@@ -43,553 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = ''.join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream("POST", url, headers=headers, json=json_data) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                try:
-                    error_json = json.loads(error_body.decode())
-                    error_msg = error_json.get("error", {}).get("message", error_body.decode())
-                except json.JSONDecodeError:
-                    error_msg = error_body.decode()
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            current_block_type = None  # 'thinking', 'text', or 'tool_use'
-            finish_reason = None
-            input_tokens = 0
-            output_tokens = 0
-            message_id = None
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                    logger.debug(f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}")
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage") or {}
-                    input_tokens = usage.get("prompt_tokens", 0)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    logger.debug(f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}")
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage") or {}
-                    if usage:
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-                        delta_event = {'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}}
-                        logger.debug(f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}")
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason (but don't skip - content may also be present)
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
-                # Handle reasoning content (thinking)
-                if delta.get("reasoning_content"):
-                    reasoning = delta["reasoning_content"]
-                    # Start thinking content block if not already started
-                    if not content_block_started or current_block_type != 'thinking':
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                            logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'thinking', 'thinking': ''}}
-                        logger.debug(f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}")
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = 'thinking'
-
-                    delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'thinking_delta', 'thinking': reasoning}}
-                    yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-                    continue
-
-                # Handle content
-                if delta.get("content"):
-                    if not content_block_started or current_block_type != 'text':
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                            logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}}
-                        logger.debug(f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}")
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = 'text'
-
-                    delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}}
-                    yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    tool_call = delta["tool_calls"][0]
-
-                    if tool_call.get("id"):
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-                            content_block_index += 1
-
-                        func = tool_call.get('function') or {}
-                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                        content_block_started = True
-                        current_block_type = 'tool_use'
-
-                    elif (tool_call.get('function') or {}).get("arguments"):
-                        args = (tool_call.get('function') or {}).get("arguments", "")
-                        yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                stop_block = {'type': 'content_block_stop', 'index': content_block_index}
-                logger.debug(f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}")
-                yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-
-            # Message stop
-            stop_event = {'type': 'message_stop'}
-            logger.debug(f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}")
-            yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
-
-    except Exception as e:
-        import traceback
-        error_msg = f"{str(e)}\n{traceback.format_exc()}"
-        logger.error(f"Stream error: {error_msg}")
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(result.body)
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {"end_turn": "stop", "max_tokens": "length", "tool_use": "tool_calls"}
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            # Some clients expect `results`, others expect `content`; include both when possible.
-            tool_result_block = dict(block)
-            if "content" not in tool_result_block and "results" in tool_result_block:
-                tool_result_block["content"] = tool_result_block["results"]
-
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "thinking":
-            # Handle thinking blocks (BetaThinkingBlock)
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
-            thinking_text = block.get("thinking", "")
-            if thinking_text:
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(call_id, args, result)
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.info(f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}")
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(type="api_error", message=response.text)
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.info(f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}...")
-                from openai.types.chat import ChatCompletion
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}")
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func_name = tc.function.name if tc.function else ""
-                        logger.info(f" Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call({
-                            "id": tc.id,
-                            "function": {"name": func_name, "arguments": ""},
-                        })
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        tool_id = _generate_server_tool_id() if is_server else tc.id
-
-                        tc_dict = {
-                            "id": tool_id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": tc.function.arguments if tc.function else "{}",
-                            },
-                        }
-                        logger.info(f" Is server tool: {is_server}, ID: {tool_id}")
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}")
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = accumulated_content + message_dict.get("content", [])
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-
-                        # Log full response for debugging
-                        logger.info(f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}")
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    return JSONResponse(content=message.model_dump())
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, server_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append({
-                        "id": call["id"],
-                        "type": "function",
-                        "function": {
-                            "name": call["function"]["name"],
-                            "arguments": call["function"]["arguments"],
-                        },
-                    })
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="timeout_error", message="Request timed out")
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    detail=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    detail=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            messages.append({
-                "role": "tool",
-                "tool_call_id": call["id"],
-                "content": json.dumps({
-                    "error": "max_uses_exceeded",
-                    "message": "Maximum tool uses exceeded.",
-                }),
-            })
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
 @router.post(
     "/v1/messages",
     response_model=Message,
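All of the code removed above appears to survive in the new modules rather than being deleted outright: streaming/handler.py gains 444 lines and tools/handler.py gains 357, and every helper is re-exported through the router. For reference, the Anthropic SSE event order the streaming converter emits for a plain text reply, reconstructed from the removed `_stream_response` body (the parser below is a hypothetical helper for illustration, not part of the package):

import json

def parse_sse(raw: str) -> list[tuple[str, dict]]:
    # Split "event: X\ndata: {...}\n\n" blocks into (event, payload) pairs.
    events = []
    for block in raw.strip().split("\n\n"):
        fields = dict(
            line.split(": ", 1) for line in block.splitlines() if ": " in line
        )
        if "event" in fields and "data" in fields:
            events.append((fields["event"], json.loads(fields["data"])))
    return events

# For a plain text completion the converter yields, in order:
#   message_start -> content_block_start(text) -> content_block_delta(text_delta)*
#   -> content_block_stop -> message_delta(stop_reason + usage) -> message_stop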
@@ -611,14 +96,18 @@ async def create_message(
     try:
         body_bytes = await request.body()
         body_json = json.loads(body_bytes.decode("utf-8"))
-        logger.debug(
+        logger.debug(
+            f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
         anthropic_params = body_json
     except json.JSONDecodeError as e:
         logger.error(f"Invalid JSON in request body: {e}")
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
     except Exception as e:
         logger.error(f"Failed to parse request body: {e}")
         error_response = AnthropicErrorResponse(
@@ -629,30 +118,40 @@ async def create_message(
     # Validate request shape early (avoid making upstream calls for obviously invalid requests)
     if not isinstance(anthropic_params, dict):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     model_value = anthropic_params.get("model")
     if not isinstance(model_value, str) or not model_value.strip():
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message="Model must be a non-empty string"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     messages_value = anthropic_params.get("messages")
     if not isinstance(messages_value, list) or len(messages_value) == 0:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Messages must be a non-empty list",
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     max_tokens_value = anthropic_params.get("max_tokens")
     if not isinstance(max_tokens_value, int):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message="max_tokens is required"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     # Check for server tools
     tools = anthropic_params.get("tools", [])
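The four early checks reject anything lacking a non-empty model string, a non-empty messages list, and an integer max_tokens before any upstream call is made. The smallest body that gets past them looks like this (the base URL and model name are placeholders, not defaults from the package):

import httpx

payload = {
    "model": "my-local-model",                        # any non-empty string
    "max_tokens": 256,                                # must be an int
    "messages": [{"role": "user", "content": "Hi"}],  # non-empty list
}
resp = httpx.post("http://localhost:8000/v1/messages", json=payload)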
@@ -664,14 +163,16 @@ async def create_message(
 
     # Convert Anthropic params to OpenAI params
     openai_params_obj = convert_anthropic_to_openai(
-        anthropic_params,
+        cast(MessageCreateParams, anthropic_params),
         enabled_server_tools=enabled_server_tools if has_server_tools else None,
     )
     openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
-
+
     # Log converted OpenAI request (remove internal fields)
-    log_params = {k: v for k, v in openai_params.items() if not k.startswith(
-    logger.debug(
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith("_")}
+    logger.debug(
+        f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}"
+    )
 
     stream = openai_params.get("stream", False)
     model = openai_params.get("model", "")
@@ -698,7 +199,7 @@ async def create_message(
         result = await _handle_with_server_tools(
             openai_params, url, headers, settings, tool_classes, model
         )
-
+
         # If original request was streaming, convert result to streaming format
         if stream:
            return StreamingResponse(
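Server-tool requests always take the non-streaming path upstream; when the client asked for stream: true, the finished JSON result is replayed as SSE. Roughly, with signatures per the re-exports above (the media type is an assumption; standard for SSE but not visible in this hunk):

from fastapi.responses import StreamingResponse

if stream:
    return StreamingResponse(
        _convert_result_to_stream(result, model),  # result: JSONResponse from the tool loop
        media_type="text/event-stream",
    )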
@@ -719,8 +220,23 @@ async def create_message(
             response = await client.post(url, headers=headers, json=openai_params)
 
             if response.status_code != 200:
+                raw_text = response.text
+                try:
+                    if not raw_text:
+                        raw_text = response.content.decode(
+                            "utf-8", errors="replace"
+                        )
+                except Exception:
+                    raw_text = ""
+                if not raw_text:
+                    raw_text = response.reason_phrase or ""
+                error_message = (raw_text or "").strip()
                 error_response = AnthropicErrorResponse(
-                    error=AnthropicError(
+                    error=AnthropicError(
+                        type="api_error",
+                        message=error_message
+                        or f"Upstream API error ({response.status_code})",
+                    )
                 )
                 return JSONResponse(
                     status_code=response.status_code,
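The upstream error path no longer forwards a possibly empty `response.text`; it falls back through the decoded body, the HTTP reason phrase, and finally a generic message including the status code. The same chain as a standalone sketch:

import httpx

def upstream_error_message(response: httpx.Response) -> str:
    # Prefer the body text, then the raw bytes, then the reason phrase.
    raw = response.text
    try:
        if not raw:
            raw = response.content.decode("utf-8", errors="replace")
    except Exception:
        raw = ""
    if not raw:
        raw = response.reason_phrase or ""
    return raw.strip() or f"Upstream API error ({response.status_code})"

print(upstream_error_message(httpx.Response(503, content=b"")))  # "Service Unavailable"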
@@ -728,32 +244,42 @@ async def create_message(
                 )
 
             openai_completion = response.json()
-            logger.debug(
-
+            logger.debug(
+                f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}"
+            )
+
             from openai.types.chat import ChatCompletion
+
             completion = ChatCompletion.model_validate(openai_completion)
             anthropic_message = convert_openai_to_anthropic(completion, model)
-
+
             anthropic_response = anthropic_message.model_dump()
-
+            anthropic_response["usage"] = _normalize_usage(
+                anthropic_response.get("usage")
+            )
+            logger.debug(
+                f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}"
+            )
 
             return JSONResponse(content=anthropic_response)
 
     except httpx.TimeoutException:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="timeout_error", message="Request timed out"
+            )
         )
-        raise HTTPException(
+        return JSONResponse(
             status_code=HTTPStatus.GATEWAY_TIMEOUT,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
     except httpx.RequestError as e:
         error_response = AnthropicErrorResponse(
             error=AnthropicError(type="connection_error", message=str(e))
         )
-        raise HTTPException(
+        return JSONResponse(
             status_code=HTTPStatus.BAD_GATEWAY,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
 
 
@@ -786,6 +312,112 @@ async def list_models(
     )
 
 
+@router.post("/v1/messages/count_tokens")
+async def count_tokens(
+    request: Request,
+    settings: Settings = Depends(get_request_settings),
+) -> JSONResponse:
+    """
+    Count tokens in messages without creating a message.
+    Uses tiktoken for local token counting.
+    """
+    try:
+        body_bytes = await request.body()
+        body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(
+            f"[Count Tokens Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
+    except json.JSONDecodeError as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+    except Exception as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message=str(e))
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    # Validate required fields
+    if not isinstance(body_json, dict):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    messages = body_json.get("messages", [])
+    if not isinstance(messages, list):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message="messages must be a list"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    model = body_json.get("model", "")
+    system = body_json.get("system")
+    tools = body_json.get("tools", [])
+
+    try:
+        # Use tiktoken for token counting
+        import tiktoken  # type: ignore[import-not-found]
+
+        # Map model names to tiktoken encoding
+        # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
+        encoding = tiktoken.get_encoding("cl100k_base")
+
+        total_tokens = 0
+
+        # Count system prompt tokens if present
+        if system:
+            if isinstance(system, str):
+                total_tokens += len(encoding.encode(system))
+            elif isinstance(system, list):
+                for block in system:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        total_tokens += len(encoding.encode(block.get("text", "")))
+
+        # Count message tokens
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total_tokens += len(encoding.encode(content))
+            elif isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            total_tokens += len(encoding.encode(block.get("text", "")))
+                        elif block.get("type") == "image":
+                            # Images are typically counted as a fixed number of tokens
+                            # This is an approximation
+                            total_tokens += 85  # Standard approximation for images
+
+        # Count tool definitions tokens
+        if tools:
+            for tool in tools:
+                tool_def = tool if isinstance(tool, dict) else tool.model_dump()
+                # Rough approximation for tool definitions
+                total_tokens += len(encoding.encode(json.dumps(tool_def)))
+
+        logger.debug(f"[Count Tokens Response] input_tokens: {total_tokens}")
+
+        return JSONResponse(content={"input_tokens": total_tokens})
+
+    except Exception as e:
+        logger.error(f"Token counting error: {e}")
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="internal_error", message=f"Failed to count tokens: {str(e)}"
+            )
+        )
+        return JSONResponse(status_code=500, content=error_response.model_dump())
+
+
 @router.get("/health")
 async def health_check() -> dict[str, str]:
     """Health check endpoint."""