local-openai2anthropic 0.1.0__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/__main__.py +7 -0
- local_openai2anthropic/config.py +132 -18
- local_openai2anthropic/converter.py +107 -250
- local_openai2anthropic/daemon.py +382 -0
- local_openai2anthropic/daemon_runner.py +116 -0
- local_openai2anthropic/main.py +256 -33
- local_openai2anthropic/openai_types.py +149 -0
- local_openai2anthropic/protocol.py +1 -1
- local_openai2anthropic/router.py +211 -520
- local_openai2anthropic/streaming/__init__.py +6 -0
- local_openai2anthropic/streaming/handler.py +444 -0
- local_openai2anthropic/tools/__init__.py +14 -0
- local_openai2anthropic/tools/handler.py +357 -0
- local_openai2anthropic/utils/__init__.py +18 -0
- local_openai2anthropic/utils/tokens.py +96 -0
- local_openai2anthropic-0.3.6.dist-info/METADATA +374 -0
- local_openai2anthropic-0.3.6.dist-info/RECORD +25 -0
- local_openai2anthropic-0.1.0.dist-info/METADATA +0 -689
- local_openai2anthropic-0.1.0.dist-info/RECORD +0 -15
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/licenses/LICENSE +0 -0
local_openai2anthropic/router.py
CHANGED
```diff
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
```
```diff
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
```
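This hunk moves the streaming, server-tool, and token helpers out of `router.py` into the new `streaming`, `tools`, and `utils` subpackages, then re-exports them so old import paths keep working. A minimal sketch of what the re-export buys downstream code (module names come from the diff; the identity check is simply how `from … import …` re-exports behave):

```python
# Old-style import, still valid because router.py re-exports the helper
from local_openai2anthropic.router import _stream_response
# New-style import from the relocated module
from local_openai2anthropic.streaming import _stream_response as relocated

# A re-export binds the same function object under both paths
assert _stream_response is relocated
```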
```diff
@@ -43,504 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = ''.join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream("POST", url, headers=headers, json=json_data) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                try:
-                    error_json = json.loads(error_body.decode())
-                    error_msg = error_json.get("error", {}).get("message", error_body.decode())
-                except json.JSONDecodeError:
-                    error_msg = error_body.decode()
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            finish_reason = None
-            input_tokens = 0
-            output_tokens = 0
-            message_id = None
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage", {})
-                    input_tokens = usage.get("prompt_tokens", 0)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage", {})
-                    if usage:
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-                        yield f"event: message_delta\ndata: {json.dumps({'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}})}\n\n"
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-                    continue
-
-                # Handle content
-                if delta.get("content"):
-                    if not content_block_started:
-                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-                        content_block_started = True
-
-                    yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}})}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    tool_call = delta["tool_calls"][0]
-
-                    if tool_call.get("id"):
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-                        content_block_index += 1
-
-                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': tool_call.get('function', {}).get('name', ''), 'input': {}}})}\n\n"
-                        content_block_started = True
-
-                    elif tool_call.get("function", {}).get("arguments"):
-                        args = tool_call["function"]["arguments"]
-                        yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-
-            # Message stop
-            yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-            yield "data: [DONE]\n\n"
-
-    except Exception as e:
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-        yield "data: [DONE]\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(result.body)
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {"end_turn": "stop", "max_tokens": "length", "tool_use": "tool_calls"}
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            # Some clients expect `results`, others expect `content`; include both when possible.
-            tool_result_block = dict(block)
-            if "content" not in tool_result_block and "results" in tool_result_block:
-                tool_result_block["content"] = tool_result_block["results"]
-
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-    yield "data: [DONE]\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(call_id, args, result)
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.info(f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}")
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(type="api_error", message=response.text)
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.info(f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}...")
-                from openai.types.chat import ChatCompletion
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}")
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func_name = tc.function.name if tc.function else ""
-                        logger.info(f"  Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call({
-                            "id": tc.id,
-                            "function": {"name": func_name, "arguments": ""},
-                        })
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        tool_id = _generate_server_tool_id() if is_server else tc.id
-
-                        tc_dict = {
-                            "id": tool_id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": tc.function.arguments if tc.function else "{}",
-                            },
-                        }
-                        logger.info(f"  Is server tool: {is_server}, ID: {tool_id}")
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}")
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = accumulated_content + message_dict.get("content", [])
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-
-                        # Log full response for debugging
-                        logger.info(f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}")
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    return JSONResponse(content=message.model_dump())
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, server_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append({
-                        "id": call["id"],
-                        "type": "function",
-                        "function": {
-                            "name": call["function"]["name"],
-                            "arguments": call["function"]["arguments"],
-                        },
-                    })
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="timeout_error", message="Request timed out")
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    detail=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                raise HTTPException(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    detail=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            messages.append({
-                "role": "tool",
-                "tool_call_id": call["id"],
-                "content": json.dumps({
-                    "error": "max_uses_exceeded",
-                    "message": "Maximum tool uses exceeded.",
-                }),
-            })
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
 @router.post(
     "/v1/messages",
     response_model=Message,
```
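The roughly 500 deleted lines are not dropped functionality: per the file summary above, `_stream_response` and `_convert_result_to_stream` now live in `local_openai2anthropic/streaming/handler.py`, while `ServerToolHandler`, `_handle_with_server_tools`, and `_add_tool_results_to_messages` moved to `local_openai2anthropic/tools/handler.py`. For orientation, the converter they implement produces Anthropic's SSE event sequence; a minimal sketch of the framing (event names and ordering taken from the removed code above, payload values purely illustrative):

```python
import json

# message_start, then a start/delta/stop triple per content block, then a
# final message_delta carrying usage, then message_stop and the [DONE] sentinel.
events = [
    {"type": "message_start", "message": {"id": "msg_1", "role": "assistant", "content": []}},
    {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}},
    {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}},
    {"type": "content_block_stop", "index": 0},
    {"type": "message_delta", "delta": {"stop_reason": "end_turn"}, "usage": {"output_tokens": 1}},
    {"type": "message_stop"},
]
sse = "".join(f"event: {e['type']}\ndata: {json.dumps(e)}\n\n" for e in events)
sse += "data: [DONE]\n\n"
```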
```diff
@@ -562,13 +96,18 @@ async def create_message(
     try:
         body_bytes = await request.body()
         body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(
+            f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
         anthropic_params = body_json
     except json.JSONDecodeError as e:
         logger.error(f"Invalid JSON in request body: {e}")
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
     except Exception as e:
         logger.error(f"Failed to parse request body: {e}")
         error_response = AnthropicErrorResponse(
```
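The new `logger.debug` calls dump every incoming Anthropic request, the converted OpenAI request, and both responses. They only fire when the package loggers are at DEBUG; one way to switch that on (standard library logging, not a package-specific flag):

```python
import logging

# Module loggers are named after their modules (logging.getLogger(__name__)),
# so configuring the package root covers router, streaming, and tools alike.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("local_openai2anthropic").setLevel(logging.DEBUG)
```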
```diff
@@ -579,30 +118,40 @@ async def create_message(
     # Validate request shape early (avoid making upstream calls for obviously invalid requests)
     if not isinstance(anthropic_params, dict):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     model_value = anthropic_params.get("model")
     if not isinstance(model_value, str) or not model_value.strip():
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message="Model must be a non-empty string"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     messages_value = anthropic_params.get("messages")
     if not isinstance(messages_value, list) or len(messages_value) == 0:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Messages must be a non-empty list",
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     max_tokens_value = anthropic_params.get("max_tokens")
     if not isinstance(max_tokens_value, int):
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="invalid_request_error", message="max_tokens is required"
+            )
         )
-        return JSONResponse(status_code=
+        return JSONResponse(status_code=400, content=error_response.model_dump())
 
     # Check for server tools
     tools = anthropic_params.get("tools", [])
```
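With these checks in place, a structurally invalid request is rejected locally with an Anthropic-style error body instead of being forwarded upstream. A sketch of the observable behaviour (host and port are assumptions for illustration):

```python
import httpx

# max_tokens is omitted, so the proxy should answer 400 itself.
r = httpx.post(
    "http://127.0.0.1:8080/v1/messages",
    json={"model": "my-model", "messages": [{"role": "user", "content": "hi"}]},
)
assert r.status_code == 400
assert r.json()["error"]["type"] == "invalid_request_error"
```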
```diff
@@ -614,11 +163,17 @@ async def create_message(
 
     # Convert Anthropic params to OpenAI params
     openai_params_obj = convert_anthropic_to_openai(
-        anthropic_params,
+        cast(MessageCreateParams, anthropic_params),
        enabled_server_tools=enabled_server_tools if has_server_tools else None,
     )
     openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
 
+    # Log converted OpenAI request (remove internal fields)
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith("_")}
+    logger.debug(
+        f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}"
+    )
+
     stream = openai_params.get("stream", False)
     model = openai_params.get("model", "")
 
```
```diff
@@ -644,7 +199,7 @@ async def create_message(
         result = await _handle_with_server_tools(
             openai_params, url, headers, settings, tool_classes, model
         )
-
+
         # If original request was streaming, convert result to streaming format
         if stream:
             return StreamingResponse(
```
```diff
@@ -665,8 +220,23 @@ async def create_message(
             response = await client.post(url, headers=headers, json=openai_params)
 
             if response.status_code != 200:
+                raw_text = response.text
+                try:
+                    if not raw_text:
+                        raw_text = response.content.decode(
+                            "utf-8", errors="replace"
+                        )
+                except Exception:
+                    raw_text = ""
+                if not raw_text:
+                    raw_text = response.reason_phrase or ""
+                error_message = (raw_text or "").strip()
                 error_response = AnthropicErrorResponse(
-                    error=AnthropicError(
+                    error=AnthropicError(
+                        type="api_error",
+                        message=error_message
+                        or f"Upstream API error ({response.status_code})",
+                    )
                 )
                 return JSONResponse(
                     status_code=response.status_code,
```
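The added block guarantees a non-empty error message whatever the upstream returns. Pulled out of the handler for clarity, the fallback chain reads as follows (the helper name is ours, not part of the package):

```python
import httpx

def upstream_error_message(response: httpx.Response) -> str:
    # 1. decoded body text; 2. raw bytes decoded leniently; 3. HTTP reason
    # phrase; 4. a generic message carrying the status code.
    text = response.text
    if not text:
        try:
            text = response.content.decode("utf-8", errors="replace")
        except Exception:
            text = ""
    if not text:
        text = response.reason_phrase or ""
    return text.strip() or f"Upstream API error ({response.status_code})"
```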
```diff
@@ -674,27 +244,42 @@ async def create_message(
                 )
 
             openai_completion = response.json()
+            logger.debug(
+                f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}"
+            )
+
             from openai.types.chat import ChatCompletion
+
             completion = ChatCompletion.model_validate(openai_completion)
             anthropic_message = convert_openai_to_anthropic(completion, model)
 
-            return JSONResponse(content=anthropic_message.model_dump())
+            anthropic_response = anthropic_message.model_dump()
+            anthropic_response["usage"] = _normalize_usage(
+                anthropic_response.get("usage")
+            )
+            logger.debug(
+                f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}"
+            )
+
+            return JSONResponse(content=anthropic_response)
 
     except httpx.TimeoutException:
         error_response = AnthropicErrorResponse(
-            error=AnthropicError(
+            error=AnthropicError(
+                type="timeout_error", message="Request timed out"
+            )
         )
-        raise HTTPException(
+        return JSONResponse(
             status_code=HTTPStatus.GATEWAY_TIMEOUT,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
     except httpx.RequestError as e:
         error_response = AnthropicErrorResponse(
             error=AnthropicError(type="connection_error", message=str(e))
         )
-        raise HTTPException(
+        return JSONResponse(
            status_code=HTTPStatus.BAD_GATEWAY,
-            detail=error_response.model_dump(),
+            content=error_response.model_dump(),
         )
 
 
```
```diff
@@ -727,6 +312,112 @@ async def list_models(
     )
 
 
+@router.post("/v1/messages/count_tokens")
+async def count_tokens(
+    request: Request,
+    settings: Settings = Depends(get_request_settings),
+) -> JSONResponse:
+    """
+    Count tokens in messages without creating a message.
+    Uses tiktoken for local token counting.
+    """
+    try:
+        body_bytes = await request.body()
+        body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(
+            f"[Count Tokens Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}"
+        )
+    except json.JSONDecodeError as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message=f"Invalid JSON: {e}"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+    except Exception as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message=str(e))
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    # Validate required fields
+    if not isinstance(body_json, dict):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error",
+                message="Request body must be a JSON object",
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    messages = body_json.get("messages", [])
+    if not isinstance(messages, list):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="invalid_request_error", message="messages must be a list"
+            )
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    model = body_json.get("model", "")
+    system = body_json.get("system")
+    tools = body_json.get("tools", [])
+
+    try:
+        # Use tiktoken for token counting
+        import tiktoken  # type: ignore[import-not-found]
+
+        # Map model names to tiktoken encoding
+        # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
+        encoding = tiktoken.get_encoding("cl100k_base")
+
+        total_tokens = 0
+
+        # Count system prompt tokens if present
+        if system:
+            if isinstance(system, str):
+                total_tokens += len(encoding.encode(system))
+            elif isinstance(system, list):
+                for block in system:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        total_tokens += len(encoding.encode(block.get("text", "")))
+
+        # Count message tokens
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total_tokens += len(encoding.encode(content))
+            elif isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            total_tokens += len(encoding.encode(block.get("text", "")))
+                        elif block.get("type") == "image":
+                            # Images are typically counted as a fixed number of tokens
+                            # This is an approximation
+                            total_tokens += 85  # Standard approximation for images
+
+        # Count tool definitions tokens
+        if tools:
+            for tool in tools:
+                tool_def = tool if isinstance(tool, dict) else tool.model_dump()
+                # Rough approximation for tool definitions
+                total_tokens += len(encoding.encode(json.dumps(tool_def)))
+
+        logger.debug(f"[Count Tokens Response] input_tokens: {total_tokens}")
+
+        return JSONResponse(content={"input_tokens": total_tokens})
+
+    except Exception as e:
+        logger.error(f"Token counting error: {e}")
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(
+                type="internal_error", message=f"Failed to count tokens: {str(e)}"
+            )
+        )
+        return JSONResponse(status_code=500, content=error_response.model_dump())
+
+
 @router.get("/health")
 async def health_check() -> dict[str, str]:
     """Health check endpoint."""
```
|