local-openai2anthropic 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/config.py +1 -0
- local_openai2anthropic/converter.py +8 -4
- local_openai2anthropic/main.py +75 -6
- local_openai2anthropic/protocol.py +1 -1
- local_openai2anthropic/router.py +35 -840
- local_openai2anthropic/streaming/__init__.py +6 -0
- local_openai2anthropic/streaming/handler.py +444 -0
- local_openai2anthropic/tools/__init__.py +14 -0
- local_openai2anthropic/tools/handler.py +357 -0
- local_openai2anthropic/utils/__init__.py +18 -0
- local_openai2anthropic/utils/tokens.py +96 -0
- {local_openai2anthropic-0.3.3.dist-info → local_openai2anthropic-0.3.5.dist-info}/METADATA +1 -1
- local_openai2anthropic-0.3.5.dist-info/RECORD +25 -0
- local_openai2anthropic-0.3.3.dist-info/RECORD +0 -19
- {local_openai2anthropic-0.3.3.dist-info → local_openai2anthropic-0.3.5.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.3.3.dist-info → local_openai2anthropic-0.3.5.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.3.3.dist-info → local_openai2anthropic-0.3.5.dist-info}/licenses/LICENSE +0 -0
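The headline change in 0.3.5 is a refactor: roughly 840 lines of streaming, server-tool, and token-estimation helpers move out of `local_openai2anthropic/router.py` into the new `streaming/`, `tools/`, and `utils/` subpackages, while `router.py` re-imports them and lists them in `__all__` for backward compatibility (see the diff below). A minimal sketch of what that shim guarantees, assuming the 0.3.5 wheel is installed:

```python
# Both import paths resolve to the same object, because router.py re-imports
# the moved helper from the new subpackage and re-exports it via __all__.
from local_openai2anthropic.router import _stream_response as via_router
from local_openai2anthropic.streaming import _stream_response as via_streaming

assert via_router is via_streaming
```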
local_openai2anthropic/router.py
CHANGED
@@ -5,10 +5,8 @@ FastAPI router for Anthropic-compatible Messages API.
 
 import json
 import logging
-import secrets
-import string
 from http import HTTPStatus
-from typing import Any,
+from typing import Any, cast
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -26,10 +24,44 @@ from local_openai2anthropic.protocol import (
     MessageCreateParams,
 )
 from local_openai2anthropic.server_tools import ServerToolRegistry
+from local_openai2anthropic.streaming import _convert_result_to_stream, _stream_response
+from local_openai2anthropic.tools import (
+    ServerToolHandler,
+    _add_tool_results_to_messages,
+    _handle_with_server_tools,
+)
+from local_openai2anthropic.utils import (
+    _chunk_text,
+    _count_tokens,
+    _estimate_input_tokens,
+    _generate_server_tool_id,
+    _normalize_usage,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
 
+# Backward compatibility: re-export functions used by tests
+__all__ = [
+    "router",
+    "get_request_settings",
+    "create_message",
+    "list_models",
+    "count_tokens",
+    "health_check",
+    # Backward compatibility exports
+    "_stream_response",
+    "_convert_result_to_stream",
+    "ServerToolHandler",
+    "_handle_with_server_tools",
+    "_add_tool_results_to_messages",
+    "_generate_server_tool_id",
+    "_normalize_usage",
+    "_count_tokens",
+    "_chunk_text",
+    "_estimate_input_tokens",
+]
+
 
 def get_request_settings(request: Request) -> Settings:
     """Resolve Settings from the running app when available.
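The hunk below deletes the helper bodies from `router.py`; per the imports added above, they now live in `local_openai2anthropic.utils`, `.tools`, and `.streaming`. For instance `_chunk_text`, whose removed body is visible below, slices text into fixed-size pieces to feed `thinking_delta` events; a small usage sketch, assuming the moved implementation in `utils` is unchanged:

```python
from local_openai2anthropic.utils import _chunk_text

_chunk_text("")                      # -> []
_chunk_text("abcdef", chunk_size=4)  # -> ["abcd", "ef"]
_chunk_text("x" * 450)               # -> three chunks of 200 + 200 + 50 chars
```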
@@ -43,843 +75,6 @@ def get_request_settings(request: Request) -> Settings:
     return get_settings()
 
 
-def _generate_server_tool_id() -> str:
-    """Generate Anthropic-style server tool use ID (srvtoolu_...)."""
-    # Generate 24 random alphanumeric characters
-    chars = string.ascii_lowercase + string.digits
-    random_part = "".join(secrets.choice(chars) for _ in range(24))
-    return f"srvtoolu_{random_part}"
-
-
-def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, Any] | None:
-    if not isinstance(usage, dict):
-        return usage
-    allowed_keys = {
-        "input_tokens",
-        "output_tokens",
-        "cache_creation_input_tokens",
-        "cache_read_input_tokens",
-        "server_tool_use",
-    }
-    normalized = {k: v for k, v in usage.items() if k in allowed_keys}
-    return normalized or None
-
-
-def _count_tokens(text: str) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    return len(encoding.encode(text))
-
-
-def _chunk_text(text: str, chunk_size: int = 200) -> list[str]:
-    if not text:
-        return []
-    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
-
-
-def _estimate_input_tokens(openai_params: dict[str, Any]) -> int:
-    try:
-        import tiktoken  # type: ignore[import-not-found]
-    except Exception:
-        return 0
-
-    encoding = tiktoken.get_encoding("cl100k_base")
-    total_tokens = 0
-
-    system = openai_params.get("system")
-    if isinstance(system, str):
-        total_tokens += len(encoding.encode(system))
-
-    messages = openai_params.get("messages", [])
-    if isinstance(messages, list):
-        for msg in messages:
-            if not isinstance(msg, dict):
-                continue
-            content = msg.get("content", "")
-            if isinstance(content, str):
-                total_tokens += len(encoding.encode(content))
-            elif isinstance(content, list):
-                for block in content:
-                    if not isinstance(block, dict):
-                        total_tokens += len(encoding.encode(str(block)))
-                        continue
-                    block_type = block.get("type")
-                    if block_type == "text":
-                        total_tokens += len(encoding.encode(block.get("text", "")))
-                    elif block_type == "image_url":
-                        total_tokens += 85
-
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list) and tool_calls:
-                total_tokens += len(encoding.encode(json.dumps(tool_calls)))
-
-    tools = openai_params.get("tools")
-    if isinstance(tools, list) and tools:
-        total_tokens += len(encoding.encode(json.dumps(tools)))
-
-    tool_choice = openai_params.get("tool_choice")
-    if tool_choice is not None:
-        total_tokens += len(encoding.encode(json.dumps(tool_choice)))
-
-    response_format = openai_params.get("response_format")
-    if response_format is not None:
-        total_tokens += len(encoding.encode(json.dumps(response_format)))
-
-    return total_tokens
-
-
-async def _stream_response(
-    client: httpx.AsyncClient,
-    url: str,
-    headers: dict,
-    json_data: dict,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """
-    Stream response from OpenAI and convert to Anthropic format.
-    """
-    try:
-        async with client.stream(
-            "POST", url, headers=headers, json=json_data
-        ) as response:
-            if response.status_code != 200:
-                error_body = await response.aread()
-                error_text = error_body.decode("utf-8", errors="replace").strip()
-                try:
-                    error_json = json.loads(error_text) if error_text else {}
-                    error_msg = error_json.get("error", {}).get("message") or error_text
-                except json.JSONDecodeError:
-                    error_msg = error_text
-                if not error_msg:
-                    error_msg = (
-                        response.reason_phrase
-                        or f"Upstream API error ({response.status_code})"
-                    )
-
-                error_event = AnthropicErrorResponse(
-                    error=AnthropicError(type="api_error", message=error_msg)
-                )
-                yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                return
-
-            # Process SSE stream
-            first_chunk = True
-            content_block_started = False
-            content_block_index = 0
-            current_block_type = None  # 'thinking', 'text', or 'tool_use'
-            current_tool_call_index = None
-            tool_call_buffers: dict[int, str] = {}
-            finish_reason = None
-            input_tokens = _estimate_input_tokens(json_data)
-            output_tokens = 0
-            message_id = None
-            sent_message_delta = False
-
-            async for line in response.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-
-                data = line[6:]
-                if data == "[DONE]":
-                    if not sent_message_delta:
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": output_tokens,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                    break
-
-                try:
-                    chunk = json.loads(data)
-                    logger.debug(
-                        f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}"
-                    )
-                except json.JSONDecodeError:
-                    continue
-
-                # First chunk: message_start
-                if first_chunk:
-                    message_id = chunk.get("id", "")
-                    usage = chunk.get("usage") or {}
-                    input_tokens = usage.get("prompt_tokens", input_tokens)
-
-                    start_event = {
-                        "type": "message_start",
-                        "message": {
-                            "id": message_id,
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [],
-                            "model": model,
-                            "stop_reason": None,
-                            "stop_sequence": None,
-                            "usage": {
-                                "input_tokens": input_tokens,
-                                "output_tokens": 0,
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        },
-                    }
-                    logger.debug(
-                        f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}"
-                    )
-                    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-                    first_chunk = False
-                    continue
-
-                # Handle usage-only chunks
-                if not chunk.get("choices"):
-                    usage = chunk.get("usage") or {}
-                    if usage:
-                        input_tokens = usage.get("prompt_tokens", input_tokens)
-                        output_tokens = usage.get("completion_tokens", output_tokens)
-                        if content_block_started:
-                            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                            content_block_started = False
-
-                        stop_reason_map = {
-                            "stop": "end_turn",
-                            "length": "max_tokens",
-                            "tool_calls": "tool_use",
-                        }
-                        delta_event = {
-                            "type": "message_delta",
-                            "delta": {
-                                "stop_reason": stop_reason_map.get(
-                                    finish_reason or "stop", "end_turn"
-                                )
-                            },
-                            "usage": {
-                                "input_tokens": usage.get(
-                                    "prompt_tokens", input_tokens
-                                ),
-                                "output_tokens": usage.get("completion_tokens", 0),
-                                "cache_creation_input_tokens": None,
-                                "cache_read_input_tokens": None,
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}"
-                        )
-                        yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-                        sent_message_delta = True
-                    continue
-
-                choice = chunk["choices"][0]
-                delta = choice.get("delta", {})
-
-                # Track finish reason (but don't skip - content may also be present)
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
-                # Handle reasoning content (thinking)
-                if delta.get("reasoning_content"):
-                    reasoning = delta["reasoning_content"]
-                    # Start thinking content block if not already started
-                    if not content_block_started or current_block_type != "thinking":
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {
-                                "type": "thinking",
-                                "thinking": "",
-                                "signature": "",
-                            },
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "thinking"
-
-                    for chunk in _chunk_text(reasoning):
-                        delta_block = {
-                            "type": "content_block_delta",
-                            "index": content_block_index,
-                            "delta": {"type": "thinking_delta", "thinking": chunk},
-                        }
-                        yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-                    continue
-
-                # Handle content
-                if delta.get("content"):
-                    if not content_block_started or current_block_type != "text":
-                        # Close previous block if exists
-                        if content_block_started:
-                            stop_block = {
-                                "type": "content_block_stop",
-                                "index": content_block_index,
-                            }
-                            logger.debug(
-                                f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}"
-                            )
-                            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-                            content_block_index += 1
-                        start_block = {
-                            "type": "content_block_start",
-                            "index": content_block_index,
-                            "content_block": {"type": "text", "text": ""},
-                        }
-                        logger.debug(
-                            f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}"
-                        )
-                        yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
-                        content_block_started = True
-                        current_block_type = "text"
-
-                    output_tokens += _count_tokens(delta["content"])
-                    delta_block = {
-                        "type": "content_block_delta",
-                        "index": content_block_index,
-                        "delta": {"type": "text_delta", "text": delta["content"]},
-                    }
-                    yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
-
-                # Handle tool calls
-                if delta.get("tool_calls"):
-                    for tool_call in delta["tool_calls"]:
-                        tool_call_idx = tool_call.get("index", 0)
-
-                        if tool_call.get("id"):
-                            if content_block_started and (
-                                current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                content_block_started = False
-                                content_block_index += 1
-
-                            if not content_block_started:
-                                func = tool_call.get("function") or {}
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-
-                        if (tool_call.get("function") or {}).get("arguments"):
-                            args = (tool_call.get("function") or {}).get(
-                                "arguments", ""
-                            )
-                            if (
-                                not content_block_started
-                                or current_block_type != "tool_use"
-                                or current_tool_call_index != tool_call_idx
-                            ):
-                                if content_block_started:
-                                    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
-                                    content_block_index += 1
-                                func = tool_call.get("function") or {}
-                                tool_id = tool_call.get("id", "")
-                                yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_id, 'name': func.get('name', ''), 'input': {}}})}\n\n"
-                                content_block_started = True
-                                current_block_type = "tool_use"
-                                current_tool_call_index = tool_call_idx
-                                tool_call_buffers.setdefault(tool_call_idx, "")
-                            tool_call_buffers[tool_call_idx] = (
-                                tool_call_buffers.get(tool_call_idx, "") + args
-                            )
-                            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
-
-            # Close final content block
-            if content_block_started:
-                stop_block = {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-                logger.debug(
-                    f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}"
-                )
-                yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
-
-            # Message stop
-            stop_event = {"type": "message_stop"}
-            logger.debug(
-                f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}"
-            )
-            yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
-
-    except Exception as e:
-        import traceback
-
-        error_msg = f"{str(e)}\n{traceback.format_exc()}"
-        logger.error(f"Stream error: {error_msg}")
-        error_event = AnthropicErrorResponse(
-            error=AnthropicError(type="internal_error", message=str(e))
-        )
-        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-
-
-async def _convert_result_to_stream(
-    result: JSONResponse,
-    model: str,
-) -> AsyncGenerator[str, None]:
-    """Convert a JSONResponse to streaming SSE format."""
-    import time
-
-    body = json.loads(bytes(result.body).decode("utf-8"))
-    message_id = body.get("id", f"msg_{int(time.time() * 1000)}")
-    content = body.get("content", [])
-    usage = body.get("usage", {})
-    stop_reason = body.get("stop_reason", "end_turn")
-
-    # Map stop_reason
-    stop_reason_map = {
-        "end_turn": "stop",
-        "max_tokens": "length",
-        "tool_use": "tool_calls",
-    }
-    openai_stop_reason = stop_reason_map.get(stop_reason, "stop")
-
-    # 1. message_start event
-    start_event = {
-        "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": 0,
-                "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-                "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            },
-        },
-    }
-    yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
-
-    # 2. Process content blocks
-    for i, block in enumerate(content):
-        block_type = block.get("type")
-
-        if block_type == "text":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
-            text = block.get("text", "")
-            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'text_delta', 'text': text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "tool_use":
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            tool_input = block.get("input", {})
-            if tool_input:
-                input_json = json.dumps(tool_input, ensure_ascii=False)
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'input_json_delta', 'partial_json': input_json}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "server_tool_use":
-            # Preserve official Anthropic block type so clients can count server tool uses.
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'server_tool_use', 'id': block.get('id', ''), 'name': block.get('name', ''), 'input': block.get('input', {})}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "web_search_tool_result":
-            # Stream the tool result as its own content block.
-            tool_result_block = dict(block)
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-        elif block_type == "thinking":
-            # Handle thinking blocks (BetaThinkingBlock)
-            signature = block.get("signature", "")
-            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': '', 'signature': signature}})}\n\n"
-            thinking_text = block.get("thinking", "")
-            if thinking_text:
-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
-
-    # 3. message_delta with final usage
-    delta_event = {
-        "type": "message_delta",
-        "delta": {"stop_reason": stop_reason},
-        "usage": {
-            "input_tokens": usage.get("input_tokens", 0),
-            "output_tokens": usage.get("output_tokens", 0),
-            "cache_creation_input_tokens": usage.get("cache_creation_input_tokens"),
-            "cache_read_input_tokens": usage.get("cache_read_input_tokens"),
-            "server_tool_use": usage.get("server_tool_use"),
-        },
-    }
-    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
-
-    # 4. message_stop
-    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-
-
-class ServerToolHandler:
-    """Handles server tool execution for non-streaming requests."""
-
-    def __init__(
-        self,
-        server_tools: list[type],
-        configs: dict[str, dict[str, Any]],
-        settings: Settings,
-    ):
-        self.server_tools = {t.tool_name: t for t in server_tools}
-        self.configs = configs
-        self.settings = settings
-        self.usage: dict[str, int] = {}
-
-    def is_server_tool_call(self, tool_call: dict[str, Any]) -> bool:
-        """Check if a tool call is for a server tool."""
-        func_name = tool_call.get("function", {}).get("name")
-        return func_name in self.server_tools
-
-    async def execute_tool(
-        self,
-        tool_call: dict[str, Any],
-    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
-        """
-        Execute a server tool and return content blocks + tool result message.
-        Returns: (content_blocks, tool_result_message)
-        """
-        func_name = tool_call.get("function", {}).get("name")
-        call_id = tool_call.get("id", "")
-        openai_call_id = tool_call.get("openai_id", call_id)
-
-        tool_class = self.server_tools[func_name]
-        config = self.configs.get(tool_class.tool_type, {})
-
-        # Extract call arguments
-        args = tool_class.extract_call_args(tool_call)
-        if args is None:
-            args = {}
-
-        # Execute the tool
-        result = await tool_class.execute(call_id, args, config, self.settings)
-
-        # Update usage
-        for key, value in result.usage_increment.items():
-            self.usage[key] = self.usage.get(key, 0) + value
-
-        # Build content blocks
-        content_blocks = tool_class.build_content_blocks(call_id, args, result)
-
-        # Build tool result message for OpenAI
-        tool_result_msg = tool_class.build_tool_result_message(
-            openai_call_id, args, result
-        )
-
-        return content_blocks, tool_result_msg
-
-
-async def _handle_with_server_tools(
-    openai_params: dict[str, Any],
-    url: str,
-    headers: dict[str, str],
-    settings: Settings,
-    server_tools: list[type],
-    model: str,
-) -> JSONResponse:
-    """Handle request with server tool execution loop."""
-    params = dict(openai_params)
-    configs = params.pop("_server_tools_config", {})
-
-    handler = ServerToolHandler(server_tools, configs, settings)
-    accumulated_content: list[dict[str, Any]] = []
-
-    # Get max_uses from configs (default to settings or 5)
-    max_uses = settings.websearch_max_uses
-    for config in configs.values():
-        if config.get("max_uses"):
-            max_uses = config["max_uses"]
-            break
-
-    total_tool_calls = 0
-
-    while True:
-        async with httpx.AsyncClient(timeout=settings.request_timeout) as client:
-            try:
-                # Log full request for debugging
-                logger.debug(
-                    f"Request body: {json.dumps(params, indent=2, default=str)[:3000]}"
-                )
-
-                response = await client.post(url, headers=headers, json=params)
-
-                if response.status_code != 200:
-                    logger.error(
-                        f"OpenAI API error: {response.status_code} - {response.text}"
-                    )
-                    raw_text = response.text
-                    try:
-                        if not raw_text:
-                            raw_text = response.content.decode(
-                                "utf-8", errors="replace"
-                            )
-                    except Exception:
-                        raw_text = ""
-                    if not raw_text:
-                        raw_text = response.reason_phrase or ""
-                    error_message = (raw_text or "").strip()
-                    error_response = AnthropicErrorResponse(
-                        error=AnthropicError(
-                            type="api_error",
-                            message=error_message
-                            or f"Upstream API error ({response.status_code})",
-                        )
-                    )
-                    return JSONResponse(
-                        status_code=response.status_code,
-                        content=error_response.model_dump(),
-                    )
-
-                completion_data = response.json()
-                logger.debug(
-                    f"OpenAI response: {json.dumps(completion_data, indent=2)[:500]}..."
-                )
-                from openai.types.chat import ChatCompletion
-
-                completion = ChatCompletion.model_validate(completion_data)
-
-                # Check for server tool calls
-                server_tool_calls = []
-                other_tool_calls = []
-
-                tool_calls = completion.choices[0].message.tool_calls
-                logger.info(
-                    f"Model returned tool_calls: {len(tool_calls) if tool_calls else 0}"
-                )
-
-                if tool_calls:
-                    for tc in tool_calls:
-                        func = getattr(tc, "function", None)
-                        func_name = func.name if func else ""
-                        logger.info(f" Tool call: {func_name}")
-
-                        # Generate Anthropic-style ID for server tools
-                        is_server = handler.is_server_tool_call(
-                            {
-                                "id": tc.id,
-                                "function": {"name": func_name, "arguments": ""},
-                            }
-                        )
-
-                        # Use Anthropic-style ID for server tools, original ID otherwise
-                        client_tool_id = (
-                            _generate_server_tool_id() if is_server else tc.id
-                        )
-
-                        tc_dict = {
-                            "id": client_tool_id,
-                            "openai_id": tc.id,
-                            "function": {
-                                "name": func_name,
-                                "arguments": func.arguments if func else "{}",
-                            },
-                        }
-                        logger.info(
-                            f" Is server tool: {is_server}, ID: {client_tool_id}"
-                        )
-                        if is_server:
-                            server_tool_calls.append(tc_dict)
-                        else:
-                            other_tool_calls.append(tc)
-
-                # No server tool calls - we're done
-                logger.info(
-                    f"Server tool calls: {len(server_tool_calls)}, Other: {len(other_tool_calls)}"
-                )
-                if not server_tool_calls:
-                    message = convert_openai_to_anthropic(completion, model)
-
-                    if accumulated_content:
-                        message_dict = message.model_dump()
-                        message_dict["content"] = (
-                            accumulated_content + message_dict.get("content", [])
-                        )
-
-                        if message_dict.get("usage"):
-                            message_dict["usage"]["server_tool_use"] = handler.usage
-                        message_dict["usage"] = _normalize_usage(
-                            message_dict.get("usage")
-                        )
-
-                        # Log full response for debugging
-                        logger.info(
-                            f"Response content blocks: {json.dumps(message_dict.get('content', []), ensure_ascii=False)[:1000]}"
-                        )
-                        logger.info(f"Response usage: {message_dict.get('usage')}")
-                        logger.info(f"Server tool use count: {handler.usage}")
-
-                        return JSONResponse(content=message_dict)
-
-                    message_dict = message.model_dump()
-                    message_dict["usage"] = _normalize_usage(message_dict.get("usage"))
-                    return JSONResponse(content=message_dict)
-
-                # Check max_uses limit
-                if total_tool_calls >= max_uses:
-                    logger.warning(f"Server tool max_uses ({max_uses}) exceeded")
-                    # Return error for each call
-                    for call in server_tool_calls:
-                        func_name = call.get("function", {}).get("name", "")
-                        tool_class = handler.server_tools.get(func_name)
-                        if tool_class:
-                            from local_openai2anthropic.server_tools import ToolResult
-
-                            error_result = ToolResult(
-                                success=False,
-                                content=[],
-                                error_code="max_uses_exceeded",
-                            )
-                            error_blocks = tool_class.build_content_blocks(
-                                call["id"],
-                                {},
-                                error_result,
-                            )
-                            accumulated_content.extend(error_blocks)
-
-                    # Continue with modified messages
-                    assistant_tool_calls = []
-                    for call in server_tool_calls:
-                        assistant_tool_calls.append(
-                            {
-                                "id": call.get("openai_id", call.get("id", "")),
-                                "type": "function",
-                                "function": {
-                                    "name": call.get("function", {}).get("name", ""),
-                                    "arguments": call.get("function", {}).get(
-                                        "arguments", "{}"
-                                    ),
-                                },
-                            }
-                        )
-                    messages = params.get("messages", [])
-                    messages = _add_tool_results_to_messages(
-                        messages, assistant_tool_calls, handler, is_error=True
-                    )
-                    params["messages"] = messages
-                    continue
-
-                # Execute server tools
-                messages = params.get("messages", [])
-                assistant_tool_calls = []
-                tool_results = []
-
-                for call in server_tool_calls:
-                    total_tool_calls += 1
-                    content_blocks, tool_result = await handler.execute_tool(call)
-                    accumulated_content.extend(content_blocks)
-
-                    # Track for assistant message
-                    assistant_tool_calls.append(
-                        {
-                            "id": call.get("openai_id", call.get("id", "")),
-                            "type": "function",
-                            "function": {
-                                "name": call["function"]["name"],
-                                "arguments": call["function"]["arguments"],
-                            },
-                        }
-                    )
-                    tool_results.append(tool_result)
-
-                # Add to messages for next iteration
-                messages = _add_tool_results_to_messages(
-                    messages, assistant_tool_calls, handler, tool_results=tool_results
-                )
-                params["messages"] = messages
-
-            except httpx.TimeoutException:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(
-                        type="timeout_error", message="Request timed out"
-                    )
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.GATEWAY_TIMEOUT,
-                    content=error_response.model_dump(),
-                )
-            except httpx.RequestError as e:
-                error_response = AnthropicErrorResponse(
-                    error=AnthropicError(type="connection_error", message=str(e))
-                )
-                return JSONResponse(
-                    status_code=HTTPStatus.BAD_GATEWAY,
-                    content=error_response.model_dump(),
-                )
-
-
-def _add_tool_results_to_messages(
-    messages: list[dict[str, Any]],
-    tool_calls: list[dict[str, Any]],
-    handler: ServerToolHandler,
-    tool_results: list[dict[str, Any]] | None = None,
-    is_error: bool = False,
-) -> list[dict[str, Any]]:
-    """Add assistant tool call and results to messages."""
-    messages = list(messages)
-
-    # Add assistant message with tool calls
-    # SGLang requires content to be a string, not None
-    assistant_msg: dict[str, Any] = {
-        "role": "assistant",
-        "content": "",  # Empty string instead of None for SGLang compatibility
-        "tool_calls": tool_calls,
-    }
-    messages.append(assistant_msg)
-
-    # Add tool results
-    if is_error:
-        for call in tool_calls:
-            tool_call_id = call.get("openai_id", call.get("id", ""))
-            messages.append(
-                {
-                    "role": "tool",
-                    "tool_call_id": tool_call_id,
-                    "content": json.dumps(
-                        {
-                            "error": "max_uses_exceeded",
-                            "message": "Maximum tool uses exceeded.",
-                        }
-                    ),
-                }
-            )
-    elif tool_results:
-        messages.extend(tool_results)
-
-    return messages
-
-
 @router.post(
     "/v1/messages",
     response_model=Message,