local-openai2anthropic 0.1.0__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/__main__.py +7 -0
- local_openai2anthropic/config.py +132 -18
- local_openai2anthropic/converter.py +107 -250
- local_openai2anthropic/daemon.py +382 -0
- local_openai2anthropic/daemon_runner.py +116 -0
- local_openai2anthropic/main.py +256 -33
- local_openai2anthropic/openai_types.py +149 -0
- local_openai2anthropic/protocol.py +1 -1
- local_openai2anthropic/router.py +211 -520
- local_openai2anthropic/streaming/__init__.py +6 -0
- local_openai2anthropic/streaming/handler.py +444 -0
- local_openai2anthropic/tools/__init__.py +14 -0
- local_openai2anthropic/tools/handler.py +357 -0
- local_openai2anthropic/utils/__init__.py +18 -0
- local_openai2anthropic/utils/tokens.py +96 -0
- local_openai2anthropic-0.3.6.dist-info/METADATA +374 -0
- local_openai2anthropic-0.3.6.dist-info/RECORD +25 -0
- local_openai2anthropic-0.1.0.dist-info/METADATA +0 -689
- local_openai2anthropic-0.1.0.dist-info/RECORD +0 -15
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.3.6.dist-info}/licenses/LICENSE +0 -0
local_openai2anthropic/converter.py

@@ -5,35 +5,23 @@ Core conversion logic between Anthropic and OpenAI formats.
 
 import json
 import logging
-import time
-from typing import Any, AsyncGenerator, Optional
-
-logger = logging.getLogger(__name__)
+from typing import Any, Optional
 
 from anthropic.types import (
     ContentBlock,
-    ContentBlockDeltaEvent,
-    ContentBlockStartEvent,
-    ContentBlockStopEvent,
     Message,
-    MessageDeltaEvent,
     MessageParam,
-    MessageStartEvent,
-    MessageStopEvent,
     TextBlock,
-    TextDelta,
     ToolUseBlock,
 )
 from anthropic.types.message_create_params import MessageCreateParams
 from openai.types.chat import (
     ChatCompletion,
-    ChatCompletionChunk,
     ChatCompletionToolParam,
 )
 from openai.types.chat.completion_create_params import CompletionCreateParams
 
 from local_openai2anthropic.protocol import UsageWithCache
-from local_openai2anthropic.server_tools import ServerToolRegistry
 
 logger = logging.getLogger(__name__)
 
@@ -59,11 +47,12 @@ def convert_anthropic_to_openai(
     system = anthropic_params.get("system")
     stop_sequences = anthropic_params.get("stop_sequences")
     stream = anthropic_params.get("stream", False)
-    temperature = anthropic_params.get("temperature")
+    temperature = anthropic_params.get("temperature", 0.6)
     tool_choice = anthropic_params.get("tool_choice")
     tools = anthropic_params.get("tools")
     top_k = anthropic_params.get("top_k")
-    top_p = anthropic_params.get("top_p")
+    top_p = anthropic_params.get("top_p", 0.95)
+    repetition_penalty = anthropic_params.get("repetition_penalty", 1.1)
     thinking = anthropic_params.get("thinking")
     # metadata is accepted but not forwarded to OpenAI
 
@@ -102,7 +91,9 @@ def convert_anthropic_to_openai(
         converted_messages = _convert_anthropic_message_to_openai(msg)
         openai_messages.extend(converted_messages)
         msg_count += 1
-    logger.debug(f"Converted {msg_count} messages, total OpenAI messages: {len(openai_messages)}")
+    logger.debug(
+        f"Converted {msg_count} messages, total OpenAI messages: {len(openai_messages)}"
+    )
 
     # Build OpenAI params
     params: dict[str, Any] = {
@@ -110,6 +101,7 @@ def convert_anthropic_to_openai(
         "messages": openai_messages,
         "max_tokens": max_tokens,
         "stream": stream,
+        "repetition_penalty": repetition_penalty,
     }
 
     # Always include usage in stream for accurate token counting
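Taken together with the new sampling defaults above, the converter now always sends explicit sampling settings upstream. A minimal sketch of the assembled payload, assuming the client omitted every sampling knob and that `temperature`/`top_p` are forwarded to the backend as in 0.1.0 (model name hypothetical; `repetition_penalty` is a vLLM/SGLang extension, not part of the official OpenAI API):

```python
# Illustrative only - mirrors the defaults visible in the hunks above.
anthropic_params: dict = {"model": "qwen3-32b", "max_tokens": 1024}  # hypothetical client request

params = {
    "model": anthropic_params["model"],
    "messages": [],
    "max_tokens": anthropic_params["max_tokens"],
    "stream": anthropic_params.get("stream", False),
    "repetition_penalty": anthropic_params.get("repetition_penalty", 1.1),
    # temperature/top_p pick up the new 0.3.6 defaults when the client omits them
    "temperature": anthropic_params.get("temperature", 0.6),
    "top_p": anthropic_params.get("top_p", 0.95),
}
assert params["temperature"] == 0.6 and params["top_p"] == 0.95
```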
@@ -149,17 +141,21 @@ def convert_anthropic_to_openai(
             openai_tools.append(openai_tool)
 
     # Add server tools as OpenAI function tools
-    for tool_class in
+    for tool_class in enabled_server_tools or []:
         if tool_class.tool_type in server_tools_config:
             config = server_tools_config[tool_class.tool_type]
             openai_tools.append(tool_class.to_openai_tool(config))
 
     if openai_tools:
         params["tools"] = openai_tools
-
+
     # Convert tool_choice
     if tool_choice:
-        tc = tool_choice if isinstance(tool_choice, dict) else tool_choice.model_dump()
+        tc = (
+            tool_choice
+            if isinstance(tool_choice, dict)
+            else tool_choice.model_dump()
+        )
         tc_type = tc.get("type")
         if tc_type == "auto":
             params["tool_choice"] = "auto"
@@ -172,14 +168,18 @@ def convert_anthropic_to_openai(
             }
         else:
             params["tool_choice"] = "auto"
-
+
     # Handle thinking parameter
     # vLLM/SGLang use chat_template_kwargs.thinking to toggle thinking mode
+    # Some models use "thinking", others use "enable_thinking", so we include both
     if thinking and isinstance(thinking, dict):
         thinking_type = thinking.get("type")
         if thinking_type == "enabled":
-            # Enable thinking mode for vLLM/SGLang
-            params["chat_template_kwargs"] = {"thinking": True}
+            # Enable thinking mode - include both variants for compatibility
+            params["chat_template_kwargs"] = {
+                "thinking": True,
+                "enable_thinking": True,
+            }
 
             # Log if budget_tokens was provided but will be ignored
             budget_tokens = thinking.get("budget_tokens")
@@ -187,14 +187,20 @@ def convert_anthropic_to_openai(
                 logger.debug(
                     "thinking.budget_tokens (%s) is accepted but not supported by "
                     "vLLM/SGLang. Using default thinking configuration.",
-                    budget_tokens
+                    budget_tokens,
                 )
         else:
             # Default to disabled thinking mode if not explicitly enabled
-            params["chat_template_kwargs"] = {"thinking": False}
+            params["chat_template_kwargs"] = {
+                "thinking": False,
+                "enable_thinking": False,
+            }
     else:
         # Default to disabled thinking mode when thinking is not provided
-        params["chat_template_kwargs"] = {"thinking": False}
+        params["chat_template_kwargs"] = {
+            "thinking": False,
+            "enable_thinking": False,
+        }
 
     # Store server tool configs for later use by router
     if server_tools_config:
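The net effect of the two thinking hunks condenses into a small predicate (a sketch, not package code; the real branch also logs the ignored `budget_tokens`):

```python
def thinking_kwargs(thinking: object) -> dict[str, bool]:
    # Enabled only when the client explicitly sends {"type": "enabled", ...};
    # both key spellings are emitted for vLLM/SGLang template compatibility.
    enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
    return {"thinking": enabled, "enable_thinking": enabled}


assert thinking_kwargs({"type": "enabled", "budget_tokens": 2048}) == {
    "thinking": True,
    "enable_thinking": True,
}
assert thinking_kwargs(None) == {"thinking": False, "enable_thinking": False}
```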
@@ -208,32 +214,32 @@ def _convert_anthropic_message_to_openai(
 ) -> list[dict[str, Any]]:
     """
     Convert a single Anthropic message to OpenAI format.
-
-    Returns a list of messages because tool_results need to be
+
+    Returns a list of messages because tool_results need to be
     separate tool messages in OpenAI format.
     """
     role = msg.get("role", "user")
     content = msg.get("content", "")
-
+
     if isinstance(content, str):
         return [{"role": role, "content": content}]
-
+
     # Handle list of content blocks
     openai_content: list[dict[str, Any]] = []
     tool_calls: list[dict[str, Any]] = []
     tool_call_results: list[dict[str, Any]] = []
-
+
     for block in content:
         if isinstance(block, str):
             openai_content.append({"type": "text", "text": block})
             continue
-
+
         block_type = block.get("type") if isinstance(block, dict) else block.type
-
+
         if block_type == "text":
             text = block.get("text") if isinstance(block, dict) else block.text
             openai_content.append({"type": "text", "text": text})
-
+
         elif block_type == "image":
             # Convert image to image_url format
             source = block.get("source") if isinstance(block, dict) else block.source
@@ -246,11 +252,13 @@ def _convert_anthropic_message_to_openai(
                 data = source.data
             # Build data URL
             url = f"data:{media_type};base64,{data}"
-            openai_content.append(
-                {"type": "image_url", "image_url": {"url": url}}
-            )
-
+            openai_content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": url},
+                }
+            )
+
         elif block_type == "tool_use":
             # Convert to function call
             if isinstance(block, dict):
@@ -261,27 +269,31 @@ def _convert_anthropic_message_to_openai(
                 tool_id = block.id
                 name = block.name
                 input_data = block.input
-
-            tool_calls.append(
-                {
-                    "id": tool_id,
-                    "type": "function",
-                    "function": {"name": name, "arguments": json.dumps(input_data)},
-                }
-            )
-
+
+            tool_calls.append(
+                {
+                    "id": tool_id,
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "arguments": json.dumps(input_data)
+                        if isinstance(input_data, dict)
+                        else str(input_data),
+                    },
+                }
+            )
+
         elif block_type == "tool_result":
             # Tool results need to be separate tool messages
             if isinstance(block, dict):
                 tool_use_id = block.get("tool_use_id", "")
                 result_content = block.get("content", "")
-
+                # Note: is_error is not directly supported in OpenAI API
             else:
                 tool_use_id = block.tool_use_id
                 result_content = block.content
-                is_error
-
+                # Note: is_error is not directly supported in OpenAI API
+
             # Handle content that might be a list or string
             if isinstance(result_content, list):
                 # Extract text from content blocks
|
|
|
298
310
|
result_text = "\n".join(text_parts)
|
|
299
311
|
else:
|
|
300
312
|
result_text = str(result_content)
|
|
301
|
-
|
|
313
|
+
|
|
302
314
|
tool_msg: dict[str, Any] = {
|
|
303
315
|
"role": "tool",
|
|
304
316
|
"tool_call_id": tool_use_id,
|
|
@@ -306,28 +318,28 @@ def _convert_anthropic_message_to_openai(
         }
         # Note: is_error is not directly supported in OpenAI API
         # but we could add it to content if needed
-
+
         tool_call_results.append(tool_msg)
-
+
     # Build primary message
     messages: list[dict[str, Any]] = []
     # SGLang requires content field to be present, default to empty string
     primary_msg: dict[str, Any] = {"role": role, "content": ""}
-
+
     if openai_content:
         if len(openai_content) == 1 and openai_content[0]["type"] == "text":
             primary_msg["content"] = openai_content[0]["text"]
         else:
             primary_msg["content"] = openai_content
-
+
     if tool_calls:
         primary_msg["tool_calls"] = tool_calls
-
+
     messages.append(primary_msg)
-
+
     # Add tool result messages separately
     messages.extend(tool_call_results)
-
+
     return messages
 
 
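The one-to-many contract of `_convert_anthropic_message_to_openai` is easiest to see with a `tool_result` turn: the primary message keeps the text, and each tool result becomes its own `role: "tool"` message appended afterwards. Expected shapes sketched with hypothetical values:

```python
anthropic_msg = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What should I wear?"},
        {"type": "tool_result", "tool_use_id": "toolu_01", "content": "22C, sunny"},
    ],
}

# Expected output shape (sketch): primary message first, tool messages after.
expected = [
    {"role": "user", "content": "What should I wear?"},
    {"role": "tool", "tool_call_id": "toolu_01", "content": "22C, sunny"},
]
```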
@@ -353,20 +365,33 @@ def convert_openai_to_anthropic(
 ) -> Message:
     """
     Convert OpenAI ChatCompletion to Anthropic Message.
-
+
     Args:
         completion: OpenAI chat completion response
        model: Model name
-
+
     Returns:
         Anthropic Message response
     """
+    from anthropic.types.beta import BetaThinkingBlock
+
     choice = completion.choices[0]
     message = choice.message
-
+
     # Convert content blocks
     content: list[ContentBlock] = []
-
+
+    # Add reasoning content (thinking) first if present
+    reasoning_content = getattr(message, "reasoning_content", None)
+    if reasoning_content:
+        content.append(
+            BetaThinkingBlock(
+                type="thinking",
+                thinking=reasoning_content,
+                signature="",  # Signature not available from OpenAI format
+            )
+        )
+
     # Add text content if present
     if message.content:
         if isinstance(message.content, str):
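This is new behavior in 0.3.x: OpenAI-compatible backends such as vLLM can return the model's chain of thought in a non-standard `reasoning_content` field, and the converter now surfaces it as an Anthropic-style thinking block placed before the regular content. A sketch of the resulting block, with hypothetical values:

```python
# If the upstream message carries reasoning_content="First, check the units...",
# the Anthropic content list will start with a block shaped like:
thinking_block = {
    "type": "thinking",
    "thinking": "First, check the units...",
    "signature": "",  # no signature exists in the OpenAI wire format
}
```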
@@ -375,16 +400,20 @@ def convert_openai_to_anthropic(
             for part in message.content:
                 if part.type == "text":
                     content.append(TextBlock(type="text", text=part.text))
-
+
     # Convert tool calls
     if message.tool_calls:
         for tc in message.tool_calls:
+            # Handle case where function might be None
+            if not tc.function:
+                continue
+
             tool_input: dict[str, Any] = {}
             try:
                 tool_input = json.loads(tc.function.arguments)
             except json.JSONDecodeError:
                 tool_input = {"raw": tc.function.arguments}
-
+
             content.append(
                 ToolUseBlock(
                     type="tool_use",
@@ -393,7 +422,7 @@ def convert_openai_to_anthropic(
                     input=tool_input,
                 )
             )
-
+
     # Determine stop reason
     stop_reason_map = {
         "stop": "end_turn",
@@ -401,18 +430,24 @@ def convert_openai_to_anthropic(
         "tool_calls": "tool_use",
         "content_filter": "end_turn",
     }
-    anthropic_stop_reason = stop_reason_map.get(choice.finish_reason or "stop", "end_turn")
-
+    anthropic_stop_reason = stop_reason_map.get(
+        choice.finish_reason or "stop", "end_turn"
+    )
+
     # Build usage dict with cache support (if available from upstream)
     usage_dict = None
     if completion.usage:
         usage_dict = {
             "input_tokens": completion.usage.prompt_tokens,
             "output_tokens": completion.usage.completion_tokens,
-            "cache_creation_input_tokens": getattr(completion.usage, "cache_creation_input_tokens", None),
-            "cache_read_input_tokens": getattr(completion.usage, "cache_read_input_tokens", None),
+            "cache_creation_input_tokens": getattr(
+                completion.usage, "cache_creation_input_tokens", None
+            ),
+            "cache_read_input_tokens": getattr(
+                completion.usage, "cache_read_input_tokens", None
+            ),
         }
-
+
     # Build message dict to avoid Pydantic validation issues
     message_dict = {
         "id": completion.id,
@@ -424,183 +459,5 @@ def convert_openai_to_anthropic(
         "stop_sequence": None,
         "usage": usage_dict,
     }
-
-    return Message.model_validate(message_dict)
 
-
-async def convert_openai_stream_to_anthropic(
-    stream: AsyncGenerator[ChatCompletionChunk, None],
-    model: str,
-    enable_ping: bool = False,
-    ping_interval: float = 15.0,
-) -> AsyncGenerator[dict, None]:
-    """
-    Convert OpenAI streaming response to Anthropic streaming events.
-
-    Args:
-        stream: OpenAI chat completion stream
-        model: Model name
-        enable_ping: Whether to send periodic ping events
-        ping_interval: Interval between ping events in seconds
-
-    Yields:
-        Anthropic MessageStreamEvent objects as dicts
-    """
-    message_id = f"msg_{int(time.time() * 1000)}"
-    first_chunk = True
-    content_block_started = False
-    content_block_index = 0
-    current_tool_call: Optional[dict[str, Any]] = None
-    finish_reason: Optional[str] = None
-
-    # Track usage for final message_delta
-    input_tokens = 0
-    output_tokens = 0
-
-    last_ping_time = time.time()
-
-    async for chunk in stream:
-        # Send ping events if enabled and interval has passed
-        if enable_ping:
-            current_time = time.time()
-            if current_time - last_ping_time >= ping_interval:
-                yield {"type": "ping"}
-                last_ping_time = current_time
-
-        # First chunk: message_start event
-        if first_chunk:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            yield {
-                "type": "message_start",
-                "message": {
-                    "id": message_id,
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [],
-                    "model": model,
-                    "stop_reason": None,
-                    "stop_sequence": None,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": 0,
-                        "cache_creation_input_tokens": None,
-                        "cache_read_input_tokens": None,
-                    },
-                },
-            }
-            first_chunk = False
-            continue
-
-        # Handle usage-only chunks (last chunk)
-        if not chunk.choices:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            # Close any open content block
-            if content_block_started:
-                yield {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-
-            # Message delta with final usage
-            stop_reason_map = {
-                "stop": "end_turn",
-                "length": "max_tokens",
-                "tool_calls": "tool_use",
-            }
-            yield {
-                "type": "message_delta",
-                "delta": {
-                    "stop_reason": stop_reason_map.get(finish_reason or "stop", "end_turn"),
-                },
-                "usage": {
-                    "input_tokens": input_tokens,
-                    "output_tokens": output_tokens,
-                    "cache_creation_input_tokens": getattr(chunk.usage, "cache_creation_input_tokens", None),
-                    "cache_read_input_tokens": getattr(chunk.usage, "cache_read_input_tokens", None),
-                },
-            }
-            continue
-
-        choice = chunk.choices[0]
-        delta = choice.delta
-
-        # Track finish reason
-        if choice.finish_reason:
-            finish_reason = choice.finish_reason
-            continue
-
-        # Handle content
-        if delta.content:
-            if not content_block_started:
-                # Start text content block
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "text", "text": ""},
-                }
-                content_block_started = True
-
-            if delta.content:
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "text_delta", "text": delta.content},
-                }
-
-        # Handle tool calls
-        if delta.tool_calls:
-            tool_call = delta.tool_calls[0]
-
-            if tool_call.id:
-                # Close previous content block if any
-                if content_block_started:
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_started = False
-                    content_block_index += 1
-
-                # Start new tool_use block
-                current_tool_call = {
-                    "id": tool_call.id,
-                    "name": tool_call.function.name if tool_call.function else "",
-                    "arguments": "",
-                }
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {
-                        "type": "tool_use",
-                        "id": tool_call.id,
-                        "name": tool_call.function.name if tool_call.function else "",
-                        "input": {},
-                    },
-                }
-                content_block_started = True
-
-            elif tool_call.function and tool_call.function.arguments:
-                # Continue tool call arguments
-                args = tool_call.function.arguments
-                current_tool_call["arguments"] += args
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "input_json_delta", "partial_json": args},
-                }
-
-    # Close final content block
-    if content_block_started:
-        yield {
-            "type": "content_block_stop",
-            "index": content_block_index,
-        }
-
-    # Message stop event
-    yield {"type": "message_stop"}
+    return Message.model_validate(message_dict)