local-openai2anthropic 0.2.0__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/.gitignore +1 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/PKG-INFO +1 -1
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/pyproject.toml +1 -1
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/__init__.py +1 -1
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/config.py +1 -1
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/converter.py +28 -193
- local_openai2anthropic-0.2.3/src/local_openai2anthropic/openai_types.py +149 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/router.py +75 -16
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/tests/test_converter.py +3 -3
- local_openai2anthropic-0.2.3/tests/test_upstream.sh +79 -0
- local_openai2anthropic-0.2.3/uv.lock +963 -0
- local_openai2anthropic-0.2.0/.env.example +0 -39
- local_openai2anthropic-0.2.0/debug_request.py +0 -38
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/.github/workflows/publish.yml +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/LICENSE +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/README.md +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/README_zh.md +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/basic_chat.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/streaming.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/thinking_mode.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/tool_calling.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/vision.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/examples/web_search.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/__main__.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/daemon.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/daemon_runner.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/main.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/protocol.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/__init__.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/base.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/web_search.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/tavily_client.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/tests/__init__.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/tests/test_integration.py +0 -0
- {local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/tests/test_router.py +0 -0
{local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-openai2anthropic
-Version: 0.2.0
+Version: 0.2.3
 Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
 Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
 Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
{local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/converter.py RENAMED

@@ -5,23 +5,13 @@ Core conversion logic between Anthropic and OpenAI formats.

 import json
 import logging
-import time
-from typing import Any, AsyncGenerator, Optional
-
-logger = logging.getLogger(__name__)
+from typing import Any, Optional

 from anthropic.types import (
     ContentBlock,
-    ContentBlockDeltaEvent,
-    ContentBlockStartEvent,
-    ContentBlockStopEvent,
     Message,
-    MessageDeltaEvent,
     MessageParam,
-    MessageStartEvent,
-    MessageStopEvent,
     TextBlock,
-    TextDelta,
     ToolUseBlock,
 )
 from anthropic.types.message_create_params import MessageCreateParams
@@ -175,11 +165,15 @@ def convert_anthropic_to_openai(

     # Handle thinking parameter
     # vLLM/SGLang use chat_template_kwargs.thinking to toggle thinking mode
+    # Some models use "thinking", others use "enable_thinking", so we include both
     if thinking and isinstance(thinking, dict):
         thinking_type = thinking.get("type")
         if thinking_type == "enabled":
-            # Enable thinking mode for vLLM/SGLang
-            params["chat_template_kwargs"] = {"thinking": True}
+            # Enable thinking mode - include both variants for compatibility
+            params["chat_template_kwargs"] = {
+                "thinking": True,
+                "enable_thinking": True,
+            }

             # Log if budget_tokens was provided but will be ignored
             budget_tokens = thinking.get("budget_tokens")
@@ -191,10 +185,16 @@ def convert_anthropic_to_openai(
             )
         else:
             # Default to disabled thinking mode if not explicitly enabled
-            params["chat_template_kwargs"] = {"thinking": False}
+            params["chat_template_kwargs"] = {
+                "thinking": False,
+                "enable_thinking": False,
+            }
     else:
         # Default to disabled thinking mode when thinking is not provided
-        params["chat_template_kwargs"] = {"thinking": False}
+        params["chat_template_kwargs"] = {
+            "thinking": False,
+            "enable_thinking": False,
+        }

     # Store server tool configs for later use by router
     if server_tools_config:
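For reference, a minimal sketch (not taken from the repository) of the OpenAI-side request body the converter now produces when an Anthropic request asks for thinking; every field other than `chat_template_kwargs` is illustrative:

```python
# Hedged sketch: model/messages/max_tokens are made-up values.
openai_request = {
    "model": "qwen3",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 1024,
    # Added by the hunks above when the Anthropic request has thinking == {"type": "enabled"}
    "chat_template_kwargs": {"thinking": True, "enable_thinking": True},
}
# When thinking is absent or not "enabled", both flags are emitted as False instead.
```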
@@ -361,12 +361,25 @@ def convert_openai_to_anthropic(
     Returns:
         Anthropic Message response
     """
+    from anthropic.types.beta import BetaThinkingBlock
+
     choice = completion.choices[0]
     message = choice.message

     # Convert content blocks
     content: list[ContentBlock] = []

+    # Add reasoning content (thinking) first if present
+    reasoning_content = getattr(message, 'reasoning_content', None)
+    if reasoning_content:
+        content.append(
+            BetaThinkingBlock(
+                type="thinking",
+                thinking=reasoning_content,
+                signature="",  # Signature not available from OpenAI format
+            )
+        )
+
     # Add text content if present
     if message.content:
         if isinstance(message.content, str):
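A rough sketch of what this hunk changes, assuming an upstream reply whose message carries `reasoning_content`: the converted Anthropic message now places a thinking block ahead of the text block (values below are illustrative):

```python
# Upstream (OpenAI-style) message, illustrative values:
#   {"role": "assistant", "content": "Paris.", "reasoning_content": "The capital of France is Paris."}
# Converted Anthropic content list, roughly:
content = [
    {"type": "thinking", "thinking": "The capital of France is Paris.", "signature": ""},
    {"type": "text", "text": "Paris."},
]
```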
@@ -426,181 +439,3 @@ def convert_openai_to_anthropic(
     }

     return Message.model_validate(message_dict)
-
-
-async def convert_openai_stream_to_anthropic(
-    stream: AsyncGenerator[ChatCompletionChunk, None],
-    model: str,
-    enable_ping: bool = False,
-    ping_interval: float = 15.0,
-) -> AsyncGenerator[dict, None]:
-    """
-    Convert OpenAI streaming response to Anthropic streaming events.
-
-    Args:
-        stream: OpenAI chat completion stream
-        model: Model name
-        enable_ping: Whether to send periodic ping events
-        ping_interval: Interval between ping events in seconds
-
-    Yields:
-        Anthropic MessageStreamEvent objects as dicts
-    """
-    message_id = f"msg_{int(time.time() * 1000)}"
-    first_chunk = True
-    content_block_started = False
-    content_block_index = 0
-    current_tool_call: Optional[dict[str, Any]] = None
-    finish_reason: Optional[str] = None
-
-    # Track usage for final message_delta
-    input_tokens = 0
-    output_tokens = 0
-
-    last_ping_time = time.time()
-
-    async for chunk in stream:
-        # Send ping events if enabled and interval has passed
-        if enable_ping:
-            current_time = time.time()
-            if current_time - last_ping_time >= ping_interval:
-                yield {"type": "ping"}
-                last_ping_time = current_time
-
-        # First chunk: message_start event
-        if first_chunk:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            yield {
-                "type": "message_start",
-                "message": {
-                    "id": message_id,
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [],
-                    "model": model,
-                    "stop_reason": None,
-                    "stop_sequence": None,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": 0,
-                        "cache_creation_input_tokens": None,
-                        "cache_read_input_tokens": None,
-                    },
-                },
-            }
-            first_chunk = False
-            continue
-
-        # Handle usage-only chunks (last chunk)
-        if not chunk.choices:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            # Close any open content block
-            if content_block_started:
-                yield {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-
-            # Message delta with final usage
-            stop_reason_map = {
-                "stop": "end_turn",
-                "length": "max_tokens",
-                "tool_calls": "tool_use",
-            }
-            yield {
-                "type": "message_delta",
-                "delta": {
-                    "stop_reason": stop_reason_map.get(finish_reason or "stop", "end_turn"),
-                },
-                "usage": {
-                    "input_tokens": input_tokens,
-                    "output_tokens": output_tokens,
-                    "cache_creation_input_tokens": getattr(chunk.usage, "cache_creation_input_tokens", None),
-                    "cache_read_input_tokens": getattr(chunk.usage, "cache_read_input_tokens", None),
-                },
-            }
-            continue
-
-        choice = chunk.choices[0]
-        delta = choice.delta
-
-        # Track finish reason
-        if choice.finish_reason:
-            finish_reason = choice.finish_reason
-            continue
-
-        # Handle content
-        if delta.content:
-            if not content_block_started:
-                # Start text content block
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "text", "text": ""},
-                }
-                content_block_started = True
-
-            if delta.content:
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "text_delta", "text": delta.content},
-                }
-
-        # Handle tool calls
-        if delta.tool_calls:
-            tool_call = delta.tool_calls[0]
-
-            if tool_call.id:
-                # Close previous content block if any
-                if content_block_started:
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_started = False
-                    content_block_index += 1
-
-                # Start new tool_use block
-                current_tool_call = {
-                    "id": tool_call.id,
-                    "name": tool_call.function.name if tool_call.function else "",
-                    "arguments": "",
-                }
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {
-                        "type": "tool_use",
-                        "id": tool_call.id,
-                        "name": tool_call.function.name if tool_call.function else "",
-                        "input": {},
-                    },
-                }
-                content_block_started = True
-
-            elif tool_call.function and tool_call.function.arguments:
-                # Continue tool call arguments
-                args = tool_call.function.arguments
-                current_tool_call["arguments"] += args
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "input_json_delta", "partial_json": args},
-                }
-
-    # Close final content block
-    if content_block_started:
-        yield {
-            "type": "content_block_stop",
-            "index": content_block_index,
-        }
-
-    # Message stop event
-    yield {"type": "message_stop"}
local_openai2anthropic-0.2.3/src/local_openai2anthropic/openai_types.py ADDED

@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+OpenAI API type definitions for compatibility with vLLM/SGLang responses.
+
+This module defines Pydantic models compatible with OpenAI API responses,
+these models support additional fields like `reasoning_content` that are
+returned by vLLM/SGLang but not present in the official OpenAI SDK.
+"""
+
+from typing import Any, Literal, Optional, TypedDict
+
+from pydantic import BaseModel
+
+
+# TypedDict types for parameters (used as dict in code)
+class ChatCompletionToolFunction(TypedDict):
+    """Function definition for a tool."""
+
+    name: str
+    description: str
+    parameters: dict[str, Any]
+
+
+class ChatCompletionToolParam(TypedDict):
+    """Tool parameter for chat completion."""
+
+    type: Literal["function"]
+    function: ChatCompletionToolFunction
+
+
+class CompletionCreateParams(TypedDict, total=False):
+    """Parameters for creating a chat completion."""
+
+    model: str
+    messages: list[dict[str, Any]]
+    max_tokens: int
+    temperature: float
+    top_p: float
+    top_k: int
+    stream: bool
+    stop: list[str]
+    tools: list[ChatCompletionToolParam]
+    tool_choice: str | dict[str, Any]
+    stream_options: dict[str, Any]
+    # Additional fields for vLLM/SGLang compatibility
+    chat_template_kwargs: dict[str, Any]
+    # Internal field for server tools config
+    _server_tools_config: dict[str, dict[str, Any]]
+
+
+# Pydantic models for API responses
+class Function(BaseModel):
+    """A function call."""
+
+    name: str
+    arguments: str
+
+
+class ChatCompletionMessageToolCall(BaseModel):
+    """A tool call in a chat completion message."""
+
+    id: str
+    type: str = "function"
+    function: Function
+
+
+class ChatCompletionMessage(BaseModel):
+    """A chat completion message."""
+
+    role: str
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionMessageToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class Choice(BaseModel):
+    """A choice in a chat completion response."""
+
+    index: int = 0
+    message: ChatCompletionMessage
+    finish_reason: Optional[str] = None
+
+
+class FunctionDelta(BaseModel):
+    """A function call delta."""
+
+    name: Optional[str] = None
+    arguments: Optional[str] = None
+
+
+class ChatCompletionDeltaToolCall(BaseModel):
+    """A tool call delta in a streaming response."""
+
+    index: int = 0
+    id: Optional[str] = None
+    type: Optional[str] = None
+    function: Optional[FunctionDelta] = None
+
+
+class ChoiceDelta(BaseModel):
+    """A delta in a streaming chat completion response."""
+
+    role: Optional[str] = None
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionDeltaToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class StreamingChoice(BaseModel):
+    """A choice in a streaming chat completion response."""
+
+    index: int = 0
+    delta: ChoiceDelta
+    finish_reason: Optional[str] = None
+
+
+class CompletionUsage(BaseModel):
+    """Usage statistics for a completion request."""
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    # Optional cache-related fields
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
+
+
+class ChatCompletion(BaseModel):
+    """A chat completion response."""
+
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: list[Choice]
+    usage: Optional[CompletionUsage] = None
+
+
+class ChatCompletionChunk(BaseModel):
+    """A chunk in a streaming chat completion response."""
+
+    id: str
+    object: str = "chat.completion.chunk"
+    created: int
+    model: str
+    choices: list[StreamingChoice]
+    usage: Optional[CompletionUsage] = None
{local_openai2anthropic-0.2.0 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/router.py RENAMED
@@ -82,6 +82,7 @@ async def _stream_response(
     first_chunk = True
     content_block_started = False
     content_block_index = 0
+    current_block_type = None  # 'thinking', 'text', or 'tool_use'
     finish_reason = None
     input_tokens = 0
     output_tokens = 0
@@ -97,13 +98,14 @@ async def _stream_response(

            try:
                chunk = json.loads(data)
+                logger.debug(f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}")
            except json.JSONDecodeError:
                continue

            # First chunk: message_start
            if first_chunk:
                message_id = chunk.get("id", "")
-                usage = chunk.get("usage", {})
+                usage = chunk.get("usage") or {}
                input_tokens = usage.get("prompt_tokens", 0)

                start_event = {
@@ -124,37 +126,70 @@ async def _stream_response(
                        },
                    },
                }
+                logger.debug(f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}")
                yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
                first_chunk = False
                continue

            # Handle usage-only chunks
            if not chunk.get("choices"):
-                usage = chunk.get("usage", {})
+                usage = chunk.get("usage") or {}
                if usage:
                    if content_block_started:
                        yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
                        content_block_started = False

                    stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-                    yield f"event: message_delta\ndata: {json.dumps({'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0)}})}\n\n"
+                    delta_event = {'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}}
+                    logger.debug(f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}")
+                    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
                continue

            choice = chunk["choices"][0]
            delta = choice.get("delta", {})

-            # Track finish reason
+            # Track finish reason (but don't skip - content may also be present)
            if choice.get("finish_reason"):
                finish_reason = choice["finish_reason"]
+
+            # Handle reasoning content (thinking)
+            if delta.get("reasoning_content"):
+                reasoning = delta["reasoning_content"]
+                # Start thinking content block if not already started
+                if not content_block_started or current_block_type != 'thinking':
+                    # Close previous block if exists
+                    if content_block_started:
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
+                        content_block_index += 1
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'thinking', 'thinking': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
+                    content_block_started = True
+                    current_block_type = 'thinking'
+
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'thinking_delta', 'thinking': reasoning}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
                continue

            # Handle content
            if delta.get("content"):
-                if not content_block_started:
-                    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
+                if not content_block_started or current_block_type != 'text':
+                    # Close previous block if exists
+                    if content_block_started:
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
+                        content_block_index += 1
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
                    content_block_started = True
+                    current_block_type = 'text'

-                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}})}\n\n"
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"

            # Handle tool calls
            if delta.get("tool_calls"):
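A small client-side sketch (not part of the package) for observing the new block sequencing; the host, port, and `/v1/messages` path are assumptions about a local deployment of the proxy:

```python
import json
import httpx

payload = {
    "model": "qwen3",  # illustrative
    "max_tokens": 256,
    "stream": True,
    "thinking": {"type": "enabled"},
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
}

# Expect content_block_start (thinking) -> thinking_delta* -> content_block_stop,
# then content_block_start (text) -> text_delta*, per the handler above.
with httpx.stream("POST", "http://localhost:8080/v1/messages", json=payload, timeout=60.0) as r:
    for line in r.iter_lines():
        if line.startswith("data: "):
            event = json.loads(line[len("data: "):])
            print(event.get("type"))
```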
@@ -166,27 +201,34 @@ async def _stream_response(
                        content_block_started = False
                        content_block_index += 1

-                    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': tool_call.get('function', {}).get('name', ''), 'input': {}}})}\n\n"
+                    func = tool_call.get('function') or {}
+                    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
                    content_block_started = True
+                    current_block_type = 'tool_use'

-                elif tool_call.get("function", {}).get("arguments"):
-                    args = tool_call["function"]["arguments"]
+                elif (tool_call.get('function') or {}).get("arguments"):
+                    args = (tool_call.get('function') or {}).get("arguments", "")
                    yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"

        # Close final content block
        if content_block_started:
-            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
+            stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+            logger.debug(f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}")
+            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"

        # Message stop
-        yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-        yield "data: [DONE]\n\n"
+        stop_event = {'type': 'message_stop'}
+        logger.debug(f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}")
+        yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"

    except Exception as e:
+        import traceback
+        error_msg = f"{str(e)}\n{traceback.format_exc()}"
+        logger.error(f"Stream error: {error_msg}")
        error_event = AnthropicErrorResponse(
            error=AnthropicError(type="internal_error", message=str(e))
        )
        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-        yield "data: [DONE]\n\n"


 async def _convert_result_to_stream(
@@ -255,6 +297,14 @@ async def _convert_result_to_stream(

            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
+
+        elif block_type == "thinking":
+            # Handle thinking blocks (BetaThinkingBlock)
+            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
+            thinking_text = block.get("thinking", "")
+            if thinking_text:
+                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
+            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"

    # 3. message_delta with final usage
    delta_event = {
@@ -272,7 +322,6 @@ async def _convert_result_to_stream(

    # 4. message_stop
    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-    yield "data: [DONE]\n\n"


 class ServerToolHandler:
@@ -562,6 +611,7 @@ async def create_message(
    try:
        body_bytes = await request.body()
        body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}")
        anthropic_params = body_json
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in request body: {e}")
@@ -618,6 +668,10 @@ async def create_message(
        enabled_server_tools=enabled_server_tools if has_server_tools else None,
    )
    openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
+
+    # Log converted OpenAI request (remove internal fields)
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith('_')}
+    logger.debug(f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}")

    stream = openai_params.get("stream", False)
    model = openai_params.get("model", "")
@@ -674,11 +728,16 @@ async def create_message(
            )

            openai_completion = response.json()
+            logger.debug(f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}")
+
            from openai.types.chat import ChatCompletion
            completion = ChatCompletion.model_validate(openai_completion)
            anthropic_message = convert_openai_to_anthropic(completion, model)
+
+            anthropic_response = anthropic_message.model_dump()
+            logger.debug(f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}")

-            return JSONResponse(content=anthropic_message.model_dump())
+            return JSONResponse(content=anthropic_response)

    except httpx.TimeoutException:
        error_response = AnthropicErrorResponse(
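The new `logger.debug` calls in router.py only show up when debug logging is enabled for the package; a minimal sketch (the exact runner or daemon configuration may differ):

```python
import logging

# Enable the package loggers; the debug lines use tags like
# [Anthropic Request], [OpenAI Request], [OpenAI Stream Chunk], [Anthropic Stream Event].
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("local_openai2anthropic").setLevel(logging.DEBUG)
```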