local-openai2anthropic 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/config.py +1 -1
- local_openai2anthropic/converter.py +28 -220
- local_openai2anthropic/openai_types.py +149 -0
- local_openai2anthropic/router.py +163 -25
- {local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/METADATA +1 -1
- {local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/RECORD +10 -9
- {local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/licenses/LICENSE +0 -0
local_openai2anthropic/converter.py
CHANGED
@@ -5,23 +5,13 @@ Core conversion logic between Anthropic and OpenAI formats.
 
 import json
 import logging
-import time
-from typing import Any, AsyncGenerator, Optional
-
-logger = logging.getLogger(__name__)
+from typing import Any, Optional
 
 from anthropic.types import (
     ContentBlock,
-    ContentBlockDeltaEvent,
-    ContentBlockStartEvent,
-    ContentBlockStopEvent,
     Message,
-    MessageDeltaEvent,
     MessageParam,
-    MessageStartEvent,
-    MessageStopEvent,
     TextBlock,
-    TextDelta,
     ToolUseBlock,
 )
 from anthropic.types.message_create_params import MessageCreateParams
@@ -175,11 +165,15 @@ def convert_anthropic_to_openai(
 
     # Handle thinking parameter
     # vLLM/SGLang use chat_template_kwargs.thinking to toggle thinking mode
+    # Some models use "thinking", others use "enable_thinking", so we include both
     if thinking and isinstance(thinking, dict):
         thinking_type = thinking.get("type")
         if thinking_type == "enabled":
-            # Enable thinking mode for
-            params["chat_template_kwargs"] = {
+            # Enable thinking mode - include both variants for compatibility
+            params["chat_template_kwargs"] = {
+                "thinking": True,
+                "enable_thinking": True,
+            }
 
             # Log if budget_tokens was provided but will be ignored
             budget_tokens = thinking.get("budget_tokens")
@@ -191,10 +185,16 @@ def convert_anthropic_to_openai(
                 )
         else:
             # Default to disabled thinking mode if not explicitly enabled
-            params["chat_template_kwargs"] = {
+            params["chat_template_kwargs"] = {
+                "thinking": False,
+                "enable_thinking": False,
+            }
     else:
         # Default to disabled thinking mode when thinking is not provided
-        params["chat_template_kwargs"] = {
+        params["chat_template_kwargs"] = {
+            "thinking": False,
+            "enable_thinking": False,
+        }
 
     # Store server tool configs for later use by router
     if server_tools_config:
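Net effect of the two hunks above, restated as a standalone sketch with plain dicts (this is an illustration, not the package's actual function signature):

```python
# Sketch of the thinking -> chat_template_kwargs mapping introduced above.
anthropic_thinking = {"type": "enabled", "budget_tokens": 2048}  # budget_tokens is accepted but ignored

params = {}
if anthropic_thinking and isinstance(anthropic_thinking, dict) and anthropic_thinking.get("type") == "enabled":
    params["chat_template_kwargs"] = {"thinking": True, "enable_thinking": True}
else:
    params["chat_template_kwargs"] = {"thinking": False, "enable_thinking": False}

assert params["chat_template_kwargs"] == {"thinking": True, "enable_thinking": True}
```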
@@ -361,12 +361,25 @@ def convert_openai_to_anthropic(
     Returns:
         Anthropic Message response
     """
+    from anthropic.types.beta import BetaThinkingBlock
+
     choice = completion.choices[0]
     message = choice.message
 
     # Convert content blocks
     content: list[ContentBlock] = []
 
+    # Add reasoning content (thinking) first if present
+    reasoning_content = getattr(message, 'reasoning_content', None)
+    if reasoning_content:
+        content.append(
+            BetaThinkingBlock(
+                type="thinking",
+                thinking=reasoning_content,
+                signature="",  # Signature not available from OpenAI format
+            )
+        )
+
     # Add text content if present
     if message.content:
         if isinstance(message.content, str):
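The non-streaming path thus surfaces any vLLM/SGLang `reasoning_content` as a leading thinking block. A minimal sketch with plain dicts (hypothetical values; the real code builds an `anthropic` SDK `BetaThinkingBlock`):

```python
# Hypothetical vLLM/SGLang message and the content list it now produces.
message = {
    "role": "assistant",
    "content": "Paris.",
    "reasoning_content": "The user asks for the capital of France...",
}

content = []
if message.get("reasoning_content"):
    # Thinking block goes first; signature is empty because OpenAI-style
    # responses carry no Anthropic thinking signature.
    content.append({"type": "thinking", "thinking": message["reasoning_content"], "signature": ""})
if message.get("content"):
    content.append({"type": "text", "text": message["content"]})

assert [b["type"] for b in content] == ["thinking", "text"]
```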
@@ -426,208 +439,3 @@ def convert_openai_to_anthropic(
     }
 
     return Message.model_validate(message_dict)
-
-
-async def convert_openai_stream_to_anthropic(
-    stream: AsyncGenerator[ChatCompletionChunk, None],
-    model: str,
-    enable_ping: bool = False,
-    ping_interval: float = 15.0,
-) -> AsyncGenerator[dict, None]:
-    """
-    Convert OpenAI streaming response to Anthropic streaming events.
-
-    Args:
-        stream: OpenAI chat completion stream
-        model: Model name
-        enable_ping: Whether to send periodic ping events
-        ping_interval: Interval between ping events in seconds
-
-    Yields:
-        Anthropic MessageStreamEvent objects as dicts
-    """
-    message_id = f"msg_{int(time.time() * 1000)}"
-    first_chunk = True
-    content_block_started = False
-    content_block_index = 0
-    current_tool_call: Optional[dict[str, Any]] = None
-    finish_reason: Optional[str] = None
-
-    # Track usage for final message_delta
-    input_tokens = 0
-    output_tokens = 0
-
-    last_ping_time = time.time()
-
-    async for chunk in stream:
-        # Send ping events if enabled and interval has passed
-        if enable_ping:
-            current_time = time.time()
-            if current_time - last_ping_time >= ping_interval:
-                yield {"type": "ping"}
-                last_ping_time = current_time
-
-        # First chunk: message_start event
-        if first_chunk:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            yield {
-                "type": "message_start",
-                "message": {
-                    "id": message_id,
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [],
-                    "model": model,
-                    "stop_reason": None,
-                    "stop_sequence": None,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": 0,
-                        "cache_creation_input_tokens": None,
-                        "cache_read_input_tokens": None,
-                    },
-                },
-            }
-            first_chunk = False
-            continue
-
-        # Handle usage-only chunks (last chunk)
-        if not chunk.choices:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            # Close any open content block
-            if content_block_started:
-                yield {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-
-            # Message delta with final usage
-            stop_reason_map = {
-                "stop": "end_turn",
-                "length": "max_tokens",
-                "tool_calls": "tool_use",
-            }
-            yield {
-                "type": "message_delta",
-                "delta": {
-                    "stop_reason": stop_reason_map.get(finish_reason or "stop", "end_turn"),
-                },
-                "usage": {
-                    "input_tokens": input_tokens,
-                    "output_tokens": output_tokens,
-                    "cache_creation_input_tokens": getattr(chunk.usage, "cache_creation_input_tokens", None),
-                    "cache_read_input_tokens": getattr(chunk.usage, "cache_read_input_tokens", None),
-                },
-            }
-            continue
-
-        choice = chunk.choices[0]
-        delta = choice.delta
-
-        # Track finish reason
-        if choice.finish_reason:
-            finish_reason = choice.finish_reason
-            continue
-
-        # Handle reasoning content (thinking)
-        if delta.reasoning_content:
-            reasoning = delta.reasoning_content
-            # Start thinking content block if not already started
-            if not content_block_started or content_block_index == 0:
-                # We need a separate index for thinking block
-                if content_block_started:
-                    # Close previous block
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_index += 1
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "thinking", "thinking": ""},
-                }
-                content_block_started = True
-
-            yield {
-                "type": "content_block_delta",
-                "index": content_block_index,
-                "delta": {"type": "thinking_delta", "thinking": reasoning},
-            }
-            continue
-
-        # Handle content
-        if delta.content:
-            if not content_block_started:
-                # Start text content block
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "text", "text": ""},
-                }
-                content_block_started = True
-
-            if delta.content:
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "text_delta", "text": delta.content},
-                }
-
-        # Handle tool calls
-        if delta.tool_calls:
-            tool_call = delta.tool_calls[0]
-
-            if tool_call.id:
-                # Close previous content block if any
-                if content_block_started:
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_started = False
-                content_block_index += 1
-
-                # Start new tool_use block
-                current_tool_call = {
-                    "id": tool_call.id,
-                    "name": tool_call.function.name if tool_call.function else "",
-                    "arguments": "",
-                }
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {
-                        "type": "tool_use",
-                        "id": tool_call.id,
-                        "name": tool_call.function.name if tool_call.function else "",
-                        "input": {},
-                    },
-                }
-                content_block_started = True
-
-            elif tool_call.function and tool_call.function.arguments:
-                # Continue tool call arguments
-                args = tool_call.function.arguments
-                current_tool_call["arguments"] += args
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "input_json_delta", "partial_json": args},
-                }
-
-    # Close final content block
-    if content_block_started:
-        yield {
-            "type": "content_block_stop",
-            "index": content_block_index,
-        }
-
-    # Message stop event
-    yield {"type": "message_stop"}
local_openai2anthropic/openai_types.py
ADDED
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+OpenAI API type definitions for compatibility with vLLM/SGLang responses.
+
+This module defines Pydantic models compatible with OpenAI API responses,
+these models support additional fields like `reasoning_content` that are
+returned by vLLM/SGLang but not present in the official OpenAI SDK.
+"""
+
+from typing import Any, Literal, Optional, TypedDict
+
+from pydantic import BaseModel
+
+
+# TypedDict types for parameters (used as dict in code)
+class ChatCompletionToolFunction(TypedDict):
+    """Function definition for a tool."""
+
+    name: str
+    description: str
+    parameters: dict[str, Any]
+
+
+class ChatCompletionToolParam(TypedDict):
+    """Tool parameter for chat completion."""
+
+    type: Literal["function"]
+    function: ChatCompletionToolFunction
+
+
+class CompletionCreateParams(TypedDict, total=False):
+    """Parameters for creating a chat completion."""
+
+    model: str
+    messages: list[dict[str, Any]]
+    max_tokens: int
+    temperature: float
+    top_p: float
+    top_k: int
+    stream: bool
+    stop: list[str]
+    tools: list[ChatCompletionToolParam]
+    tool_choice: str | dict[str, Any]
+    stream_options: dict[str, Any]
+    # Additional fields for vLLM/SGLang compatibility
+    chat_template_kwargs: dict[str, Any]
+    # Internal field for server tools config
+    _server_tools_config: dict[str, dict[str, Any]]
+
+
+# Pydantic models for API responses
+class Function(BaseModel):
+    """A function call."""
+
+    name: str
+    arguments: str
+
+
+class ChatCompletionMessageToolCall(BaseModel):
+    """A tool call in a chat completion message."""
+
+    id: str
+    type: str = "function"
+    function: Function
+
+
+class ChatCompletionMessage(BaseModel):
+    """A chat completion message."""
+
+    role: str
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionMessageToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class Choice(BaseModel):
+    """A choice in a chat completion response."""
+
+    index: int = 0
+    message: ChatCompletionMessage
+    finish_reason: Optional[str] = None
+
+
+class FunctionDelta(BaseModel):
+    """A function call delta."""
+
+    name: Optional[str] = None
+    arguments: Optional[str] = None
+
+
+class ChatCompletionDeltaToolCall(BaseModel):
+    """A tool call delta in a streaming response."""
+
+    index: int = 0
+    id: Optional[str] = None
+    type: Optional[str] = None
+    function: Optional[FunctionDelta] = None
+
+
+class ChoiceDelta(BaseModel):
+    """A delta in a streaming chat completion response."""
+
+    role: Optional[str] = None
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionDeltaToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class StreamingChoice(BaseModel):
+    """A choice in a streaming chat completion response."""
+
+    index: int = 0
+    delta: ChoiceDelta
+    finish_reason: Optional[str] = None
+
+
+class CompletionUsage(BaseModel):
+    """Usage statistics for a completion request."""
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    # Optional cache-related fields
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
+
+
+class ChatCompletion(BaseModel):
+    """A chat completion response."""
+
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: list[Choice]
+    usage: Optional[CompletionUsage] = None
+
+
+class ChatCompletionChunk(BaseModel):
+    """A chunk in a streaming chat completion response."""
+
+    id: str
+    object: str = "chat.completion.chunk"
+    created: int
+    model: str
+    choices: list[StreamingChoice]
+    usage: Optional[CompletionUsage] = None
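A quick check of the new module, assuming the package is installed so `local_openai2anthropic.openai_types` is importable (per the RECORD entry); all field values below are made up:

```python
# Hypothetical payload; real IDs, model names, and token counts will differ.
from local_openai2anthropic.openai_types import ChatCompletion

payload = {
    "id": "chatcmpl-123",
    "created": 1700000000,
    "model": "qwen3",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {
                "role": "assistant",
                "content": "Paris.",
                "reasoning_content": "Thinking about the question...",
            },
        }
    ],
    "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
}

completion = ChatCompletion.model_validate(payload)
print(completion.choices[0].message.reasoning_content)  # Thinking about the question...
```

The official OpenAI SDK models would silently drop `reasoning_content`; these models preserve it.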
local_openai2anthropic/router.py
CHANGED
@@ -82,6 +82,7 @@ async def _stream_response(
     first_chunk = True
     content_block_started = False
     content_block_index = 0
+    current_block_type = None  # 'thinking', 'text', or 'tool_use'
     finish_reason = None
     input_tokens = 0
     output_tokens = 0
@@ -97,13 +98,14 @@ async def _stream_response(
 
             try:
                 chunk = json.loads(data)
+                logger.debug(f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}")
             except json.JSONDecodeError:
                 continue
 
             # First chunk: message_start
             if first_chunk:
                 message_id = chunk.get("id", "")
-                usage = chunk.get("usage"
+                usage = chunk.get("usage") or {}
                 input_tokens = usage.get("prompt_tokens", 0)
 
                 start_event = {
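Note on the `or {}` change: `dict.get`'s default only applies when the key is absent, while these chunks can carry an explicit `"usage": null`. A standalone illustration:

```python
chunk = {"id": "c1", "usage": None}  # hypothetical chunk carrying an explicit null
print(chunk.get("usage", {}))    # None - the default only applies when the key is missing
print(chunk.get("usage") or {})  # {}  - also covers an explicit null/None value
```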
@@ -124,54 +126,70 @@ async def _stream_response(
                         },
                     },
                 }
+                logger.debug(f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}")
                 yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
                 first_chunk = False
                 continue
 
             # Handle usage-only chunks
             if not chunk.get("choices"):
-                usage = chunk.get("usage"
+                usage = chunk.get("usage") or {}
                 if usage:
                     if content_block_started:
                         yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
                         content_block_started = False
 
                     stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-
+                    delta_event = {'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}}
+                    logger.debug(f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}")
+                    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
                 continue
 
             choice = chunk["choices"][0]
             delta = choice.get("delta", {})
 
-            # Track finish reason
+            # Track finish reason (but don't skip - content may also be present)
            if choice.get("finish_reason"):
                 finish_reason = choice["finish_reason"]
-                continue
 
             # Handle reasoning content (thinking)
             if delta.get("reasoning_content"):
                 reasoning = delta["reasoning_content"]
                 # Start thinking content block if not already started
-                if not content_block_started or
-                #
-                # For simplicity, we treat thinking as a separate block before text
+                if not content_block_started or current_block_type != 'thinking':
+                    # Close previous block if exists
                     if content_block_started:
-
-
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
                         content_block_index += 1
-
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'thinking', 'thinking': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
                     content_block_started = True
+                    current_block_type = 'thinking'
 
-
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'thinking_delta', 'thinking': reasoning}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
                 continue
 
             # Handle content
             if delta.get("content"):
-                if not content_block_started:
-
+                if not content_block_started or current_block_type != 'text':
+                    # Close previous block if exists
+                    if content_block_started:
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
+                        content_block_index += 1
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
                     content_block_started = True
+                    current_block_type = 'text'
 
-
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
 
             # Handle tool calls
             if delta.get("tool_calls"):
@@ -183,27 +201,34 @@ async def _stream_response(
                         content_block_started = False
                     content_block_index += 1
 
-
+                    func = tool_call.get('function') or {}
+                    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
                     content_block_started = True
+                    current_block_type = 'tool_use'
 
-                elif tool_call.get(
-                    args = tool_call
+                elif (tool_call.get('function') or {}).get("arguments"):
+                    args = (tool_call.get('function') or {}).get("arguments", "")
                     yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
 
         # Close final content block
         if content_block_started:
-
+            stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+            logger.debug(f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}")
+            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
 
         # Message stop
-
-
+        stop_event = {'type': 'message_stop'}
+        logger.debug(f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}")
+        yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
 
     except Exception as e:
+        import traceback
+        error_msg = f"{str(e)}\n{traceback.format_exc()}"
+        logger.error(f"Stream error: {error_msg}")
        error_event = AnthropicErrorResponse(
             error=AnthropicError(type="internal_error", message=str(e))
         )
         yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-    yield "data: [DONE]\n\n"
 
 
 async def _convert_result_to_stream(
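With `current_block_type` tracked, a stream that interleaves `reasoning_content` and regular `content` now closes each block before opening the next. A hand-derived sketch of the event order the generator emits for a thinking-then-text stream (payloads abbreviated):

```python
# Expected SSE event sequence for a stream carrying reasoning_content first,
# then regular content (derived from the logic in the hunks above):
expected_events = [
    "message_start",
    "content_block_start",  # index 0: {"type": "thinking", "thinking": ""}
    "content_block_delta",  # thinking_delta, repeated per chunk
    "content_block_stop",   # index 0 closes when the block type switches
    "content_block_start",  # index 1: {"type": "text", "text": ""}
    "content_block_delta",  # text_delta, repeated per chunk
    "content_block_stop",   # index 1 closes after the loop or on another switch
    "message_delta",        # final stop_reason and usage
    "message_stop",
]
```

Note also that the trailing `data: [DONE]` sentinel is gone: that is an OpenAI streaming convention, not part of the Anthropic SSE protocol.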
@@ -272,6 +297,14 @@ async def _convert_result_to_stream(
 
             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
+
+        elif block_type == "thinking":
+            # Handle thinking blocks (BetaThinkingBlock)
+            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
+            thinking_text = block.get("thinking", "")
+            if thinking_text:
+                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
+            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
 
     # 3. message_delta with final usage
     delta_event = {
@@ -289,7 +322,6 @@ async def _convert_result_to_stream(
 
     # 4. message_stop
     yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-    yield "data: [DONE]\n\n"
 
 
 class ServerToolHandler:
@@ -579,7 +611,7 @@ async def create_message(
     try:
         body_bytes = await request.body()
         body_json = json.loads(body_bytes.decode("utf-8"))
-        logger.
+        logger.debug(f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}")
         anthropic_params = body_json
     except json.JSONDecodeError as e:
         logger.error(f"Invalid JSON in request body: {e}")
@@ -636,6 +668,10 @@ async def create_message(
         enabled_server_tools=enabled_server_tools if has_server_tools else None,
     )
     openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
+
+    # Log converted OpenAI request (remove internal fields)
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith('_')}
+    logger.debug(f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}")
 
     stream = openai_params.get("stream", False)
     model = openai_params.get("model", "")
@@ -692,11 +728,16 @@ async def create_message(
             )
 
             openai_completion = response.json()
+            logger.debug(f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}")
+
             from openai.types.chat import ChatCompletion
             completion = ChatCompletion.model_validate(openai_completion)
             anthropic_message = convert_openai_to_anthropic(completion, model)
+
+            anthropic_response = anthropic_message.model_dump()
+            logger.debug(f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}")
 
-            return JSONResponse(content=
+            return JSONResponse(content=anthropic_response)
 
         except httpx.TimeoutException:
             error_response = AnthropicErrorResponse(
@@ -745,6 +786,103 @@ async def list_models(
     )
 
 
+@router.post("/v1/messages/count_tokens")
+async def count_tokens(
+    request: Request,
+    settings: Settings = Depends(get_request_settings),
+) -> JSONResponse:
+    """
+    Count tokens in messages without creating a message.
+    Uses tiktoken for local token counting.
+    """
+    try:
+        body_bytes = await request.body()
+        body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(f"[Count Tokens Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}")
+    except json.JSONDecodeError as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message=f"Invalid JSON: {e}")
+        )
+        return JSONResponse(status_code=422, content=error_response.model_dump())
+    except Exception as e:
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message=str(e))
+        )
+        return JSONResponse(status_code=400, content=error_response.model_dump())
+
+    # Validate required fields
+    if not isinstance(body_json, dict):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message="Request body must be a JSON object")
+        )
+        return JSONResponse(status_code=422, content=error_response.model_dump())
+
+    messages = body_json.get("messages", [])
+    if not isinstance(messages, list):
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="invalid_request_error", message="messages must be a list")
+        )
+        return JSONResponse(status_code=422, content=error_response.model_dump())
+
+    model = body_json.get("model", "")
+    system = body_json.get("system")
+    tools = body_json.get("tools", [])
+
+    try:
+        # Use tiktoken for token counting
+        import tiktoken
+
+        # Map model names to tiktoken encoding
+        # Claude models don't have direct tiktoken encodings, so we use cl100k_base as approximation
+        encoding = tiktoken.get_encoding("cl100k_base")
+
+        total_tokens = 0
+
+        # Count system prompt tokens if present
+        if system:
+            if isinstance(system, str):
+                total_tokens += len(encoding.encode(system))
+            elif isinstance(system, list):
+                for block in system:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        total_tokens += len(encoding.encode(block.get("text", ""))) 
+
+        # Count message tokens
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total_tokens += len(encoding.encode(content))
+            elif isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            total_tokens += len(encoding.encode(block.get("text", "")))
+                        elif block.get("type") == "image":
+                            # Images are typically counted as a fixed number of tokens
+                            # This is an approximation
+                            total_tokens += 85  # Standard approximation for images
+
+        # Count tool definitions tokens
+        if tools:
+            for tool in tools:
+                tool_def = tool if isinstance(tool, dict) else tool.model_dump()
+                # Rough approximation for tool definitions
+                total_tokens += len(encoding.encode(json.dumps(tool_def)))
+
+        logger.debug(f"[Count Tokens Response] input_tokens: {total_tokens}")
+
+        return JSONResponse(content={
+            "input_tokens": total_tokens
+        })
+
+    except Exception as e:
+        logger.error(f"Token counting error: {e}")
+        error_response = AnthropicErrorResponse(
+            error=AnthropicError(type="internal_error", message=f"Failed to count tokens: {str(e)}")
+        )
+        return JSONResponse(status_code=500, content=error_response.model_dump())
+
+
 @router.get("/health")
 async def health_check() -> dict[str, str]:
     """Health check endpoint."""
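A quick way to exercise the new endpoint (a sketch; the host and port depend on how the proxy is run, and the model name is a placeholder). Counts are cl100k_base approximations, as the handler's comments note:

```python
import httpx

resp = httpx.post(
    "http://localhost:8080/v1/messages/count_tokens",
    json={
        "model": "any-local-model",
        "system": "You are a helpful assistant.",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(resp.json())  # e.g. {"input_tokens": 12}
```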
{local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-openai2anthropic
-Version: 0.2.2
+Version: 0.2.4
 Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
 Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
 Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
{local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/RECORD
RENAMED
@@ -1,18 +1,19 @@
-local_openai2anthropic/__init__.py,sha256=
+local_openai2anthropic/__init__.py,sha256=xPWXxEgbns2l2aiZzW0BDbNBkjcfXK-Ee-2ukgjQPKc,1059
 local_openai2anthropic/__main__.py,sha256=K21u5u7FN8-DbO67TT_XDF0neGqJeFrVNkteRauCRQk,179
-local_openai2anthropic/config.py,sha256=
-local_openai2anthropic/converter.py,sha256=
+local_openai2anthropic/config.py,sha256=bnM7p5htd6rHgLn7Z0Ukmm2jVImLuVjIB5Cnfpf2ClY,1918
+local_openai2anthropic/converter.py,sha256=qp0LPJBTP0uAb_5l9VINZ03RAjmumxdquP6JqWXiZkQ,15779
 local_openai2anthropic/daemon.py,sha256=pZnRojGFcuIpR8yLDNjV-b0LJRBVhgRAa-dKeRRse44,10017
 local_openai2anthropic/daemon_runner.py,sha256=rguOH0PgpbjqNsKYei0uCQX8JQOQ1wmtQH1CtW95Dbw,3274
 local_openai2anthropic/main.py,sha256=5tdgPel8RSCn1iK0d7hYAmcTM9vYHlepgQujaEXA2ic,9866
+local_openai2anthropic/openai_types.py,sha256=jFdCvLwtXYoo5gGRqOhbHQcVaxcsxNnCP_yFPIv7rG4,3823
 local_openai2anthropic/protocol.py,sha256=vUEgxtRPFll6jEtLc4DyxTLCBjrWIEScZXhEqe4uibk,5185
-local_openai2anthropic/router.py,sha256=
+local_openai2anthropic/router.py,sha256=KDIsckdQLx78z5rmVX8Zhr5zWO9m_qB-BjQbTwWjj0s,40224
 local_openai2anthropic/tavily_client.py,sha256=QsBhnyF8BFWPAxB4XtWCCpHCquNL5SW93-zjTTi4Meg,3774
 local_openai2anthropic/server_tools/__init__.py,sha256=QlJfjEta-HOCtLe7NaY_fpbEKv-ZpInjAnfmSqE9tbk,615
 local_openai2anthropic/server_tools/base.py,sha256=pNFsv-jSgxVrkY004AHAcYMNZgVSO8ZOeCzQBUtQ3vU,5633
 local_openai2anthropic/server_tools/web_search.py,sha256=1C7lX_cm-tMaN3MsCjinEZYPJc_Hj4yAxYay9h8Zbvs,6543
-local_openai2anthropic-0.2.2.dist-info/METADATA,sha256=
-local_openai2anthropic-0.2.2.dist-info/WHEEL,sha256=
-local_openai2anthropic-0.2.2.dist-info/entry_points.txt,sha256=
-local_openai2anthropic-0.2.2.dist-info/licenses/LICENSE,sha256=
-local_openai2anthropic-0.2.2.dist-info/RECORD,,
+local_openai2anthropic-0.2.4.dist-info/METADATA,sha256=nWz75h6XmZzWk3BdkMhTZNT0xlUmUSNmx2jgyFONS10,10040
+local_openai2anthropic-0.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+local_openai2anthropic-0.2.4.dist-info/entry_points.txt,sha256=hdc9tSJUNxyNLXcTYye5SuD2K0bEQhxBhGnWTFup6ZM,116
+local_openai2anthropic-0.2.4.dist-info/licenses/LICENSE,sha256=X3_kZy3lJvd_xp8IeyUcIAO2Y367MXZc6aaRx8BYR_s,11369
+local_openai2anthropic-0.2.4.dist-info/RECORD,,
{local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/WHEEL
RENAMED
File without changes
{local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/entry_points.txt
RENAMED
File without changes
{local_openai2anthropic-0.2.2.dist-info → local_openai2anthropic-0.2.4.dist-info}/licenses/LICENSE
RENAMED
File without changes