local-openai2anthropic 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/.gitignore +1 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/PKG-INFO +1 -1
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/pyproject.toml +1 -1
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/__init__.py +1 -1
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/config.py +1 -1
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/converter.py +28 -220
- local_openai2anthropic-0.2.3/src/local_openai2anthropic/openai_types.py +149 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/router.py +66 -25
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/tests/test_converter.py +3 -3
- local_openai2anthropic-0.2.3/tests/test_upstream.sh +79 -0
- local_openai2anthropic-0.2.3/uv.lock +963 -0
- local_openai2anthropic-0.2.2/.env.example +0 -39
- local_openai2anthropic-0.2.2/debug_request.py +0 -38
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/.github/workflows/publish.yml +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/LICENSE +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/README.md +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/README_zh.md +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/basic_chat.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/streaming.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/thinking_mode.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/tool_calling.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/vision.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/examples/web_search.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/__main__.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/daemon.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/daemon_runner.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/main.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/protocol.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/__init__.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/base.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/server_tools/web_search.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/tavily_client.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/tests/__init__.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/tests/test_integration.py +0 -0
- {local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/tests/test_router.py +0 -0
{local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-openai2anthropic
-Version: 0.2.2
+Version: 0.2.3
 Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
 Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
 Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
{local_openai2anthropic-0.2.2 → local_openai2anthropic-0.2.3}/src/local_openai2anthropic/converter.py

@@ -5,23 +5,13 @@ Core conversion logic between Anthropic and OpenAI formats.

 import json
 import logging
-import time
-from typing import Any, AsyncGenerator, Optional
-
-logger = logging.getLogger(__name__)
+from typing import Any, Optional

 from anthropic.types import (
     ContentBlock,
-    ContentBlockDeltaEvent,
-    ContentBlockStartEvent,
-    ContentBlockStopEvent,
     Message,
-    MessageDeltaEvent,
     MessageParam,
-    MessageStartEvent,
-    MessageStopEvent,
     TextBlock,
-    TextDelta,
     ToolUseBlock,
 )
 from anthropic.types.message_create_params import MessageCreateParams
@@ -175,11 +165,15 @@ def convert_anthropic_to_openai(

     # Handle thinking parameter
     # vLLM/SGLang use chat_template_kwargs.thinking to toggle thinking mode
+    # Some models use "thinking", others use "enable_thinking", so we include both
     if thinking and isinstance(thinking, dict):
         thinking_type = thinking.get("type")
         if thinking_type == "enabled":
-            # Enable thinking mode for vLLM/SGLang
-            params["chat_template_kwargs"] = {"thinking": True}
+            # Enable thinking mode - include both variants for compatibility
+            params["chat_template_kwargs"] = {
+                "thinking": True,
+                "enable_thinking": True,
+            }

             # Log if budget_tokens was provided but will be ignored
             budget_tokens = thinking.get("budget_tokens")
@@ -191,10 +185,16 @@ def convert_anthropic_to_openai(
                 )
         else:
             # Default to disabled thinking mode if not explicitly enabled
-            params["chat_template_kwargs"] = {"thinking": False}
+            params["chat_template_kwargs"] = {
+                "thinking": False,
+                "enable_thinking": False,
+            }
     else:
         # Default to disabled thinking mode when thinking is not provided
-        params["chat_template_kwargs"] = {"thinking": False}
+        params["chat_template_kwargs"] = {
+            "thinking": False,
+            "enable_thinking": False,
+        }

     # Store server tool configs for later use by router
     if server_tools_config:
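For illustration only (not part of the diff): with this change, an Anthropic request that enables thinking, e.g. {"thinking": {"type": "enabled", "budget_tokens": 2048}}, is converted into OpenAI-style params that carry both toggle spellings. The model name and message below are placeholders.

# Sketch of the request body produced by convert_anthropic_to_openai() after this change.
params = {
    "model": "qwen3-8b",  # placeholder model name
    "messages": [{"role": "user", "content": "hi"}],
    "max_tokens": 1024,
    "chat_template_kwargs": {
        "thinking": True,          # for backends that read "thinking"
        "enable_thinking": True,   # for backends that read "enable_thinking"
    },
}
# budget_tokens has no OpenAI-side equivalent; per the hunk above it is logged and ignored.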
@@ -361,12 +361,25 @@ def convert_openai_to_anthropic(
     Returns:
         Anthropic Message response
     """
+    from anthropic.types.beta import BetaThinkingBlock
+
     choice = completion.choices[0]
     message = choice.message

     # Convert content blocks
     content: list[ContentBlock] = []

+    # Add reasoning content (thinking) first if present
+    reasoning_content = getattr(message, 'reasoning_content', None)
+    if reasoning_content:
+        content.append(
+            BetaThinkingBlock(
+                type="thinking",
+                thinking=reasoning_content,
+                signature="",  # Signature not available from OpenAI format
+            )
+        )
+
     # Add text content if present
     if message.content:
         if isinstance(message.content, str):
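A hedged sketch (not code from the package) of what the converted content list looks like when the upstream message carries reasoning_content: the thinking block is prepended ahead of the text block, using the same anthropic SDK types as the hunk above; the literal strings are placeholders.

from anthropic.types import TextBlock
from anthropic.types.beta import BetaThinkingBlock

# Shape of `content` after conversion when reasoning_content was present.
content = [
    BetaThinkingBlock(type="thinking", thinking="Consider what is being asked...", signature=""),
    TextBlock(type="text", text="The answer is 42."),
]
thinking_text = next((block.thinking for block in content if block.type == "thinking"), None)
print(thinking_text)  # -> "Consider what is being asked..."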
@@ -426,208 +439,3 @@ def convert_openai_to_anthropic(
     }

     return Message.model_validate(message_dict)
-
-
-async def convert_openai_stream_to_anthropic(
-    stream: AsyncGenerator[ChatCompletionChunk, None],
-    model: str,
-    enable_ping: bool = False,
-    ping_interval: float = 15.0,
-) -> AsyncGenerator[dict, None]:
-    """
-    Convert OpenAI streaming response to Anthropic streaming events.
-
-    Args:
-        stream: OpenAI chat completion stream
-        model: Model name
-        enable_ping: Whether to send periodic ping events
-        ping_interval: Interval between ping events in seconds
-
-    Yields:
-        Anthropic MessageStreamEvent objects as dicts
-    """
-    message_id = f"msg_{int(time.time() * 1000)}"
-    first_chunk = True
-    content_block_started = False
-    content_block_index = 0
-    current_tool_call: Optional[dict[str, Any]] = None
-    finish_reason: Optional[str] = None
-
-    # Track usage for final message_delta
-    input_tokens = 0
-    output_tokens = 0
-
-    last_ping_time = time.time()
-
-    async for chunk in stream:
-        # Send ping events if enabled and interval has passed
-        if enable_ping:
-            current_time = time.time()
-            if current_time - last_ping_time >= ping_interval:
-                yield {"type": "ping"}
-                last_ping_time = current_time
-
-        # First chunk: message_start event
-        if first_chunk:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            yield {
-                "type": "message_start",
-                "message": {
-                    "id": message_id,
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [],
-                    "model": model,
-                    "stop_reason": None,
-                    "stop_sequence": None,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": 0,
-                        "cache_creation_input_tokens": None,
-                        "cache_read_input_tokens": None,
-                    },
-                },
-            }
-            first_chunk = False
-            continue
-
-        # Handle usage-only chunks (last chunk)
-        if not chunk.choices:
-            if chunk.usage:
-                input_tokens = chunk.usage.prompt_tokens
-                output_tokens = chunk.usage.completion_tokens
-
-            # Close any open content block
-            if content_block_started:
-                yield {
-                    "type": "content_block_stop",
-                    "index": content_block_index,
-                }
-
-            # Message delta with final usage
-            stop_reason_map = {
-                "stop": "end_turn",
-                "length": "max_tokens",
-                "tool_calls": "tool_use",
-            }
-            yield {
-                "type": "message_delta",
-                "delta": {
-                    "stop_reason": stop_reason_map.get(finish_reason or "stop", "end_turn"),
-                },
-                "usage": {
-                    "input_tokens": input_tokens,
-                    "output_tokens": output_tokens,
-                    "cache_creation_input_tokens": getattr(chunk.usage, "cache_creation_input_tokens", None),
-                    "cache_read_input_tokens": getattr(chunk.usage, "cache_read_input_tokens", None),
-                },
-            }
-            continue
-
-        choice = chunk.choices[0]
-        delta = choice.delta
-
-        # Track finish reason
-        if choice.finish_reason:
-            finish_reason = choice.finish_reason
-            continue
-
-        # Handle reasoning content (thinking)
-        if delta.reasoning_content:
-            reasoning = delta.reasoning_content
-            # Start thinking content block if not already started
-            if not content_block_started or content_block_index == 0:
-                # We need a separate index for thinking block
-                if content_block_started:
-                    # Close previous block
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_index += 1
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "thinking", "thinking": ""},
-                }
-                content_block_started = True
-
-            yield {
-                "type": "content_block_delta",
-                "index": content_block_index,
-                "delta": {"type": "thinking_delta", "thinking": reasoning},
-            }
-            continue
-
-        # Handle content
-        if delta.content:
-            if not content_block_started:
-                # Start text content block
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {"type": "text", "text": ""},
-                }
-                content_block_started = True
-
-            if delta.content:
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "text_delta", "text": delta.content},
-                }
-
-        # Handle tool calls
-        if delta.tool_calls:
-            tool_call = delta.tool_calls[0]
-
-            if tool_call.id:
-                # Close previous content block if any
-                if content_block_started:
-                    yield {
-                        "type": "content_block_stop",
-                        "index": content_block_index,
-                    }
-                    content_block_started = False
-                    content_block_index += 1
-
-                # Start new tool_use block
-                current_tool_call = {
-                    "id": tool_call.id,
-                    "name": tool_call.function.name if tool_call.function else "",
-                    "arguments": "",
-                }
-                yield {
-                    "type": "content_block_start",
-                    "index": content_block_index,
-                    "content_block": {
-                        "type": "tool_use",
-                        "id": tool_call.id,
-                        "name": tool_call.function.name if tool_call.function else "",
-                        "input": {},
-                    },
-                }
-                content_block_started = True
-
-            elif tool_call.function and tool_call.function.arguments:
-                # Continue tool call arguments
-                args = tool_call.function.arguments
-                current_tool_call["arguments"] += args
-                yield {
-                    "type": "content_block_delta",
-                    "index": content_block_index,
-                    "delta": {"type": "input_json_delta", "partial_json": args},
-                }
-
-    # Close final content block
-    if content_block_started:
-        yield {
-            "type": "content_block_stop",
-            "index": content_block_index,
-        }
-
-    # Message stop event
-    yield {"type": "message_stop"}
local_openai2anthropic-0.2.3/src/local_openai2anthropic/openai_types.py (new file)

@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+OpenAI API type definitions for compatibility with vLLM/SGLang responses.
+
+This module defines Pydantic models compatible with OpenAI API responses,
+these models support additional fields like `reasoning_content` that are
+returned by vLLM/SGLang but not present in the official OpenAI SDK.
+"""
+
+from typing import Any, Literal, Optional, TypedDict
+
+from pydantic import BaseModel
+
+
+# TypedDict types for parameters (used as dict in code)
+class ChatCompletionToolFunction(TypedDict):
+    """Function definition for a tool."""
+
+    name: str
+    description: str
+    parameters: dict[str, Any]
+
+
+class ChatCompletionToolParam(TypedDict):
+    """Tool parameter for chat completion."""
+
+    type: Literal["function"]
+    function: ChatCompletionToolFunction
+
+
+class CompletionCreateParams(TypedDict, total=False):
+    """Parameters for creating a chat completion."""
+
+    model: str
+    messages: list[dict[str, Any]]
+    max_tokens: int
+    temperature: float
+    top_p: float
+    top_k: int
+    stream: bool
+    stop: list[str]
+    tools: list[ChatCompletionToolParam]
+    tool_choice: str | dict[str, Any]
+    stream_options: dict[str, Any]
+    # Additional fields for vLLM/SGLang compatibility
+    chat_template_kwargs: dict[str, Any]
+    # Internal field for server tools config
+    _server_tools_config: dict[str, dict[str, Any]]
+
+
+# Pydantic models for API responses
+class Function(BaseModel):
+    """A function call."""
+
+    name: str
+    arguments: str
+
+
+class ChatCompletionMessageToolCall(BaseModel):
+    """A tool call in a chat completion message."""
+
+    id: str
+    type: str = "function"
+    function: Function
+
+
+class ChatCompletionMessage(BaseModel):
+    """A chat completion message."""
+
+    role: str
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionMessageToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class Choice(BaseModel):
+    """A choice in a chat completion response."""
+
+    index: int = 0
+    message: ChatCompletionMessage
+    finish_reason: Optional[str] = None
+
+
+class FunctionDelta(BaseModel):
+    """A function call delta."""
+
+    name: Optional[str] = None
+    arguments: Optional[str] = None
+
+
+class ChatCompletionDeltaToolCall(BaseModel):
+    """A tool call delta in a streaming response."""
+
+    index: int = 0
+    id: Optional[str] = None
+    type: Optional[str] = None
+    function: Optional[FunctionDelta] = None
+
+
+class ChoiceDelta(BaseModel):
+    """A delta in a streaming chat completion response."""
+
+    role: Optional[str] = None
+    content: Optional[str] = None
+    tool_calls: Optional[list[ChatCompletionDeltaToolCall]] = None
+    # Additional field for reasoning content (thinking) from vLLM/SGLang
+    reasoning_content: Optional[str] = None
+
+
+class StreamingChoice(BaseModel):
+    """A choice in a streaming chat completion response."""
+
+    index: int = 0
+    delta: ChoiceDelta
+    finish_reason: Optional[str] = None
+
+
+class CompletionUsage(BaseModel):
+    """Usage statistics for a completion request."""
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    # Optional cache-related fields
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
+
+
+class ChatCompletion(BaseModel):
+    """A chat completion response."""
+
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: list[Choice]
+    usage: Optional[CompletionUsage] = None
+
+
+class ChatCompletionChunk(BaseModel):
+    """A chunk in a streaming chat completion response."""
+
+    id: str
+    object: str = "chat.completion.chunk"
+    created: int
+    model: str
+    choices: list[StreamingChoice]
+    usage: Optional[CompletionUsage] = None
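As a hedged usage sketch (not part of the package): these local models can validate a raw vLLM/SGLang-style JSON response directly, keeping the reasoning_content field that the module docstring calls out. All literal values below are placeholders.

from local_openai2anthropic.openai_types import ChatCompletion

raw = {
    "id": "chatcmpl-123",
    "object": "chat.completion",
    "created": 1700000000,
    "model": "qwen3-8b",  # placeholder
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "The answer is 42.",
                "reasoning_content": "First, restate the question...",
            },
            "finish_reason": "stop",
        }
    ],
    "usage": {"prompt_tokens": 12, "completion_tokens": 20, "total_tokens": 32},
}

completion = ChatCompletion.model_validate(raw)
print(completion.choices[0].message.reasoning_content)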