letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/helpers.py +4 -0
- letta/agents/letta_agent.py +142 -5
- letta/constants.py +10 -7
- letta/data_sources/connectors.py +70 -53
- letta/embeddings.py +3 -240
- letta/errors.py +28 -0
- letta/functions/function_sets/base.py +4 -4
- letta/functions/functions.py +287 -32
- letta/functions/mcp_client/types.py +11 -0
- letta/functions/schema_validator.py +187 -0
- letta/functions/typescript_parser.py +196 -0
- letta/helpers/datetime_helpers.py +8 -4
- letta/helpers/tool_execution_helper.py +25 -2
- letta/llm_api/anthropic_client.py +23 -18
- letta/llm_api/azure_client.py +73 -0
- letta/llm_api/bedrock_client.py +8 -4
- letta/llm_api/google_vertex_client.py +14 -5
- letta/llm_api/llm_api_tools.py +2 -217
- letta/llm_api/llm_client.py +15 -1
- letta/llm_api/llm_client_base.py +32 -1
- letta/llm_api/openai.py +1 -0
- letta/llm_api/openai_client.py +18 -28
- letta/llm_api/together_client.py +55 -0
- letta/orm/provider.py +1 -0
- letta/orm/step_metrics.py +40 -1
- letta/otel/db_pool_monitoring.py +1 -1
- letta/schemas/agent.py +3 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -5
- letta/schemas/embedding_config.py +4 -5
- letta/schemas/enums.py +1 -1
- letta/schemas/job.py +2 -3
- letta/schemas/llm_config.py +79 -7
- letta/schemas/mcp.py +0 -24
- letta/schemas/message.py +0 -108
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/providers/__init__.py +0 -2
- letta/schemas/providers/anthropic.py +106 -8
- letta/schemas/providers/azure.py +102 -8
- letta/schemas/providers/base.py +10 -3
- letta/schemas/providers/bedrock.py +28 -16
- letta/schemas/providers/letta.py +3 -3
- letta/schemas/providers/ollama.py +2 -12
- letta/schemas/providers/openai.py +4 -4
- letta/schemas/providers/together.py +14 -2
- letta/schemas/sandbox_config.py +2 -1
- letta/schemas/tool.py +46 -22
- letta/server/rest_api/routers/v1/agents.py +179 -38
- letta/server/rest_api/routers/v1/folders.py +13 -8
- letta/server/rest_api/routers/v1/providers.py +10 -3
- letta/server/rest_api/routers/v1/sources.py +14 -8
- letta/server/rest_api/routers/v1/steps.py +17 -1
- letta/server/rest_api/routers/v1/tools.py +96 -5
- letta/server/rest_api/streaming_response.py +91 -45
- letta/server/server.py +27 -38
- letta/services/agent_manager.py +92 -20
- letta/services/agent_serialization_manager.py +11 -7
- letta/services/context_window_calculator/context_window_calculator.py +40 -2
- letta/services/helpers/agent_manager_helper.py +73 -12
- letta/services/mcp_manager.py +109 -15
- letta/services/passage_manager.py +28 -109
- letta/services/provider_manager.py +24 -0
- letta/services/step_manager.py +68 -0
- letta/services/summarizer/summarizer.py +1 -4
- letta/services/tool_executor/core_tool_executor.py +1 -1
- letta/services/tool_executor/sandbox_tool_executor.py +26 -9
- letta/services/tool_manager.py +82 -5
- letta/services/tool_sandbox/base.py +3 -11
- letta/services/tool_sandbox/modal_constants.py +17 -0
- letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
- letta/services/tool_sandbox/modal_sandbox.py +218 -3
- letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
- letta/services/tool_sandbox/modal_version_manager.py +273 -0
- letta/services/tool_sandbox/safe_pickle.py +193 -0
- letta/settings.py +5 -3
- letta/templates/sandbox_code_file.py.j2 +2 -4
- letta/templates/sandbox_code_file_async.py.j2 +2 -4
- letta/utils.py +1 -1
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
- letta/llm_api/anthropic.py +0 -1206
- letta/llm_api/aws_bedrock.py +0 -104
- letta/llm_api/azure_openai.py +0 -118
- letta/llm_api/azure_openai_constants.py +0 -11
- letta/llm_api/cohere.py +0 -391
- letta/schemas/providers/cohere.py +0 -18
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic.py
DELETED
@@ -1,1206 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import re
|
3
|
-
import time
|
4
|
-
import warnings
|
5
|
-
from typing import Generator, List, Optional, Union
|
6
|
-
|
7
|
-
import anthropic
|
8
|
-
from anthropic import PermissionDeniedError
|
9
|
-
from anthropic.types.beta import (
|
10
|
-
BetaRawContentBlockDeltaEvent,
|
11
|
-
BetaRawContentBlockStartEvent,
|
12
|
-
BetaRawContentBlockStopEvent,
|
13
|
-
BetaRawMessageDeltaEvent,
|
14
|
-
BetaRawMessageStartEvent,
|
15
|
-
BetaRawMessageStopEvent,
|
16
|
-
BetaRedactedThinkingBlock,
|
17
|
-
BetaTextBlock,
|
18
|
-
BetaThinkingBlock,
|
19
|
-
BetaToolUseBlock,
|
20
|
-
)
|
21
|
-
|
22
|
-
from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
|
23
|
-
from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
|
24
|
-
from letta.llm_api.aws_bedrock import get_bedrock_client
|
25
|
-
from letta.llm_api.helpers import add_inner_thoughts_to_functions
|
26
|
-
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
27
|
-
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
28
|
-
from letta.log import get_logger
|
29
|
-
from letta.otel.tracing import log_event
|
30
|
-
from letta.schemas.enums import ProviderCategory
|
31
|
-
from letta.schemas.message import Message as _Message
|
32
|
-
from letta.schemas.message import MessageRole as _MessageRole
|
33
|
-
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
34
|
-
from letta.schemas.openai.chat_completion_response import (
|
35
|
-
ChatCompletionChunkResponse,
|
36
|
-
ChatCompletionResponse,
|
37
|
-
Choice,
|
38
|
-
ChunkChoice,
|
39
|
-
FunctionCall,
|
40
|
-
FunctionCallDelta,
|
41
|
-
)
|
42
|
-
from letta.schemas.openai.chat_completion_response import Message
|
43
|
-
from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
|
44
|
-
from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
|
45
|
-
from letta.services.provider_manager import ProviderManager
|
46
|
-
from letta.services.user_manager import UserManager
|
47
|
-
from letta.settings import model_settings
|
48
|
-
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
|
49
|
-
|
50
|
-
logger = get_logger(__name__)
|
51
|
-
|
52
|
-
BASE_URL = "https://api.anthropic.com/v1"


# Context window value shared by every entry in the hardcoded table below.
_SHARED_CONTEXT_WINDOW = 200000

# Hardcoded model names, see https://docs.anthropic.com/claude/docs/models-overview
_MODEL_NAMES = (
    # Opus: 4.1, 3, 3 latest, 4
    "claude-opus-4-1-20250805",
    "claude-3-opus-20240229",
    "claude-3-opus-latest",
    "claude-opus-4-20250514",
    # Sonnet: 3.0, 3.5, 3.5 new, 3.5 latest, 3.7, 3.7 latest, 4
    "claude-3-sonnet-20240229",
    "claude-3-5-sonnet-20240620",
    "claude-3-5-sonnet-20241022",
    "claude-3-5-sonnet-latest",
    "claude-3-7-sonnet-20250219",
    "claude-3-7-sonnet-latest",
    "claude-sonnet-4-20250514",
    # Haiku: 3.0, 3.5, 3.5 latest
    "claude-3-haiku-20240307",
    "claude-3-5-haiku-20241022",
    "claude-3-5-haiku-latest",
)

# One {"name", "context_window"} dictionary per model, in the order listed above.
MODEL_LIST = [{"name": model_name, "context_window": _SHARED_CONTEXT_WINDOW} for model_name in _MODEL_NAMES]

# Placeholder user turn inserted when a conversation does not start with a user message.
DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."

# Stream event types that are accepted without producing any chunk content.
VALID_EVENT_TYPES = {"content_block_stop", "message_stop"}
|
135
|
-
|
136
|
-
|
137
|
-
def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:
    """Check that ``api_key`` is accepted by the Anthropic API.

    A token-count request is sent for the last model in ``MODEL_LIST``; the
    call succeeding is taken as proof that the key is valid.

    Args:
        api_key: The key to validate.

    Raises:
        ValueError: If ``api_key`` is ``None`` or empty.
        LLMAuthenticationError: If Anthropic rejects the key.
        LLMError: If the request fails for any other reason.
    """
    if not api_key:
        raise ValueError("No API key provided")

    anthropic_client = anthropic.Anthropic(api_key=api_key)
    try:
        # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
        anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
    except anthropic.AuthenticationError as e:
        # Chain explicitly so the original SDK error stays attached as the cause.
        raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED) from e
    except Exception as e:
        raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) from e
|
149
|
-
|
150
|
-
|
151
|
-
def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
    """Return the context window recorded for ``model``.

    The model must appear in the list returned by ``anthropic_get_model_list``.
    Entries of that list are dumped API records, so a model is matched on its
    ``id`` key (``name`` is also accepted for hardcoded-style entries). When the
    matching entry carries no ``context_window``, the value is taken from the
    hardcoded ``MODEL_LIST`` table instead.

    Args:
        url: Unused; kept so existing callers keep working.
        api_key: Key forwarded to ``anthropic_get_model_list``.
        model: Model identifier to look up.

    Raises:
        ValueError: If the model is not listed, or no context window is known for it.
    """
    listed = False
    for model_dict in anthropic_get_model_list(api_key=api_key):
        if model in (model_dict.get("id"), model_dict.get("name")):
            if "context_window" in model_dict:
                return model_dict["context_window"]
            listed = True
            break

    if listed:
        # The API record has no context window; fall back to the hardcoded table.
        for known_model in MODEL_LIST:
            if known_model["name"] == model:
                return known_model["context_window"]

    raise ValueError(f"Can't find model '{model}' in Anthropic model list")
|
156
|
-
|
157
|
-
|
158
|
-
def anthropic_get_model_list(api_key: Optional[str]) -> List[dict]:
    """List the models available to an Anthropic API key.

    https://docs.anthropic.com/claude/docs/models-overview

    Args:
        api_key: Key to use. When falsy, ``model_settings.anthropic_api_key``
            is used instead.

    Returns:
        The ``data`` entry of the dumped ``models.list()`` response.

    Raises:
        ValueError: If no key is available, or the response has no ``data`` field.
    """
    resolved_key = api_key or model_settings.anthropic_api_key
    if not resolved_key:
        raise ValueError("No API key provided")

    # Hand the key to the client explicitly instead of relying on the SDK
    # discovering it on its own.
    anthropic_client = anthropic.Anthropic(api_key=resolved_key)

    models_json = anthropic_client.models.list().model_dump()
    # Explicit check (not `assert`) so it still runs under `python -O`.
    if "data" not in models_json:
        raise ValueError(f"Anthropic model query response missing 'data' field: {models_json}")
    return models_json["data"]
|
175
|
-
|
176
|
-
|
177
|
-
async def anthropic_get_model_list_async(api_key: Optional[str]) -> List[dict]:
    """List the models available to an Anthropic API key (async client).

    https://docs.anthropic.com/claude/docs/models-overview

    Args:
        api_key: Key to use. When falsy, ``model_settings.anthropic_api_key``
            is used instead.

    Returns:
        The ``data`` entry of the dumped ``models.list()`` response.

    Raises:
        ValueError: If no key is available, or the response has no ``data`` field.
    """
    resolved_key = api_key or model_settings.anthropic_api_key
    if not resolved_key:
        raise ValueError("No API key provided")

    # Hand the key to the client explicitly instead of relying on the SDK
    # discovering it on its own.
    anthropic_client = anthropic.AsyncAnthropic(api_key=resolved_key)

    models = await anthropic_client.models.list()
    models_json = models.model_dump()
    # Explicit check (not `assert`) so it still runs under `python -O`.
    if "data" not in models_json:
        raise ValueError(f"Anthropic model query response missing 'data' field: {models_json}")
    return models_json["data"]
|
194
|
-
|
195
|
-
|
196
|
-
def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
    """Rewrite OpenAI-style tool definitions in Anthropic's tool layout.

    See: https://docs.anthropic.com/claude/docs/tool-use

    OpenAI wraps each definition as
    ``{"type": "function", "function": {"name", "description", "parameters"}}``.
    Anthropic expects the flat form ``{"name", "description", "input_schema"}``.
    So the conversion does two things:
    - removes one level of nesting
    - renames "parameters" to "input_schema"

    A tool whose ``parameters`` is falsy gets an empty object schema.
    """
    return [
        {
            "name": tool.function.name,
            "description": tool.function.description,
            "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
        }
        for tool in tools
    ]
|
252
|
-
|
253
|
-
|
254
|
-
def merge_tool_results_into_user_messages(messages: List[dict]):
    """Collapse runs of consecutive 'user' messages into a single message.

    Anthropic API doesn't allow role 'tool'->'user' sequences.

    Example HTTP error:
    messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row

    From: https://docs.anthropic.com/claude/docs/tool-use
    Anthropic's models and API are built around alternating user and assistant
    messages, where each message is an array of rich content blocks: text,
    image, tool_use, and tool_result.

    Args:
        messages: Message dicts, each with a ``role`` and a ``content`` that is
            either a list of content blocks or a plain value.

    Returns:
        A new list with no back-to-back 'user' messages. Merged content is
        always a list of blocks; non-list content is wrapped as a text block.
        The input dicts are not modified: a message is copied before other
        messages are merged into it.
    """
    merged_messages = []
    # True once the last entry of merged_messages is our own copy rather than
    # one of the caller's dicts.
    tail_is_copy = False

    for message in messages:
        if merged_messages and merged_messages[-1]["role"] == "user" and message["role"] == "user":
            if not tail_is_copy:
                merged_messages[-1] = dict(merged_messages[-1])
                tail_is_copy = True
            tail = merged_messages[-1]
            tail["content"] = _as_content_blocks(tail["content"]) + _as_content_blocks(message["content"])
        else:
            merged_messages.append(message)
            tail_is_copy = False

    return merged_messages


def _as_content_blocks(content):
    """Return ``content`` unchanged if it is a list, else wrapped as one text block."""
    if isinstance(content, list):
        return content
    return [{"type": "text", "text": content}]
|
304
|
-
|
305
|
-
|
306
|
-
def remap_finish_reason(stop_reason: str) -> str:
    """Translate an Anthropic 'stop_reason' into an OpenAI 'finish_reason'.

    OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
    see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api

    Anthropic values handled here
    (https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#stop-reason):
        "end_turn" and "stop_sequence" -> "stop"
        "max_tokens" -> "length"
        "tool_use" -> "function_call"

    Raises:
        ValueError: For any other ``stop_reason``.
    """
    if stop_reason in ("end_turn", "stop_sequence"):
        return "stop"
    if stop_reason == "max_tokens":
        return "length"
    if stop_reason == "tool_use":
        return "function_call"
    raise ValueError(f"Unexpected stop_reason: {stop_reason}")
|
330
|
-
|
331
|
-
|
332
|
-
def strip_xml_tags(string: str, tag: Optional[str]) -> str:
    """Remove the opening and closing ``tag`` markers from ``string``.

    Only the markers are removed; the text between them is kept. Opening tags
    may carry attributes or be self-closing.

    Args:
        string: Text to clean.
        tag: Tag name to strip, or ``None`` to return ``string`` unchanged.

    Returns:
        ``string`` with every ``<tag ...>`` and ``</tag>`` removed.
    """
    if tag is None:
        return string
    # Escape the tag so regex metacharacters in it are matched literally.
    name = re.escape(tag)
    # The lookahead requires the tag name to end right there, so stripping "b"
    # leaves "<body>" alone.
    tag_pattern = rf"<{name}(?=[\s/>])[^>]*>|</{name}\s*>"
    return re.sub(tag_pattern, "", string)
|
339
|
-
|
340
|
-
|
341
|
-
def strip_xml_tags_streaming(string: str, tag: Optional[str]) -> str:
    """Remove whole or partial ``tag`` markers from one streamed text fragment.

    A tag can be split across stream chunks, so each fragment is scrubbed of
    every piece a tag might leave behind. The pieces are removed in the order
    listed below. Every "<" and ">" in the fragment is removed, including ones
    that are not part of a tag.

    Args:
        string: The streamed text fragment.
        tag: Tag name to strip, or ``None`` to return ``string`` unchanged.
    """
    if tag is None:
        return string

    # Handle common partial tag cases
    fragments = (
        "<",  # Leftover start bracket
        f"<{tag}",  # Opening tag start
        f"</{tag}",  # Closing tag start
        f"/{tag}>",  # Closing tag end
        f"{tag}>",  # Opening tag end
        f"/{tag}",  # Partial closing tag without >
        ">",  # Leftover end bracket
    )

    cleaned = string
    for fragment in fragments:
        if fragment in cleaned:
            cleaned = cleaned.replace(fragment, "")
    return cleaned
|
361
|
-
|
362
|
-
|
363
|
-
def convert_anthropic_response_to_chatcompletion(
    response: anthropic.types.Message,
    inner_thoughts_xml_tag: Optional[str] = None,
) -> ChatCompletionResponse:
    """Build an OpenAI-style ChatCompletionResponse from an Anthropic message.

    The response content is a list of typed parts, each handled by its ``type``:
        "text"              -> message content, with ``inner_thoughts_xml_tag`` markers stripped
        "tool_use"          -> a single-element ``tool_calls`` list (arguments dumped as JSON)
        "thinking"          -> reasoning content and its signature
        "redacted_thinking" -> redacted reasoning content
    When several parts share a type, the last one wins.

    Example content from Claude 3:
        [
            {'type': 'text', 'text': "<thinking>Analyzing user login event...</thinking>"},
            {'type': 'tool_use', 'id': 'toolu_01Ka4AuCmfvxiidnBZuNfP1u', 'name': 'core_memory_append',
             'input': {'name': 'human', 'content': '...', 'request_heartbeat': True}},
        ]

    Raises:
        ValueError: If ``response.stop_reason`` cannot be remapped.
        RuntimeError: If ``response.content`` is empty.
    """
    usage = response.usage
    prompt_tokens = usage.input_tokens
    completion_tokens = usage.output_tokens
    finish_reason = remap_finish_reason(response.stop_reason)

    if len(response.content) == 0:
        raise RuntimeError("Unexpected empty content in response")

    message_fields = {
        "content": None,
        "reasoning_content": None,
        "reasoning_content_signature": None,
        "redacted_reasoning_content": None,
        "tool_calls": None,
    }
    for part in response.content:
        part_type = part.type
        if part_type == "text":
            message_fields["content"] = strip_xml_tags(string=part.text, tag=inner_thoughts_xml_tag)
        elif part_type == "tool_use":
            call = ToolCall(
                id=part.id,
                type="function",
                function=FunctionCall(
                    name=part.name,
                    arguments=json.dumps(part.input, indent=2),
                ),
            )
            message_fields["tool_calls"] = [call]
        elif part_type == "thinking":
            message_fields["reasoning_content"] = part.thinking
            message_fields["reasoning_content_signature"] = part.signature
        elif part_type == "redacted_thinking":
            message_fields["redacted_reasoning_content"] = part.data

    assert response.role == "assistant"
    choice = Choice(
        index=0,
        finish_reason=finish_reason,
        message=ChoiceMessage(role=response.role, **message_fields),
    )

    token_usage = UsageStatistics(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return ChatCompletionResponse(
        id=response.id,
        choices=[choice],
        created=get_utc_time_int(),
        model=response.model,
        usage=token_usage,
    )
|
460
|
-
|
461
|
-
|
462
|
-
def convert_anthropic_stream_event_to_chatcompletion(
    event: Union[
        BetaRawMessageStartEvent,
        BetaRawContentBlockStartEvent,
        BetaRawContentBlockDeltaEvent,
        BetaRawContentBlockStopEvent,
        BetaRawMessageDeltaEvent,
        BetaRawMessageStopEvent,
    ],
    message_id: str,
    model: str,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
) -> ChatCompletionChunkResponse:
    """Translate one Anthropic stream event into an OpenAI-style completion chunk.

    Args:
        event: The event to convert
        message_id: The ID of the message. Anthropic does not return this on every event, so the caller keeps track of it
        model: The model used. Anthropic does not return this on every event, so the caller keeps track of it
        inner_thoughts_xml_tag: Tag whose markers are scrubbed from streamed text deltas

    Returns:
        A chunk with exactly one choice (index 0). Depending on the event, the
        choice delta carries text content, reasoning content, a reasoning
        signature, redacted reasoning, or a single tool-call delta; fields the
        event does not supply stay ``None``. ``output_tokens`` is taken from
        message_start / message_delta events and is 0 otherwise.

    Unrecognized event, delta, or content-block types emit a warning and yield
    an otherwise empty chunk.
    """
    finish_reason = None
    output_tokens = 0
    delta_fields = {
        "content": None,
        "reasoning_content": None,
        "reasoning_content_signature": None,
        "redacted_reasoning_content": None,  # NOTE called "data" in the stream
        "tool_calls": None,
    }

    if isinstance(event, BetaRawMessageStartEvent):
        # e.g. BetaRawMessageStartEvent(message=BetaMessage(content=[], usage=BetaUsage(input_tokens=3086, output_tokens=1), ...), type='message_start')
        output_tokens += event.message.usage.output_tokens

    elif isinstance(event, BetaRawMessageDeltaEvent):
        # e.g. BetaRawMessageDeltaEvent(delta=Delta(stop_reason='tool_use', stop_sequence=None), type='message_delta', usage=BetaMessageDeltaUsage(output_tokens=45))
        finish_reason = remap_finish_reason(event.delta.stop_reason)
        output_tokens += event.usage.output_tokens

    elif isinstance(event, BetaRawContentBlockDeltaEvent):
        # e.g. delta=BetaInputJSONDelta(partial_json='lo', type='input_json_delta')
        #   or delta=BetaTextDelta(text='...', type='text_delta')
        delta = event.delta
        delta_type = delta.type

        if delta_type == "text_delta":
            # ReACT COT
            delta_fields["content"] = strip_xml_tags_streaming(string=delta.text, tag=inner_thoughts_xml_tag)

        elif delta_type == "thinking_delta":
            # Extended thought COT
            # Redacted doesn't come in the delta chunks, comes all at once
            # "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
            # Thinking might start with ""
            if len(delta.thinking) > 0:
                delta_fields["reasoning_content"] = delta.thinking

        elif delta_type == "signature_delta":
            # Extended thought COT signature
            if len(delta.signature) > 0:
                delta_fields["reasoning_content_signature"] = delta.signature

        elif delta_type == "input_json_delta":
            # Tool calling: a fragment of the JSON arguments
            arguments_delta = FunctionCallDelta(name=None, arguments=delta.partial_json)
            delta_fields["tool_calls"] = [ToolCallDelta(index=0, function=arguments_delta)]

        else:
            warnings.warn("Unexpected delta type: " + delta_type)

    elif isinstance(event, BetaRawContentBlockStartEvent):
        # e.g. content_block=BetaToolUseBlock(id='toolu_...', input={}, name='get_weather', type='tool_use')
        #   or content_block=BetaTextBlock(text='', type='text')
        block = event.content_block

        if isinstance(block, BetaToolUseBlock):
            # Start of a tool call: id and name are known, arguments follow in later deltas
            call_start = FunctionCallDelta(name=block.name, arguments="")
            delta_fields["tool_calls"] = [ToolCallDelta(index=0, id=block.id, function=call_start)]
        elif isinstance(block, BetaTextBlock):
            delta_fields["content"] = block.text
        elif isinstance(block, BetaThinkingBlock):
            delta_fields["reasoning_content"] = block.thinking
        elif isinstance(block, BetaRedactedThinkingBlock):
            delta_fields["redacted_reasoning_content"] = block.data
        else:
            warnings.warn("Unexpected content start type: " + str(type(block)))

    elif event.type not in VALID_EVENT_TYPES:
        warnings.warn("Unexpected event type: " + event.type)

    # Assemble the single-choice chunk
    choice = ChunkChoice(
        index=0,
        finish_reason=finish_reason,
        delta=MessageDelta(**delta_fields),
    )
    return ChatCompletionChunkResponse(
        id=message_id,
        choices=[choice],
        created=get_utc_time_int(),
        model=model,
        output_tokens=output_tokens,
    )
|
674
|
-
|
675
|
-
|
676
|
-
def _prepare_anthropic_request(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    # if true, prefix fill the generation with the thinking tag
    prefix_fill: bool = False,
    # if true, put COT inside the tool calls instead of inside the content
    put_inner_thoughts_in_kwargs: bool = True,
    bedrock: bool = False,
    # extended thinking related fields
    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
    extended_thinking: bool = False,
    max_reasoning_tokens: Optional[int] = None,
) -> dict:
    """Convert an OpenAI-style chat completion request into an Anthropic payload.

    Steps, in order: optionally add the inner-thoughts kwarg to every tool,
    convert tools to Anthropic's format, dump the request to a dict, enable
    extended thinking, move the leading system message to the top-level
    ``system`` field, convert the remaining messages to Anthropic dicts, make
    sure the conversation starts with a user turn, merge consecutive user
    turns, optionally prefill the assistant turn with the thinking tag, and
    drop OpenAI-only fields.

    The ``data`` object passed in is not modified.

    Args:
        data: The request to convert. ``messages[0]`` must be the system message.
        inner_thoughts_xml_tag: Tag used for inner thoughts in message content.
        prefix_fill: Prefill the assistant turn with the opening thinking tag.
            Ignored with extended thinking, inner thoughts in kwargs, "opus"
            models, or bedrock.
        put_inner_thoughts_in_kwargs: Put inner thoughts inside tool-call
            arguments. Forced off when ``extended_thinking`` is set.
        bedrock: Disables prefix fill.
        extended_thinking: Enable Anthropic extended thinking.
        max_reasoning_tokens: Thinking budget; must be set and below
            ``data.max_tokens`` when ``extended_thinking`` is true.

    Returns:
        The keyword arguments for the Anthropic messages API.

    Raises:
        AssertionError: On a bad thinking budget, a non-system first message,
            or a missing ``max_tokens``.
        ValueError: If the dumped request contains ``functions``.
    """
    if extended_thinking:
        assert (
            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
        ), "max tokens must be greater than thinking budget"
        if put_inner_thoughts_in_kwargs:
            logger.warning("Extended thinking not compatible with put_inner_thoughts_in_kwargs")
            put_inner_thoughts_in_kwargs = False
        # Silently disable prefix_fill for now
        prefix_fill = False

    # Work on a local reference so the caller's request object is left untouched.
    tools = data.tools

    # if needed, put inner thoughts as a kwarg for all tools
    if tools and put_inner_thoughts_in_kwargs:
        functions = add_inner_thoughts_to_functions(
            functions=[t.function.model_dump() for t in tools],
            inner_thoughts_key=INNER_THOUGHTS_KWARG,
            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
        )
        tools = [Tool(function=f) for f in functions]

    # convert the tools to Anthropic's payload format
    anthropic_tools = None if tools is None else convert_tools_to_anthropic_format(tools)

    # pydantic -> dict
    data = data.model_dump(exclude_none=True)

    if extended_thinking:
        data["thinking"] = {
            "type": "enabled",
            "budget_tokens": max_reasoning_tokens,
        }
        # `temperature` may only be set to 1 when thinking is enabled, see
        # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
        data["temperature"] = 1.0

    if "functions" in data:
        raise ValueError("'functions' unexpected in Anthropic API payload")

    # Handle tools. `exclude_none=True` drops a None `tools` from the dump, so
    # decide on the converted list rather than on the dumped key.
    if anthropic_tools is None:
        data.pop("tools", None)
        data.pop("tool_choice", None)
    else:
        # TODO eventually enable parallel tool use
        data["tools"] = anthropic_tools

    # Move 'system' to the top level
    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
    data["system"] = data["messages"][0]["content"]
    data["messages"] = data["messages"][1:]

    # Process messages
    for message in data["messages"]:
        if "content" not in message:
            message["content"] = None

    # Convert to Anthropic format
    msg_objs = [
        _Message.dict_to_message(
            agent_id=None,
            openai_message_dict=m,
        )
        for m in data["messages"]
    ]
    data["messages"] = [
        m.to_anthropic_dict(
            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
        )
        for m in msg_objs
    ]

    # Ensure first message is user. This also covers a request that held only
    # the system message, where nothing is left to index.
    if not data["messages"] or data["messages"][0]["role"] != "user":
        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]

    # Handle alternating messages
    data["messages"] = merge_tool_results_into_user_messages(data["messages"])

    # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
    # https://docs.anthropic.com/en/api/messages#body-messages
    # NOTE: cannot prefill with tools for opus:
    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
        if not bedrock:  # not supported for bedrock
            data["messages"].append(
                # Start the thinking process for the assistant
                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
            )

    # Validate max_tokens
    assert "max_tokens" in data, data

    # Remove OpenAI-specific fields
    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
        data.pop(field, None)

    return data
|
787
|
-
|
788
|
-
|
789
|
-
def anthropic_chat_completions_request(
|
790
|
-
data: ChatCompletionRequest,
|
791
|
-
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
792
|
-
put_inner_thoughts_in_kwargs: bool = False,
|
793
|
-
extended_thinking: bool = False,
|
794
|
-
max_reasoning_tokens: Optional[int] = None,
|
795
|
-
provider_name: Optional[str] = None,
|
796
|
-
provider_category: Optional[ProviderCategory] = None,
|
797
|
-
betas: List[str] = ["tools-2024-04-04"],
|
798
|
-
user_id: Optional[str] = None,
|
799
|
-
) -> ChatCompletionResponse:
|
800
|
-
"""https://docs.anthropic.com/claude/docs/tool-use"""
|
801
|
-
anthropic_client = None
|
802
|
-
if provider_category == ProviderCategory.byok:
|
803
|
-
actor = UserManager().get_user_or_default(user_id=user_id)
|
804
|
-
api_key = ProviderManager().get_override_key(provider_name, actor=actor)
|
805
|
-
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
806
|
-
elif model_settings.anthropic_api_key:
|
807
|
-
anthropic_client = anthropic.Anthropic()
|
808
|
-
else:
|
809
|
-
raise ValueError("No available Anthropic API key")
|
810
|
-
data = _prepare_anthropic_request(
|
811
|
-
data=data,
|
812
|
-
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
813
|
-
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
814
|
-
extended_thinking=extended_thinking,
|
815
|
-
max_reasoning_tokens=max_reasoning_tokens,
|
816
|
-
)
|
817
|
-
log_event(name="llm_request_sent", attributes=data)
|
818
|
-
response = anthropic_client.beta.messages.create(
|
819
|
-
**data,
|
820
|
-
betas=betas,
|
821
|
-
)
|
822
|
-
log_event(name="llm_response_received", attributes={"response": response.json()})
|
823
|
-
return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
|
824
|
-
|
825
|
-
|
826
|
-
def anthropic_bedrock_chat_completions_request(
|
827
|
-
data: ChatCompletionRequest,
|
828
|
-
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
829
|
-
provider_name: Optional[str] = None,
|
830
|
-
provider_category: Optional[ProviderCategory] = None,
|
831
|
-
user_id: Optional[str] = None,
|
832
|
-
) -> ChatCompletionResponse:
|
833
|
-
"""Make a chat completion request to Anthropic via AWS Bedrock."""
|
834
|
-
data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)
|
835
|
-
|
836
|
-
# Get the client
|
837
|
-
if provider_category == ProviderCategory.byok:
|
838
|
-
actor = UserManager().get_user_or_default(user_id=user_id)
|
839
|
-
access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
|
840
|
-
client = get_bedrock_client(access_key, secret_key, region)
|
841
|
-
else:
|
842
|
-
client = get_bedrock_client()
|
843
|
-
|
844
|
-
# Make the request
|
845
|
-
try:
|
846
|
-
# bedrock does not support certain args
|
847
|
-
print("Warning: Tool rules not supported with Anthropic Bedrock")
|
848
|
-
data["tool_choice"] = {"type": "any"}
|
849
|
-
log_event(name="llm_request_sent", attributes=data)
|
850
|
-
response = client.messages.create(**data)
|
851
|
-
log_event(name="llm_response_received", attributes={"response": response.json()})
|
852
|
-
return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
|
853
|
-
except PermissionDeniedError:
|
854
|
-
raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}")
|
855
|
-
except Exception as e:
|
856
|
-
raise BedrockError(f"Bedrock error: {e}")
|
857
|
-
|
858
|
-
|
859
|
-
def anthropic_chat_completions_request_stream(
|
860
|
-
data: ChatCompletionRequest,
|
861
|
-
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
862
|
-
put_inner_thoughts_in_kwargs: bool = False,
|
863
|
-
extended_thinking: bool = False,
|
864
|
-
max_reasoning_tokens: Optional[int] = None,
|
865
|
-
provider_name: Optional[str] = None,
|
866
|
-
provider_category: Optional[ProviderCategory] = None,
|
867
|
-
betas: List[str] = ["tools-2024-04-04"],
|
868
|
-
user_id: Optional[str] = None,
|
869
|
-
) -> Generator[ChatCompletionChunkResponse, None, None]:
|
870
|
-
"""Stream chat completions from Anthropic API.
|
871
|
-
|
872
|
-
Similar to OpenAI's streaming, but using Anthropic's native streaming support.
|
873
|
-
See: https://docs.anthropic.com/claude/reference/messages-streaming
|
874
|
-
"""
|
875
|
-
data = _prepare_anthropic_request(
|
876
|
-
data=data,
|
877
|
-
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
878
|
-
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
879
|
-
extended_thinking=extended_thinking,
|
880
|
-
max_reasoning_tokens=max_reasoning_tokens,
|
881
|
-
)
|
882
|
-
if provider_category == ProviderCategory.byok:
|
883
|
-
actor = UserManager().get_user_or_default(user_id=user_id)
|
884
|
-
api_key = ProviderManager().get_override_key(provider_name, actor=actor)
|
885
|
-
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
886
|
-
elif model_settings.anthropic_api_key:
|
887
|
-
anthropic_client = anthropic.Anthropic()
|
888
|
-
|
889
|
-
with anthropic_client.beta.messages.stream(
|
890
|
-
**data,
|
891
|
-
betas=betas,
|
892
|
-
) as stream:
|
893
|
-
# Stream: https://github.com/anthropics/anthropic-sdk-python/blob/d212ec9f6d5e956f13bc0ddc3d86b5888a954383/src/anthropic/lib/streaming/_beta_messages.py#L22
|
894
|
-
message_id = None
|
895
|
-
model = None
|
896
|
-
|
897
|
-
for chunk in stream._raw_stream:
|
898
|
-
time.sleep(0.01) # Anthropic is really fast, faster than frontend can upload.
|
899
|
-
if isinstance(chunk, BetaRawMessageStartEvent):
|
900
|
-
"""
|
901
|
-
BetaRawMessageStartEvent(
|
902
|
-
message=BetaMessage(
|
903
|
-
id='MESSAGE ID HERE',
|
904
|
-
content=[],
|
905
|
-
model='claude-3-5-sonnet-20241022',
|
906
|
-
role='assistant',
|
907
|
-
stop_reason=None,
|
908
|
-
stop_sequence=None,
|
909
|
-
type='message',
|
910
|
-
usage=BetaUsage(
|
911
|
-
cache_creation_input_tokens=0,
|
912
|
-
cache_read_input_tokens=0,
|
913
|
-
input_tokens=30,
|
914
|
-
output_tokens=4
|
915
|
-
)
|
916
|
-
),
|
917
|
-
type='message_start'
|
918
|
-
),
|
919
|
-
"""
|
920
|
-
message_id = chunk.message.id
|
921
|
-
model = chunk.message.model
|
922
|
-
yield convert_anthropic_stream_event_to_chatcompletion(chunk, message_id, model, inner_thoughts_xml_tag)
|
923
|
-
|
924
|
-
|
925
|
-
def anthropic_chat_completions_process_stream(
|
926
|
-
chat_completion_request: ChatCompletionRequest,
|
927
|
-
stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
|
928
|
-
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
929
|
-
put_inner_thoughts_in_kwargs: bool = False,
|
930
|
-
extended_thinking: bool = False,
|
931
|
-
max_reasoning_tokens: Optional[int] = None,
|
932
|
-
provider_name: Optional[str] = None,
|
933
|
-
provider_category: Optional[ProviderCategory] = None,
|
934
|
-
create_message_id: bool = True,
|
935
|
-
create_message_datetime: bool = True,
|
936
|
-
betas: List[str] = ["tools-2024-04-04"],
|
937
|
-
name: Optional[str] = None,
|
938
|
-
user_id: Optional[str] = None,
|
939
|
-
) -> ChatCompletionResponse:
|
940
|
-
"""Process a streaming completion response from Anthropic, similar to OpenAI's streaming.
|
941
|
-
|
942
|
-
Args:
|
943
|
-
api_key: The Anthropic API key
|
944
|
-
chat_completion_request: The chat completion request
|
945
|
-
stream_interface: Interface for handling streaming chunks
|
946
|
-
inner_thoughts_xml_tag: Tag for inner thoughts in the response
|
947
|
-
create_message_id: Whether to create a message ID
|
948
|
-
create_message_datetime: Whether to create message datetime
|
949
|
-
betas: Beta features to enable
|
950
|
-
|
951
|
-
Returns:
|
952
|
-
The final ChatCompletionResponse
|
953
|
-
"""
|
954
|
-
assert chat_completion_request.stream == True
|
955
|
-
assert stream_interface is not None, "Required"
|
956
|
-
|
957
|
-
# Count prompt tokens - we'll get completion tokens from the final response
|
958
|
-
chat_history = [m.model_dump(exclude_none=True) for m in chat_completion_request.messages]
|
959
|
-
prompt_tokens = num_tokens_from_messages(
|
960
|
-
messages=chat_history,
|
961
|
-
model=chat_completion_request.model,
|
962
|
-
)
|
963
|
-
|
964
|
-
# Add tokens for tools if present
|
965
|
-
if chat_completion_request.tools is not None:
|
966
|
-
assert chat_completion_request.functions is None
|
967
|
-
prompt_tokens += num_tokens_from_functions(
|
968
|
-
functions=[t.function.model_dump() for t in chat_completion_request.tools],
|
969
|
-
model=chat_completion_request.model,
|
970
|
-
)
|
971
|
-
elif chat_completion_request.functions is not None:
|
972
|
-
assert chat_completion_request.tools is None
|
973
|
-
prompt_tokens += num_tokens_from_functions(
|
974
|
-
functions=[f.model_dump() for f in chat_completion_request.functions],
|
975
|
-
model=chat_completion_request.model,
|
976
|
-
)
|
977
|
-
|
978
|
-
# Create a dummy message for ID/datetime if needed
|
979
|
-
dummy_message = _Message(
|
980
|
-
role=_MessageRole.assistant,
|
981
|
-
content=[],
|
982
|
-
agent_id="",
|
983
|
-
model="",
|
984
|
-
name=None,
|
985
|
-
tool_calls=None,
|
986
|
-
tool_call_id=None,
|
987
|
-
)
|
988
|
-
|
989
|
-
TEMP_STREAM_RESPONSE_ID = "temp_id"
|
990
|
-
TEMP_STREAM_FINISH_REASON = "temp_null"
|
991
|
-
TEMP_STREAM_TOOL_CALL_ID = "temp_id"
|
992
|
-
chat_completion_response = ChatCompletionResponse(
|
993
|
-
id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
|
994
|
-
choices=[],
|
995
|
-
created=int(dummy_message.created_at.timestamp()),
|
996
|
-
model=chat_completion_request.model,
|
997
|
-
usage=UsageStatistics(
|
998
|
-
prompt_tokens=prompt_tokens,
|
999
|
-
total_tokens=prompt_tokens,
|
1000
|
-
),
|
1001
|
-
)
|
1002
|
-
|
1003
|
-
log_event(name="llm_request_sent", attributes=chat_completion_request.model_dump())
|
1004
|
-
|
1005
|
-
if stream_interface:
|
1006
|
-
stream_interface.stream_start()
|
1007
|
-
|
1008
|
-
completion_tokens = 0
|
1009
|
-
prev_message_type = None
|
1010
|
-
message_idx = 0
|
1011
|
-
try:
|
1012
|
-
for chunk_idx, chat_completion_chunk in enumerate(
|
1013
|
-
anthropic_chat_completions_request_stream(
|
1014
|
-
data=chat_completion_request,
|
1015
|
-
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
1016
|
-
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
1017
|
-
extended_thinking=extended_thinking,
|
1018
|
-
max_reasoning_tokens=max_reasoning_tokens,
|
1019
|
-
provider_name=provider_name,
|
1020
|
-
provider_category=provider_category,
|
1021
|
-
betas=betas,
|
1022
|
-
user_id=user_id,
|
1023
|
-
)
|
1024
|
-
):
|
1025
|
-
assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
|
1026
|
-
|
1027
|
-
if stream_interface:
|
1028
|
-
if isinstance(stream_interface, AgentChunkStreamingInterface):
|
1029
|
-
message_type = stream_interface.process_chunk(
|
1030
|
-
chat_completion_chunk,
|
1031
|
-
message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
|
1032
|
-
message_date=(
|
1033
|
-
timestamp_to_datetime(chat_completion_response.created)
|
1034
|
-
if create_message_datetime
|
1035
|
-
else timestamp_to_datetime(chat_completion_chunk.created)
|
1036
|
-
),
|
1037
|
-
# if extended_thinking is on, then reasoning_content will be flowing as chunks
|
1038
|
-
# TODO handle emitting redacted reasoning content (e.g. as concat?)
|
1039
|
-
expect_reasoning_content=extended_thinking,
|
1040
|
-
name=name,
|
1041
|
-
message_index=message_idx,
|
1042
|
-
prev_message_type=prev_message_type,
|
1043
|
-
)
|
1044
|
-
if message_type != prev_message_type and message_type is not None and prev_message_type is not None:
|
1045
|
-
message_idx += 1
|
1046
|
-
if message_type is not None:
|
1047
|
-
prev_message_type = message_type
|
1048
|
-
elif isinstance(stream_interface, AgentRefreshStreamingInterface):
|
1049
|
-
stream_interface.process_refresh(chat_completion_response)
|
1050
|
-
else:
|
1051
|
-
raise TypeError(stream_interface)
|
1052
|
-
|
1053
|
-
if chunk_idx == 0:
|
1054
|
-
# initialize the choice objects which we will increment with the deltas
|
1055
|
-
num_choices = len(chat_completion_chunk.choices)
|
1056
|
-
assert num_choices > 0
|
1057
|
-
chat_completion_response.choices = [
|
1058
|
-
Choice(
|
1059
|
-
finish_reason=TEMP_STREAM_FINISH_REASON, # NOTE: needs to be ovrerwritten
|
1060
|
-
index=i,
|
1061
|
-
message=Message(
|
1062
|
-
role="assistant",
|
1063
|
-
),
|
1064
|
-
)
|
1065
|
-
for i in range(len(chat_completion_chunk.choices))
|
1066
|
-
]
|
1067
|
-
|
1068
|
-
# add the choice delta
|
1069
|
-
assert len(chat_completion_chunk.choices) == len(chat_completion_response.choices), chat_completion_chunk
|
1070
|
-
for chunk_choice in chat_completion_chunk.choices:
|
1071
|
-
if chunk_choice.finish_reason is not None:
|
1072
|
-
chat_completion_response.choices[chunk_choice.index].finish_reason = chunk_choice.finish_reason
|
1073
|
-
|
1074
|
-
if chunk_choice.logprobs is not None:
|
1075
|
-
chat_completion_response.choices[chunk_choice.index].logprobs = chunk_choice.logprobs
|
1076
|
-
|
1077
|
-
accum_message = chat_completion_response.choices[chunk_choice.index].message
|
1078
|
-
message_delta = chunk_choice.delta
|
1079
|
-
|
1080
|
-
if message_delta.content is not None:
|
1081
|
-
content_delta = message_delta.content
|
1082
|
-
if accum_message.content is None:
|
1083
|
-
accum_message.content = content_delta
|
1084
|
-
else:
|
1085
|
-
accum_message.content += content_delta
|
1086
|
-
|
1087
|
-
# NOTE: for extended_thinking mode
|
1088
|
-
if extended_thinking and message_delta.reasoning_content is not None:
|
1089
|
-
reasoning_content_delta = message_delta.reasoning_content
|
1090
|
-
if accum_message.reasoning_content is None:
|
1091
|
-
accum_message.reasoning_content = reasoning_content_delta
|
1092
|
-
else:
|
1093
|
-
accum_message.reasoning_content += reasoning_content_delta
|
1094
|
-
|
1095
|
-
# NOTE: extended_thinking sends a signature
|
1096
|
-
if extended_thinking and message_delta.reasoning_content_signature is not None:
|
1097
|
-
reasoning_content_signature_delta = message_delta.reasoning_content_signature
|
1098
|
-
if accum_message.reasoning_content_signature is None:
|
1099
|
-
accum_message.reasoning_content_signature = reasoning_content_signature_delta
|
1100
|
-
else:
|
1101
|
-
accum_message.reasoning_content_signature += reasoning_content_signature_delta
|
1102
|
-
|
1103
|
-
# NOTE: extended_thinking also has the potential for redacted_reasoning_content
|
1104
|
-
if extended_thinking and message_delta.redacted_reasoning_content is not None:
|
1105
|
-
redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
|
1106
|
-
if accum_message.redacted_reasoning_content is None:
|
1107
|
-
accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
|
1108
|
-
else:
|
1109
|
-
accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
|
1110
|
-
|
1111
|
-
# TODO(charles) make sure this works for parallel tool calling?
|
1112
|
-
if message_delta.tool_calls is not None:
|
1113
|
-
tool_calls_delta = message_delta.tool_calls
|
1114
|
-
|
1115
|
-
# If this is the first tool call showing up in a chunk, initialize the list with it
|
1116
|
-
if accum_message.tool_calls is None:
|
1117
|
-
accum_message.tool_calls = [
|
1118
|
-
ToolCall(id=TEMP_STREAM_TOOL_CALL_ID, function=FunctionCall(name="", arguments=""))
|
1119
|
-
for _ in range(len(tool_calls_delta))
|
1120
|
-
]
|
1121
|
-
|
1122
|
-
# There may be many tool calls in a tool calls delta (e.g. parallel tool calls)
|
1123
|
-
for tool_call_delta in tool_calls_delta:
|
1124
|
-
if tool_call_delta.id is not None:
|
1125
|
-
# TODO assert that we're not overwriting?
|
1126
|
-
# TODO += instead of =?
|
1127
|
-
if tool_call_delta.index not in range(len(accum_message.tool_calls)):
|
1128
|
-
warnings.warn(
|
1129
|
-
f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
|
1130
|
-
)
|
1131
|
-
# force index 0
|
1132
|
-
# accum_message.tool_calls[0].id = tool_call_delta.id
|
1133
|
-
else:
|
1134
|
-
accum_message.tool_calls[tool_call_delta.index].id = tool_call_delta.id
|
1135
|
-
if tool_call_delta.function is not None:
|
1136
|
-
if tool_call_delta.function.name is not None:
|
1137
|
-
# TODO assert that we're not overwriting?
|
1138
|
-
# TODO += instead of =?
|
1139
|
-
if tool_call_delta.index not in range(len(accum_message.tool_calls)):
|
1140
|
-
warnings.warn(
|
1141
|
-
f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
|
1142
|
-
)
|
1143
|
-
# force index 0
|
1144
|
-
# accum_message.tool_calls[0].function.name = tool_call_delta.function.name
|
1145
|
-
else:
|
1146
|
-
accum_message.tool_calls[tool_call_delta.index].function.name = tool_call_delta.function.name
|
1147
|
-
if tool_call_delta.function.arguments is not None:
|
1148
|
-
if tool_call_delta.index not in range(len(accum_message.tool_calls)):
|
1149
|
-
warnings.warn(
|
1150
|
-
f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
|
1151
|
-
)
|
1152
|
-
# force index 0
|
1153
|
-
# accum_message.tool_calls[0].function.arguments += tool_call_delta.function.arguments
|
1154
|
-
else:
|
1155
|
-
accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments
|
1156
|
-
|
1157
|
-
if message_delta.function_call is not None:
|
1158
|
-
raise NotImplementedError("Old function_call style not support with stream=True")
|
1159
|
-
|
1160
|
-
# overwrite response fields based on latest chunk
|
1161
|
-
if not create_message_id:
|
1162
|
-
chat_completion_response.id = chat_completion_chunk.id
|
1163
|
-
if not create_message_datetime:
|
1164
|
-
chat_completion_response.created = chat_completion_chunk.created
|
1165
|
-
chat_completion_response.model = chat_completion_chunk.model
|
1166
|
-
chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
|
1167
|
-
|
1168
|
-
# increment chunk counter
|
1169
|
-
if chat_completion_chunk.output_tokens is not None:
|
1170
|
-
completion_tokens += chat_completion_chunk.output_tokens
|
1171
|
-
|
1172
|
-
except Exception as e:
|
1173
|
-
if stream_interface:
|
1174
|
-
stream_interface.stream_end()
|
1175
|
-
print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
|
1176
|
-
raise e
|
1177
|
-
finally:
|
1178
|
-
if stream_interface:
|
1179
|
-
stream_interface.stream_end()
|
1180
|
-
|
1181
|
-
# make sure we didn't leave temp stuff in
|
1182
|
-
assert all([c.finish_reason != TEMP_STREAM_FINISH_REASON for c in chat_completion_response.choices])
|
1183
|
-
assert all(
|
1184
|
-
[
|
1185
|
-
all([tc.id != TEMP_STREAM_TOOL_CALL_ID for tc in c.message.tool_calls]) if c.message.tool_calls else True
|
1186
|
-
for c in chat_completion_response.choices
|
1187
|
-
]
|
1188
|
-
)
|
1189
|
-
if not create_message_id:
|
1190
|
-
assert chat_completion_response.id != dummy_message.id
|
1191
|
-
|
1192
|
-
# compute token usage before returning
|
1193
|
-
# TODO try actually computing the #tokens instead of assuming the chunks is the same
|
1194
|
-
chat_completion_response.usage.completion_tokens = completion_tokens
|
1195
|
-
chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
|
1196
|
-
|
1197
|
-
assert len(chat_completion_response.choices) > 0, chat_completion_response
|
1198
|
-
|
1199
|
-
log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
|
1200
|
-
|
1201
|
-
for choice in chat_completion_response.choices:
|
1202
|
-
if choice.message.content is not None:
|
1203
|
-
choice.message.content = choice.message.content.replace(f"<{inner_thoughts_xml_tag}>", "")
|
1204
|
-
choice.message.content = choice.message.content.replace(f"</{inner_thoughts_xml_tag}>", "")
|
1205
|
-
|
1206
|
-
return chat_completion_response
|