letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. letta/__init__.py +1 -1
  2. letta/agents/helpers.py +4 -0
  3. letta/agents/letta_agent.py +142 -5
  4. letta/constants.py +10 -7
  5. letta/data_sources/connectors.py +70 -53
  6. letta/embeddings.py +3 -240
  7. letta/errors.py +28 -0
  8. letta/functions/function_sets/base.py +4 -4
  9. letta/functions/functions.py +287 -32
  10. letta/functions/mcp_client/types.py +11 -0
  11. letta/functions/schema_validator.py +187 -0
  12. letta/functions/typescript_parser.py +196 -0
  13. letta/helpers/datetime_helpers.py +8 -4
  14. letta/helpers/tool_execution_helper.py +25 -2
  15. letta/llm_api/anthropic_client.py +23 -18
  16. letta/llm_api/azure_client.py +73 -0
  17. letta/llm_api/bedrock_client.py +8 -4
  18. letta/llm_api/google_vertex_client.py +14 -5
  19. letta/llm_api/llm_api_tools.py +2 -217
  20. letta/llm_api/llm_client.py +15 -1
  21. letta/llm_api/llm_client_base.py +32 -1
  22. letta/llm_api/openai.py +1 -0
  23. letta/llm_api/openai_client.py +18 -28
  24. letta/llm_api/together_client.py +55 -0
  25. letta/orm/provider.py +1 -0
  26. letta/orm/step_metrics.py +40 -1
  27. letta/otel/db_pool_monitoring.py +1 -1
  28. letta/schemas/agent.py +3 -4
  29. letta/schemas/agent_file.py +2 -0
  30. letta/schemas/block.py +11 -5
  31. letta/schemas/embedding_config.py +4 -5
  32. letta/schemas/enums.py +1 -1
  33. letta/schemas/job.py +2 -3
  34. letta/schemas/llm_config.py +79 -7
  35. letta/schemas/mcp.py +0 -24
  36. letta/schemas/message.py +0 -108
  37. letta/schemas/openai/chat_completion_request.py +1 -0
  38. letta/schemas/providers/__init__.py +0 -2
  39. letta/schemas/providers/anthropic.py +106 -8
  40. letta/schemas/providers/azure.py +102 -8
  41. letta/schemas/providers/base.py +10 -3
  42. letta/schemas/providers/bedrock.py +28 -16
  43. letta/schemas/providers/letta.py +3 -3
  44. letta/schemas/providers/ollama.py +2 -12
  45. letta/schemas/providers/openai.py +4 -4
  46. letta/schemas/providers/together.py +14 -2
  47. letta/schemas/sandbox_config.py +2 -1
  48. letta/schemas/tool.py +46 -22
  49. letta/server/rest_api/routers/v1/agents.py +179 -38
  50. letta/server/rest_api/routers/v1/folders.py +13 -8
  51. letta/server/rest_api/routers/v1/providers.py +10 -3
  52. letta/server/rest_api/routers/v1/sources.py +14 -8
  53. letta/server/rest_api/routers/v1/steps.py +17 -1
  54. letta/server/rest_api/routers/v1/tools.py +96 -5
  55. letta/server/rest_api/streaming_response.py +91 -45
  56. letta/server/server.py +27 -38
  57. letta/services/agent_manager.py +92 -20
  58. letta/services/agent_serialization_manager.py +11 -7
  59. letta/services/context_window_calculator/context_window_calculator.py +40 -2
  60. letta/services/helpers/agent_manager_helper.py +73 -12
  61. letta/services/mcp_manager.py +109 -15
  62. letta/services/passage_manager.py +28 -109
  63. letta/services/provider_manager.py +24 -0
  64. letta/services/step_manager.py +68 -0
  65. letta/services/summarizer/summarizer.py +1 -4
  66. letta/services/tool_executor/core_tool_executor.py +1 -1
  67. letta/services/tool_executor/sandbox_tool_executor.py +26 -9
  68. letta/services/tool_manager.py +82 -5
  69. letta/services/tool_sandbox/base.py +3 -11
  70. letta/services/tool_sandbox/modal_constants.py +17 -0
  71. letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
  72. letta/services/tool_sandbox/modal_sandbox.py +218 -3
  73. letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
  74. letta/services/tool_sandbox/modal_version_manager.py +273 -0
  75. letta/services/tool_sandbox/safe_pickle.py +193 -0
  76. letta/settings.py +5 -3
  77. letta/templates/sandbox_code_file.py.j2 +2 -4
  78. letta/templates/sandbox_code_file_async.py.j2 +2 -4
  79. letta/utils.py +1 -1
  80. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
  81. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
  82. letta/llm_api/anthropic.py +0 -1206
  83. letta/llm_api/aws_bedrock.py +0 -104
  84. letta/llm_api/azure_openai.py +0 -118
  85. letta/llm_api/azure_openai_constants.py +0 -11
  86. letta/llm_api/cohere.py +0 -391
  87. letta/schemas/providers/cohere.py +0 -18
  88. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
  89. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
  90. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
@@ -1,1206 +0,0 @@
1
- import json
2
- import re
3
- import time
4
- import warnings
5
- from typing import Generator, List, Optional, Union
6
-
7
- import anthropic
8
- from anthropic import PermissionDeniedError
9
- from anthropic.types.beta import (
10
- BetaRawContentBlockDeltaEvent,
11
- BetaRawContentBlockStartEvent,
12
- BetaRawContentBlockStopEvent,
13
- BetaRawMessageDeltaEvent,
14
- BetaRawMessageStartEvent,
15
- BetaRawMessageStopEvent,
16
- BetaRedactedThinkingBlock,
17
- BetaTextBlock,
18
- BetaThinkingBlock,
19
- BetaToolUseBlock,
20
- )
21
-
22
- from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
23
- from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
24
- from letta.llm_api.aws_bedrock import get_bedrock_client
25
- from letta.llm_api.helpers import add_inner_thoughts_to_functions
26
- from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
27
- from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
28
- from letta.log import get_logger
29
- from letta.otel.tracing import log_event
30
- from letta.schemas.enums import ProviderCategory
31
- from letta.schemas.message import Message as _Message
32
- from letta.schemas.message import MessageRole as _MessageRole
33
- from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
34
- from letta.schemas.openai.chat_completion_response import (
35
- ChatCompletionChunkResponse,
36
- ChatCompletionResponse,
37
- Choice,
38
- ChunkChoice,
39
- FunctionCall,
40
- FunctionCallDelta,
41
- )
42
- from letta.schemas.openai.chat_completion_response import Message
43
- from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
44
- from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
45
- from letta.services.provider_manager import ProviderManager
46
- from letta.services.user_manager import UserManager
47
- from letta.settings import model_settings
48
- from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
49
-
50
- logger = get_logger(__name__)
51
-
52
- BASE_URL = "https://api.anthropic.com/v1"
53
-
54
-
55
- # https://docs.anthropic.com/claude/docs/models-overview
56
- # Sadly hardcoded
57
- MODEL_LIST = [
58
- ## Opus 4.1
59
- {
60
- "name": "claude-opus-4-1-20250805",
61
- "context_window": 200000,
62
- },
63
- ## Opus 3
64
- {
65
- "name": "claude-3-opus-20240229",
66
- "context_window": 200000,
67
- },
68
- # 3 latest
69
- {
70
- "name": "claude-3-opus-latest",
71
- "context_window": 200000,
72
- },
73
- # 4
74
- {
75
- "name": "claude-opus-4-20250514",
76
- "context_window": 200000,
77
- },
78
- ## Sonnet
79
- # 3.0
80
- {
81
- "name": "claude-3-sonnet-20240229",
82
- "context_window": 200000,
83
- },
84
- # 3.5
85
- {
86
- "name": "claude-3-5-sonnet-20240620",
87
- "context_window": 200000,
88
- },
89
- # 3.5 new
90
- {
91
- "name": "claude-3-5-sonnet-20241022",
92
- "context_window": 200000,
93
- },
94
- # 3.5 latest
95
- {
96
- "name": "claude-3-5-sonnet-latest",
97
- "context_window": 200000,
98
- },
99
- # 3.7
100
- {
101
- "name": "claude-3-7-sonnet-20250219",
102
- "context_window": 200000,
103
- },
104
- # 3.7 latest
105
- {
106
- "name": "claude-3-7-sonnet-latest",
107
- "context_window": 200000,
108
- },
109
- # 4
110
- {
111
- "name": "claude-sonnet-4-20250514",
112
- "context_window": 200000,
113
- },
114
- ## Haiku
115
- # 3.0
116
- {
117
- "name": "claude-3-haiku-20240307",
118
- "context_window": 200000,
119
- },
120
- # 3.5
121
- {
122
- "name": "claude-3-5-haiku-20241022",
123
- "context_window": 200000,
124
- },
125
- # 3.5 latest
126
- {
127
- "name": "claude-3-5-haiku-latest",
128
- "context_window": 200000,
129
- },
130
- ]
131
-
132
- DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
133
-
134
- VALID_EVENT_TYPES = {"content_block_stop", "message_stop"}
135
-
136
-
137
- def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:
138
- if api_key:
139
- anthropic_client = anthropic.Anthropic(api_key=api_key)
140
- try:
141
- # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
142
- anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
143
- except anthropic.AuthenticationError as e:
144
- raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
145
- except Exception as e:
146
- raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
147
- else:
148
- raise ValueError("No API key provided")
149
-
150
-
151
- def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
152
- for model_dict in anthropic_get_model_list(api_key=api_key):
153
- if model_dict["name"] == model:
154
- return model_dict["context_window"]
155
- raise ValueError(f"Can't find model '{model}' in Anthropic model list")
156
-
157
-
158
- def anthropic_get_model_list(api_key: Optional[str]) -> dict:
159
- """https://docs.anthropic.com/claude/docs/models-overview"""
160
-
161
- # NOTE: currently there is no GET /models, so we need to hardcode
162
- # return MODEL_LIST
163
-
164
- if api_key:
165
- anthropic_client = anthropic.Anthropic(api_key=api_key)
166
- elif model_settings.anthropic_api_key:
167
- anthropic_client = anthropic.Anthropic()
168
- else:
169
- raise ValueError("No API key provided")
170
-
171
- models = anthropic_client.models.list()
172
- models_json = models.model_dump()
173
- assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
174
- return models_json["data"]
175
-
176
-
177
- async def anthropic_get_model_list_async(api_key: Optional[str]) -> dict:
178
- """https://docs.anthropic.com/claude/docs/models-overview"""
179
-
180
- # NOTE: currently there is no GET /models, so we need to hardcode
181
- # return MODEL_LIST
182
-
183
- if api_key:
184
- anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
185
- elif model_settings.anthropic_api_key:
186
- anthropic_client = anthropic.AsyncAnthropic()
187
- else:
188
- raise ValueError("No API key provided")
189
-
190
- models = await anthropic_client.models.list()
191
- models_json = models.model_dump()
192
- assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
193
- return models_json["data"]
194
-
195
-
196
- def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
197
- """See: https://docs.anthropic.com/claude/docs/tool-use
198
-
199
- OpenAI style:
200
- "tools": [{
201
- "type": "function",
202
- "function": {
203
- "name": "find_movies",
204
- "description": "find ....",
205
- "parameters": {
206
- "type": "object",
207
- "properties": {
208
- PARAM: {
209
- "type": PARAM_TYPE, # eg "string"
210
- "description": PARAM_DESCRIPTION,
211
- },
212
- ...
213
- },
214
- "required": List[str],
215
- }
216
- }
217
- }
218
- ]
219
-
220
- Anthropic style:
221
- "tools": [{
222
- "name": "find_movies",
223
- "description": "find ....",
224
- "input_schema": {
225
- "type": "object",
226
- "properties": {
227
- PARAM: {
228
- "type": PARAM_TYPE, # eg "string"
229
- "description": PARAM_DESCRIPTION,
230
- },
231
- ...
232
- },
233
- "required": List[str],
234
- }
235
- }
236
- ]
237
-
238
- Two small differences:
239
- - 1 level less of nesting
240
- - "parameters" -> "input_schema"
241
- """
242
- formatted_tools = []
243
- for tool in tools:
244
- formatted_tool = {
245
- "name": tool.function.name,
246
- "description": tool.function.description,
247
- "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
248
- }
249
- formatted_tools.append(formatted_tool)
250
-
251
- return formatted_tools
252
-
253
-
254
- def merge_tool_results_into_user_messages(messages: List[dict]):
255
- """Anthropic API doesn't allow role 'tool'->'user' sequences
256
-
257
- Example HTTP error:
258
- messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row
259
-
260
- From: https://docs.anthropic.com/claude/docs/tool-use
261
- You may be familiar with other APIs that return tool use as separate from the model's primary output,
262
- or which use a special-purpose tool or function message role.
263
- In contrast, Anthropic's models and API are built around alternating user and assistant messages,
264
- where each message is an array of rich content blocks: text, image, tool_use, and tool_result.
265
- """
266
-
267
- # TODO walk through the messages list
268
- # When a dict (dict_A) with 'role' == 'user' is followed by a dict with 'role' == 'user' (dict B), do the following
269
- # dict_A["content"] = dict_A["content"] + dict_B["content"]
270
-
271
- # The result should be a new merged_messages list that doesn't have any back-to-back dicts with 'role' == 'user'
272
- merged_messages = []
273
- if not messages:
274
- return merged_messages
275
-
276
- # Start with the first message in the list
277
- current_message = messages[0]
278
-
279
- for next_message in messages[1:]:
280
- if current_message["role"] == "user" and next_message["role"] == "user":
281
- # Merge contents of the next user message into current one
282
- current_content = (
283
- current_message["content"]
284
- if isinstance(current_message["content"], list)
285
- else [{"type": "text", "text": current_message["content"]}]
286
- )
287
- next_content = (
288
- next_message["content"]
289
- if isinstance(next_message["content"], list)
290
- else [{"type": "text", "text": next_message["content"]}]
291
- )
292
- merged_content = current_content + next_content
293
- current_message["content"] = merged_content
294
- else:
295
- # Append the current message to result as it's complete
296
- merged_messages.append(current_message)
297
- # Move on to the next message
298
- current_message = next_message
299
-
300
- # Append the last processed message to the result
301
- merged_messages.append(current_message)
302
-
303
- return merged_messages
304
-
305
-
306
- def remap_finish_reason(stop_reason: str) -> str:
307
- """Remap Anthropic's 'stop_reason' to OpenAI 'finish_reason'
308
-
309
- OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
310
- see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
311
-
312
- From: https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#stop-reason
313
-
314
- Messages have a stop_reason of one of the following values:
315
- "end_turn": The conversational turn ended naturally.
316
- "stop_sequence": One of your specified custom stop sequences was generated.
317
- "max_tokens": (unchanged)
318
-
319
- """
320
- if stop_reason == "end_turn":
321
- return "stop"
322
- elif stop_reason == "stop_sequence":
323
- return "stop"
324
- elif stop_reason == "max_tokens":
325
- return "length"
326
- elif stop_reason == "tool_use":
327
- return "function_call"
328
- else:
329
- raise ValueError(f"Unexpected stop_reason: {stop_reason}")
330
-
331
-
332
- def strip_xml_tags(string: str, tag: Optional[str]) -> str:
333
- if tag is None:
334
- return string
335
- # Construct the regular expression pattern to find the start and end tags
336
- tag_pattern = f"<{tag}.*?>|</{tag}>"
337
- # Use the regular expression to replace the tags with an empty string
338
- return re.sub(tag_pattern, "", string)
339
-
340
-
341
- def strip_xml_tags_streaming(string: str, tag: Optional[str]) -> str:
342
- if tag is None:
343
- return string
344
-
345
- # Handle common partial tag cases
346
- parts_to_remove = [
347
- "<", # Leftover start bracket
348
- f"<{tag}", # Opening tag start
349
- f"</{tag}", # Closing tag start
350
- f"/{tag}>", # Closing tag end
351
- f"{tag}>", # Opening tag end
352
- f"/{tag}", # Partial closing tag without >
353
- ">", # Leftover end bracket
354
- ]
355
-
356
- result = string
357
- for part in parts_to_remove:
358
- result = result.replace(part, "")
359
-
360
- return result
361
-
362
-
363
- def convert_anthropic_response_to_chatcompletion(
364
- response: anthropic.types.Message,
365
- inner_thoughts_xml_tag: Optional[str] = None,
366
- ) -> ChatCompletionResponse:
367
- """
368
- Example response from Claude 3:
369
- response.json = {
370
- 'id': 'msg_01W1xg9hdRzbeN2CfZM7zD2w',
371
- 'type': 'message',
372
- 'role': 'assistant',
373
- 'content': [
374
- {
375
- 'type': 'text',
376
- 'text': "<thinking>Analyzing user login event. This is Chad's first
377
- interaction with me. I will adjust my personality and rapport accordingly.</thinking>"
378
- },
379
- {
380
- 'type':
381
- 'tool_use',
382
- 'id': 'toolu_01Ka4AuCmfvxiidnBZuNfP1u',
383
- 'name': 'core_memory_append',
384
- 'input': {
385
- 'name': 'human',
386
- 'content': 'Chad is logging in for the first time. I will aim to build a warm
387
- and welcoming rapport.',
388
- 'request_heartbeat': True
389
- }
390
- }
391
- ],
392
- 'model': 'claude-3-haiku-20240307',
393
- 'stop_reason': 'tool_use',
394
- 'stop_sequence': None,
395
- 'usage': {
396
- 'input_tokens': 3305,
397
- 'output_tokens': 141
398
- }
399
- }
400
- """
401
- prompt_tokens = response.usage.input_tokens
402
- completion_tokens = response.usage.output_tokens
403
- finish_reason = remap_finish_reason(response.stop_reason)
404
-
405
- content = None
406
- reasoning_content = None
407
- reasoning_content_signature = None
408
- redacted_reasoning_content = None
409
- tool_calls = None
410
-
411
- if len(response.content) > 0:
412
- for content_part in response.content:
413
- if content_part.type == "text":
414
- content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
415
- if content_part.type == "tool_use":
416
- tool_calls = [
417
- ToolCall(
418
- id=content_part.id,
419
- type="function",
420
- function=FunctionCall(
421
- name=content_part.name,
422
- arguments=json.dumps(content_part.input, indent=2),
423
- ),
424
- )
425
- ]
426
- if content_part.type == "thinking":
427
- reasoning_content = content_part.thinking
428
- reasoning_content_signature = content_part.signature
429
- if content_part.type == "redacted_thinking":
430
- redacted_reasoning_content = content_part.data
431
-
432
- else:
433
- raise RuntimeError("Unexpected empty content in response")
434
-
435
- assert response.role == "assistant"
436
- choice = Choice(
437
- index=0,
438
- finish_reason=finish_reason,
439
- message=ChoiceMessage(
440
- role=response.role,
441
- content=content,
442
- reasoning_content=reasoning_content,
443
- reasoning_content_signature=reasoning_content_signature,
444
- redacted_reasoning_content=redacted_reasoning_content,
445
- tool_calls=tool_calls,
446
- ),
447
- )
448
-
449
- return ChatCompletionResponse(
450
- id=response.id,
451
- choices=[choice],
452
- created=get_utc_time_int(),
453
- model=response.model,
454
- usage=UsageStatistics(
455
- prompt_tokens=prompt_tokens,
456
- completion_tokens=completion_tokens,
457
- total_tokens=prompt_tokens + completion_tokens,
458
- ),
459
- )
460
-
461
-
462
- def convert_anthropic_stream_event_to_chatcompletion(
463
- event: Union[
464
- BetaRawMessageStartEvent,
465
- BetaRawContentBlockStartEvent,
466
- BetaRawContentBlockDeltaEvent,
467
- BetaRawContentBlockStopEvent,
468
- BetaRawMessageDeltaEvent,
469
- BetaRawMessageStopEvent,
470
- ],
471
- message_id: str,
472
- model: str,
473
- inner_thoughts_xml_tag: Optional[str] = "thinking",
474
- ) -> ChatCompletionChunkResponse:
475
- """Convert Anthropic stream events to OpenAI ChatCompletionResponse format.
476
-
477
- Args:
478
- event: The event to convert
479
- message_id: The ID of the message. Anthropic does not return this on every event, so we need to keep track of it
480
- model: The model used. Anthropic does not return this on every event, so we need to keep track of it
481
-
482
- Example response from OpenAI:
483
-
484
- 'id': 'MESSAGE_ID',
485
- 'choices': [
486
- {
487
- 'finish_reason': None,
488
- 'index': 0,
489
- 'delta': {
490
- 'content': None,
491
- 'tool_calls': [
492
- {
493
- 'index': 0,
494
- 'id': None,
495
- 'type': 'function',
496
- 'function': {
497
- 'name': None,
498
- 'arguments': '_th'
499
- }
500
- }
501
- ],
502
- 'function_call': None
503
- },
504
- 'logprobs': None
505
- }
506
- ],
507
- 'created': 1713216662,
508
- 'model': 'gpt-4o-mini-2024-07-18',
509
- 'system_fingerprint': 'fp_bd83329f63',
510
- 'object': 'chat.completion.chunk'
511
- }
512
- """
513
- # Get finish reason
514
- finish_reason = None
515
- completion_chunk_tokens = 0
516
-
517
- # Get content and tool calls
518
- content = None
519
- reasoning_content = None
520
- reasoning_content_signature = None
521
- redacted_reasoning_content = None # NOTE called "data" in the stream
522
- tool_calls = None
523
- if isinstance(event, BetaRawMessageStartEvent):
524
- """
525
- BetaRawMessageStartEvent(
526
- message=BetaMessage(
527
- content=[],
528
- usage=BetaUsage(
529
- input_tokens=3086,
530
- output_tokens=1,
531
- ),
532
- ...,
533
- ),
534
- type='message_start'
535
- )
536
- """
537
- completion_chunk_tokens += event.message.usage.output_tokens
538
-
539
- elif isinstance(event, BetaRawMessageDeltaEvent):
540
- """
541
- BetaRawMessageDeltaEvent(
542
- delta=Delta(
543
- stop_reason='tool_use',
544
- stop_sequence=None
545
- ),
546
- type='message_delta',
547
- usage=BetaMessageDeltaUsage(output_tokens=45)
548
- )
549
- """
550
- finish_reason = remap_finish_reason(event.delta.stop_reason)
551
- completion_chunk_tokens += event.usage.output_tokens
552
-
553
- elif isinstance(event, BetaRawContentBlockDeltaEvent):
554
- """
555
- BetaRawContentBlockDeltaEvent(
556
- delta=BetaInputJSONDelta(
557
- partial_json='lo',
558
- type='input_json_delta'
559
- ),
560
- index=0,
561
- type='content_block_delta'
562
- )
563
-
564
- OR
565
-
566
- BetaRawContentBlockDeltaEvent(
567
- delta=BetaTextDelta(
568
- text='👋 ',
569
- type='text_delta'
570
- ),
571
- index=0,
572
- type='content_block_delta'
573
- )
574
-
575
- """
576
- # ReACT COT
577
- if event.delta.type == "text_delta":
578
- content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
579
-
580
- # Extended thought COT
581
- elif event.delta.type == "thinking_delta":
582
- # Redacted doesn't come in the delta chunks, comes all at once
583
- # "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
584
- # Thinking might start with ""
585
- if len(event.delta.thinking) > 0:
586
- reasoning_content = event.delta.thinking
587
-
588
- # Extended thought COT signature
589
- elif event.delta.type == "signature_delta":
590
- if len(event.delta.signature) > 0:
591
- reasoning_content_signature = event.delta.signature
592
-
593
- # Tool calling
594
- elif event.delta.type == "input_json_delta":
595
- tool_calls = [
596
- ToolCallDelta(
597
- index=0,
598
- function=FunctionCallDelta(
599
- name=None,
600
- arguments=event.delta.partial_json,
601
- ),
602
- )
603
- ]
604
- else:
605
- warnings.warn("Unexpected delta type: " + event.delta.type)
606
-
607
- elif isinstance(event, BetaRawContentBlockStartEvent):
608
- """
609
- BetaRawContentBlockStartEvent(
610
- content_block=BetaToolUseBlock(
611
- id='toolu_01LmpZhRhR3WdrRdUrfkKfFw',
612
- input={},
613
- name='get_weather',
614
- type='tool_use'
615
- ),
616
- index=0,
617
- type='content_block_start'
618
- )
619
-
620
- OR
621
-
622
- BetaRawContentBlockStartEvent(
623
- content_block=BetaTextBlock(
624
- text='',
625
- type='text'
626
- ),
627
- index=0,
628
- type='content_block_start'
629
- )
630
- """
631
- if isinstance(event.content_block, BetaToolUseBlock):
632
- tool_calls = [
633
- ToolCallDelta(
634
- index=0,
635
- id=event.content_block.id,
636
- function=FunctionCallDelta(
637
- name=event.content_block.name,
638
- arguments="",
639
- ),
640
- )
641
- ]
642
- elif isinstance(event.content_block, BetaTextBlock):
643
- content = event.content_block.text
644
- elif isinstance(event.content_block, BetaThinkingBlock):
645
- reasoning_content = event.content_block.thinking
646
- elif isinstance(event.content_block, BetaRedactedThinkingBlock):
647
- redacted_reasoning_content = event.content_block.data
648
- else:
649
- warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
650
- elif event.type in VALID_EVENT_TYPES:
651
- pass
652
- else:
653
- warnings.warn("Unexpected event type: " + event.type)
654
-
655
- # Initialize base response
656
- choice = ChunkChoice(
657
- index=0,
658
- finish_reason=finish_reason,
659
- delta=MessageDelta(
660
- content=content,
661
- reasoning_content=reasoning_content,
662
- reasoning_content_signature=reasoning_content_signature,
663
- redacted_reasoning_content=redacted_reasoning_content,
664
- tool_calls=tool_calls,
665
- ),
666
- )
667
- return ChatCompletionChunkResponse(
668
- id=message_id,
669
- choices=[choice],
670
- created=get_utc_time_int(),
671
- model=model,
672
- output_tokens=completion_chunk_tokens,
673
- )
674
-
675
-
676
- def _prepare_anthropic_request(
677
- data: ChatCompletionRequest,
678
- inner_thoughts_xml_tag: Optional[str] = "thinking",
679
- # if true, prefix fill the generation with the thinking tag
680
- prefix_fill: bool = False,
681
- # if true, put COT inside the tool calls instead of inside the content
682
- put_inner_thoughts_in_kwargs: bool = True,
683
- bedrock: bool = False,
684
- # extended thinking related fields
685
- # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
686
- extended_thinking: bool = False,
687
- max_reasoning_tokens: Optional[int] = None,
688
- ) -> dict:
689
- """Prepare the request data for Anthropic API format."""
690
- if extended_thinking:
691
- assert (
692
- max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
693
- ), "max tokens must be greater than thinking budget"
694
- if put_inner_thoughts_in_kwargs:
695
- logger.warning("Extended thinking not compatible with put_inner_thoughts_in_kwargs")
696
- put_inner_thoughts_in_kwargs = False
697
- # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
698
- # Silently disable prefix_fill for now
699
- prefix_fill = False
700
-
701
- # if needed, put inner thoughts as a kwarg for all tools
702
- if data.tools and put_inner_thoughts_in_kwargs:
703
- functions = add_inner_thoughts_to_functions(
704
- functions=[t.function.model_dump() for t in data.tools],
705
- inner_thoughts_key=INNER_THOUGHTS_KWARG,
706
- inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
707
- )
708
- data.tools = [Tool(function=f) for f in functions]
709
-
710
- # convert the tools to Anthropic's payload format
711
- anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
712
-
713
- # pydantic -> dict
714
- data = data.model_dump(exclude_none=True)
715
-
716
- if extended_thinking:
717
- data["thinking"] = {
718
- "type": "enabled",
719
- "budget_tokens": max_reasoning_tokens,
720
- }
721
- # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
722
- data["temperature"] = 1.0
723
-
724
- if "functions" in data:
725
- raise ValueError("'functions' unexpected in Anthropic API payload")
726
-
727
- # Handle tools
728
- if "tools" in data and data["tools"] is None:
729
- data.pop("tools")
730
- data.pop("tool_choice", None)
731
- elif anthropic_tools is not None:
732
- # TODO eventually enable parallel tool use
733
- data["tools"] = anthropic_tools
734
-
735
- # Move 'system' to the top level
736
- assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
737
- data["system"] = data["messages"][0]["content"]
738
- data["messages"] = data["messages"][1:]
739
-
740
- # Process messages
741
- for message in data["messages"]:
742
- if "content" not in message:
743
- message["content"] = None
744
-
745
- # Convert to Anthropic format
746
- msg_objs = [
747
- _Message.dict_to_message(
748
- agent_id=None,
749
- openai_message_dict=m,
750
- )
751
- for m in data["messages"]
752
- ]
753
- data["messages"] = [
754
- m.to_anthropic_dict(
755
- inner_thoughts_xml_tag=inner_thoughts_xml_tag,
756
- put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
757
- )
758
- for m in msg_objs
759
- ]
760
-
761
- # Ensure first message is user
762
- if data["messages"][0]["role"] != "user":
763
- data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
764
-
765
- # Handle alternating messages
766
- data["messages"] = merge_tool_results_into_user_messages(data["messages"])
767
-
768
- # Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
769
- # https://docs.anthropic.com/en/api/messages#body-messages
770
- # NOTE: cannot prefill with tools for opus:
771
- # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
772
- if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
773
- if not bedrock: # not support for bedrock
774
- data["messages"].append(
775
- # Start the thinking process for the assistant
776
- {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
777
- )
778
-
779
- # Validate max_tokens
780
- assert "max_tokens" in data, data
781
-
782
- # Remove OpenAI-specific fields
783
- for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
784
- data.pop(field, None)
785
-
786
- return data
787
-
788
-
789
- def anthropic_chat_completions_request(
790
- data: ChatCompletionRequest,
791
- inner_thoughts_xml_tag: Optional[str] = "thinking",
792
- put_inner_thoughts_in_kwargs: bool = False,
793
- extended_thinking: bool = False,
794
- max_reasoning_tokens: Optional[int] = None,
795
- provider_name: Optional[str] = None,
796
- provider_category: Optional[ProviderCategory] = None,
797
- betas: List[str] = ["tools-2024-04-04"],
798
- user_id: Optional[str] = None,
799
- ) -> ChatCompletionResponse:
800
- """https://docs.anthropic.com/claude/docs/tool-use"""
801
- anthropic_client = None
802
- if provider_category == ProviderCategory.byok:
803
- actor = UserManager().get_user_or_default(user_id=user_id)
804
- api_key = ProviderManager().get_override_key(provider_name, actor=actor)
805
- anthropic_client = anthropic.Anthropic(api_key=api_key)
806
- elif model_settings.anthropic_api_key:
807
- anthropic_client = anthropic.Anthropic()
808
- else:
809
- raise ValueError("No available Anthropic API key")
810
- data = _prepare_anthropic_request(
811
- data=data,
812
- inner_thoughts_xml_tag=inner_thoughts_xml_tag,
813
- put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
814
- extended_thinking=extended_thinking,
815
- max_reasoning_tokens=max_reasoning_tokens,
816
- )
817
- log_event(name="llm_request_sent", attributes=data)
818
- response = anthropic_client.beta.messages.create(
819
- **data,
820
- betas=betas,
821
- )
822
- log_event(name="llm_response_received", attributes={"response": response.json()})
823
- return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
824
-
825
-
826
- def anthropic_bedrock_chat_completions_request(
827
- data: ChatCompletionRequest,
828
- inner_thoughts_xml_tag: Optional[str] = "thinking",
829
- provider_name: Optional[str] = None,
830
- provider_category: Optional[ProviderCategory] = None,
831
- user_id: Optional[str] = None,
832
- ) -> ChatCompletionResponse:
833
- """Make a chat completion request to Anthropic via AWS Bedrock."""
834
- data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)
835
-
836
- # Get the client
837
- if provider_category == ProviderCategory.byok:
838
- actor = UserManager().get_user_or_default(user_id=user_id)
839
- access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
840
- client = get_bedrock_client(access_key, secret_key, region)
841
- else:
842
- client = get_bedrock_client()
843
-
844
- # Make the request
845
- try:
846
- # bedrock does not support certain args
847
- print("Warning: Tool rules not supported with Anthropic Bedrock")
848
- data["tool_choice"] = {"type": "any"}
849
- log_event(name="llm_request_sent", attributes=data)
850
- response = client.messages.create(**data)
851
- log_event(name="llm_response_received", attributes={"response": response.json()})
852
- return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
853
- except PermissionDeniedError:
854
- raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}")
855
- except Exception as e:
856
- raise BedrockError(f"Bedrock error: {e}")
857
-
858
-
859
- def anthropic_chat_completions_request_stream(
860
- data: ChatCompletionRequest,
861
- inner_thoughts_xml_tag: Optional[str] = "thinking",
862
- put_inner_thoughts_in_kwargs: bool = False,
863
- extended_thinking: bool = False,
864
- max_reasoning_tokens: Optional[int] = None,
865
- provider_name: Optional[str] = None,
866
- provider_category: Optional[ProviderCategory] = None,
867
- betas: List[str] = ["tools-2024-04-04"],
868
- user_id: Optional[str] = None,
869
- ) -> Generator[ChatCompletionChunkResponse, None, None]:
870
- """Stream chat completions from Anthropic API.
871
-
872
- Similar to OpenAI's streaming, but using Anthropic's native streaming support.
873
- See: https://docs.anthropic.com/claude/reference/messages-streaming
874
- """
875
- data = _prepare_anthropic_request(
876
- data=data,
877
- inner_thoughts_xml_tag=inner_thoughts_xml_tag,
878
- put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
879
- extended_thinking=extended_thinking,
880
- max_reasoning_tokens=max_reasoning_tokens,
881
- )
882
- if provider_category == ProviderCategory.byok:
883
- actor = UserManager().get_user_or_default(user_id=user_id)
884
- api_key = ProviderManager().get_override_key(provider_name, actor=actor)
885
- anthropic_client = anthropic.Anthropic(api_key=api_key)
886
- elif model_settings.anthropic_api_key:
887
- anthropic_client = anthropic.Anthropic()
888
-
889
- with anthropic_client.beta.messages.stream(
890
- **data,
891
- betas=betas,
892
- ) as stream:
893
- # Stream: https://github.com/anthropics/anthropic-sdk-python/blob/d212ec9f6d5e956f13bc0ddc3d86b5888a954383/src/anthropic/lib/streaming/_beta_messages.py#L22
894
- message_id = None
895
- model = None
896
-
897
- for chunk in stream._raw_stream:
898
- time.sleep(0.01) # Anthropic is really fast, faster than frontend can upload.
899
- if isinstance(chunk, BetaRawMessageStartEvent):
900
- """
901
- BetaRawMessageStartEvent(
902
- message=BetaMessage(
903
- id='MESSAGE ID HERE',
904
- content=[],
905
- model='claude-3-5-sonnet-20241022',
906
- role='assistant',
907
- stop_reason=None,
908
- stop_sequence=None,
909
- type='message',
910
- usage=BetaUsage(
911
- cache_creation_input_tokens=0,
912
- cache_read_input_tokens=0,
913
- input_tokens=30,
914
- output_tokens=4
915
- )
916
- ),
917
- type='message_start'
918
- ),
919
- """
920
- message_id = chunk.message.id
921
- model = chunk.message.model
922
- yield convert_anthropic_stream_event_to_chatcompletion(chunk, message_id, model, inner_thoughts_xml_tag)
923
-
924
-
925
- def anthropic_chat_completions_process_stream(
926
- chat_completion_request: ChatCompletionRequest,
927
- stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
928
- inner_thoughts_xml_tag: Optional[str] = "thinking",
929
- put_inner_thoughts_in_kwargs: bool = False,
930
- extended_thinking: bool = False,
931
- max_reasoning_tokens: Optional[int] = None,
932
- provider_name: Optional[str] = None,
933
- provider_category: Optional[ProviderCategory] = None,
934
- create_message_id: bool = True,
935
- create_message_datetime: bool = True,
936
- betas: List[str] = ["tools-2024-04-04"],
937
- name: Optional[str] = None,
938
- user_id: Optional[str] = None,
939
- ) -> ChatCompletionResponse:
940
- """Process a streaming completion response from Anthropic, similar to OpenAI's streaming.
941
-
942
- Args:
943
- api_key: The Anthropic API key
944
- chat_completion_request: The chat completion request
945
- stream_interface: Interface for handling streaming chunks
946
- inner_thoughts_xml_tag: Tag for inner thoughts in the response
947
- create_message_id: Whether to create a message ID
948
- create_message_datetime: Whether to create message datetime
949
- betas: Beta features to enable
950
-
951
- Returns:
952
- The final ChatCompletionResponse
953
- """
954
- assert chat_completion_request.stream == True
955
- assert stream_interface is not None, "Required"
956
-
957
- # Count prompt tokens - we'll get completion tokens from the final response
958
- chat_history = [m.model_dump(exclude_none=True) for m in chat_completion_request.messages]
959
- prompt_tokens = num_tokens_from_messages(
960
- messages=chat_history,
961
- model=chat_completion_request.model,
962
- )
963
-
964
- # Add tokens for tools if present
965
- if chat_completion_request.tools is not None:
966
- assert chat_completion_request.functions is None
967
- prompt_tokens += num_tokens_from_functions(
968
- functions=[t.function.model_dump() for t in chat_completion_request.tools],
969
- model=chat_completion_request.model,
970
- )
971
- elif chat_completion_request.functions is not None:
972
- assert chat_completion_request.tools is None
973
- prompt_tokens += num_tokens_from_functions(
974
- functions=[f.model_dump() for f in chat_completion_request.functions],
975
- model=chat_completion_request.model,
976
- )
977
-
978
- # Create a dummy message for ID/datetime if needed
979
- dummy_message = _Message(
980
- role=_MessageRole.assistant,
981
- content=[],
982
- agent_id="",
983
- model="",
984
- name=None,
985
- tool_calls=None,
986
- tool_call_id=None,
987
- )
988
-
989
- TEMP_STREAM_RESPONSE_ID = "temp_id"
990
- TEMP_STREAM_FINISH_REASON = "temp_null"
991
- TEMP_STREAM_TOOL_CALL_ID = "temp_id"
992
- chat_completion_response = ChatCompletionResponse(
993
- id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
994
- choices=[],
995
- created=int(dummy_message.created_at.timestamp()),
996
- model=chat_completion_request.model,
997
- usage=UsageStatistics(
998
- prompt_tokens=prompt_tokens,
999
- total_tokens=prompt_tokens,
1000
- ),
1001
- )
1002
-
1003
- log_event(name="llm_request_sent", attributes=chat_completion_request.model_dump())
1004
-
1005
- if stream_interface:
1006
- stream_interface.stream_start()
1007
-
1008
- completion_tokens = 0
1009
- prev_message_type = None
1010
- message_idx = 0
1011
- try:
1012
- for chunk_idx, chat_completion_chunk in enumerate(
1013
- anthropic_chat_completions_request_stream(
1014
- data=chat_completion_request,
1015
- inner_thoughts_xml_tag=inner_thoughts_xml_tag,
1016
- put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
1017
- extended_thinking=extended_thinking,
1018
- max_reasoning_tokens=max_reasoning_tokens,
1019
- provider_name=provider_name,
1020
- provider_category=provider_category,
1021
- betas=betas,
1022
- user_id=user_id,
1023
- )
1024
- ):
1025
- assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
1026
-
1027
- if stream_interface:
1028
- if isinstance(stream_interface, AgentChunkStreamingInterface):
1029
- message_type = stream_interface.process_chunk(
1030
- chat_completion_chunk,
1031
- message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
1032
- message_date=(
1033
- timestamp_to_datetime(chat_completion_response.created)
1034
- if create_message_datetime
1035
- else timestamp_to_datetime(chat_completion_chunk.created)
1036
- ),
1037
- # if extended_thinking is on, then reasoning_content will be flowing as chunks
1038
- # TODO handle emitting redacted reasoning content (e.g. as concat?)
1039
- expect_reasoning_content=extended_thinking,
1040
- name=name,
1041
- message_index=message_idx,
1042
- prev_message_type=prev_message_type,
1043
- )
1044
- if message_type != prev_message_type and message_type is not None and prev_message_type is not None:
1045
- message_idx += 1
1046
- if message_type is not None:
1047
- prev_message_type = message_type
1048
- elif isinstance(stream_interface, AgentRefreshStreamingInterface):
1049
- stream_interface.process_refresh(chat_completion_response)
1050
- else:
1051
- raise TypeError(stream_interface)
1052
-
1053
- if chunk_idx == 0:
1054
- # initialize the choice objects which we will increment with the deltas
1055
- num_choices = len(chat_completion_chunk.choices)
1056
- assert num_choices > 0
1057
- chat_completion_response.choices = [
1058
- Choice(
1059
- finish_reason=TEMP_STREAM_FINISH_REASON, # NOTE: needs to be ovrerwritten
1060
- index=i,
1061
- message=Message(
1062
- role="assistant",
1063
- ),
1064
- )
1065
- for i in range(len(chat_completion_chunk.choices))
1066
- ]
1067
-
1068
- # add the choice delta
1069
- assert len(chat_completion_chunk.choices) == len(chat_completion_response.choices), chat_completion_chunk
1070
- for chunk_choice in chat_completion_chunk.choices:
1071
- if chunk_choice.finish_reason is not None:
1072
- chat_completion_response.choices[chunk_choice.index].finish_reason = chunk_choice.finish_reason
1073
-
1074
- if chunk_choice.logprobs is not None:
1075
- chat_completion_response.choices[chunk_choice.index].logprobs = chunk_choice.logprobs
1076
-
1077
- accum_message = chat_completion_response.choices[chunk_choice.index].message
1078
- message_delta = chunk_choice.delta
1079
-
1080
- if message_delta.content is not None:
1081
- content_delta = message_delta.content
1082
- if accum_message.content is None:
1083
- accum_message.content = content_delta
1084
- else:
1085
- accum_message.content += content_delta
1086
-
1087
- # NOTE: for extended_thinking mode
1088
- if extended_thinking and message_delta.reasoning_content is not None:
1089
- reasoning_content_delta = message_delta.reasoning_content
1090
- if accum_message.reasoning_content is None:
1091
- accum_message.reasoning_content = reasoning_content_delta
1092
- else:
1093
- accum_message.reasoning_content += reasoning_content_delta
1094
-
1095
- # NOTE: extended_thinking sends a signature
1096
- if extended_thinking and message_delta.reasoning_content_signature is not None:
1097
- reasoning_content_signature_delta = message_delta.reasoning_content_signature
1098
- if accum_message.reasoning_content_signature is None:
1099
- accum_message.reasoning_content_signature = reasoning_content_signature_delta
1100
- else:
1101
- accum_message.reasoning_content_signature += reasoning_content_signature_delta
1102
-
1103
- # NOTE: extended_thinking also has the potential for redacted_reasoning_content
1104
- if extended_thinking and message_delta.redacted_reasoning_content is not None:
1105
- redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
1106
- if accum_message.redacted_reasoning_content is None:
1107
- accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
1108
- else:
1109
- accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
1110
-
1111
- # TODO(charles) make sure this works for parallel tool calling?
1112
- if message_delta.tool_calls is not None:
1113
- tool_calls_delta = message_delta.tool_calls
1114
-
1115
- # If this is the first tool call showing up in a chunk, initialize the list with it
1116
- if accum_message.tool_calls is None:
1117
- accum_message.tool_calls = [
1118
- ToolCall(id=TEMP_STREAM_TOOL_CALL_ID, function=FunctionCall(name="", arguments=""))
1119
- for _ in range(len(tool_calls_delta))
1120
- ]
1121
-
1122
- # There may be many tool calls in a tool calls delta (e.g. parallel tool calls)
1123
- for tool_call_delta in tool_calls_delta:
1124
- if tool_call_delta.id is not None:
1125
- # TODO assert that we're not overwriting?
1126
- # TODO += instead of =?
1127
- if tool_call_delta.index not in range(len(accum_message.tool_calls)):
1128
- warnings.warn(
1129
- f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
1130
- )
1131
- # force index 0
1132
- # accum_message.tool_calls[0].id = tool_call_delta.id
1133
- else:
1134
- accum_message.tool_calls[tool_call_delta.index].id = tool_call_delta.id
1135
- if tool_call_delta.function is not None:
1136
- if tool_call_delta.function.name is not None:
1137
- # TODO assert that we're not overwriting?
1138
- # TODO += instead of =?
1139
- if tool_call_delta.index not in range(len(accum_message.tool_calls)):
1140
- warnings.warn(
1141
- f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
1142
- )
1143
- # force index 0
1144
- # accum_message.tool_calls[0].function.name = tool_call_delta.function.name
1145
- else:
1146
- accum_message.tool_calls[tool_call_delta.index].function.name = tool_call_delta.function.name
1147
- if tool_call_delta.function.arguments is not None:
1148
- if tool_call_delta.index not in range(len(accum_message.tool_calls)):
1149
- warnings.warn(
1150
- f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
1151
- )
1152
- # force index 0
1153
- # accum_message.tool_calls[0].function.arguments += tool_call_delta.function.arguments
1154
- else:
1155
- accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments
1156
-
1157
- if message_delta.function_call is not None:
1158
- raise NotImplementedError("Old function_call style not support with stream=True")
1159
-
1160
- # overwrite response fields based on latest chunk
1161
- if not create_message_id:
1162
- chat_completion_response.id = chat_completion_chunk.id
1163
- if not create_message_datetime:
1164
- chat_completion_response.created = chat_completion_chunk.created
1165
- chat_completion_response.model = chat_completion_chunk.model
1166
- chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
1167
-
1168
- # increment chunk counter
1169
- if chat_completion_chunk.output_tokens is not None:
1170
- completion_tokens += chat_completion_chunk.output_tokens
1171
-
1172
- except Exception as e:
1173
- if stream_interface:
1174
- stream_interface.stream_end()
1175
- print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
1176
- raise e
1177
- finally:
1178
- if stream_interface:
1179
- stream_interface.stream_end()
1180
-
1181
- # make sure we didn't leave temp stuff in
1182
- assert all([c.finish_reason != TEMP_STREAM_FINISH_REASON for c in chat_completion_response.choices])
1183
- assert all(
1184
- [
1185
- all([tc.id != TEMP_STREAM_TOOL_CALL_ID for tc in c.message.tool_calls]) if c.message.tool_calls else True
1186
- for c in chat_completion_response.choices
1187
- ]
1188
- )
1189
- if not create_message_id:
1190
- assert chat_completion_response.id != dummy_message.id
1191
-
1192
- # compute token usage before returning
1193
- # TODO try actually computing the #tokens instead of assuming the chunks is the same
1194
- chat_completion_response.usage.completion_tokens = completion_tokens
1195
- chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
1196
-
1197
- assert len(chat_completion_response.choices) > 0, chat_completion_response
1198
-
1199
- log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
1200
-
1201
- for choice in chat_completion_response.choices:
1202
- if choice.message.content is not None:
1203
- choice.message.content = choice.message.content.replace(f"<{inner_thoughts_xml_tag}>", "")
1204
- choice.message.content = choice.message.content.replace(f"</{inner_thoughts_xml_tag}>", "")
1205
-
1206
- return chat_completion_response