letta-nightly 0.6.43.dev20250324104208__py3-none-any.whl → 0.6.44.dev20250325050316__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/agent.py +106 -104
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +3 -1
- letta/functions/function_sets/base.py +2 -1
- letta/functions/function_sets/multi_agent.py +51 -40
- letta/functions/helpers.py +26 -22
- letta/helpers/message_helper.py +41 -0
- letta/llm_api/anthropic.py +150 -44
- letta/llm_api/aws_bedrock.py +5 -3
- letta/llm_api/azure_openai.py +0 -1
- letta/llm_api/llm_api_tools.py +4 -0
- letta/orm/organization.py +1 -0
- letta/orm/sqlalchemy_base.py +2 -4
- letta/schemas/agent.py +8 -0
- letta/schemas/letta_message.py +8 -4
- letta/schemas/llm_config.py +6 -0
- letta/schemas/message.py +143 -24
- letta/schemas/openai/chat_completion_response.py +5 -0
- letta/schemas/organization.py +7 -0
- letta/schemas/providers.py +17 -0
- letta/schemas/tool.py +5 -1
- letta/schemas/usage.py +5 -1
- letta/serialize_schemas/pydantic_agent_schema.py +1 -1
- letta/server/rest_api/interface.py +44 -7
- letta/server/rest_api/routers/v1/agents.py +13 -2
- letta/server/rest_api/routers/v1/organizations.py +19 -1
- letta/server/rest_api/utils.py +1 -1
- letta/server/server.py +49 -70
- letta/services/agent_manager.py +6 -2
- letta/services/helpers/agent_manager_helper.py +24 -38
- letta/services/message_manager.py +7 -6
- letta/services/organization_manager.py +13 -0
- letta/services/tool_execution_sandbox.py +5 -1
- letta/services/tool_executor/__init__.py +0 -0
- letta/services/tool_executor/tool_execution_manager.py +74 -0
- letta/services/tool_executor/tool_executor.py +380 -0
- {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/METADATA +2 -3
- {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/RECORD +42 -38
- {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic.py
CHANGED
|
@@ -13,7 +13,9 @@ from anthropic.types.beta import (
|
|
|
13
13
|
BetaRawMessageDeltaEvent,
|
|
14
14
|
BetaRawMessageStartEvent,
|
|
15
15
|
BetaRawMessageStopEvent,
|
|
16
|
+
BetaRedactedThinkingBlock,
|
|
16
17
|
BetaTextBlock,
|
|
18
|
+
BetaThinkingBlock,
|
|
17
19
|
BetaToolUseBlock,
|
|
18
20
|
)
|
|
19
21
|
|
|
@@ -345,43 +347,32 @@ def convert_anthropic_response_to_chatcompletion(
|
|
|
345
347
|
finish_reason = remap_finish_reason(response.stop_reason)
|
|
346
348
|
|
|
347
349
|
content = None
|
|
350
|
+
reasoning_content = None
|
|
351
|
+
reasoning_content_signature = None
|
|
352
|
+
redacted_reasoning_content = None
|
|
348
353
|
tool_calls = None
|
|
349
354
|
|
|
350
355
|
if len(response.content) > 1:
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
# function call only
|
|
372
|
-
tool_calls = [
|
|
373
|
-
ToolCall(
|
|
374
|
-
id=block.id,
|
|
375
|
-
type="function",
|
|
376
|
-
function=FunctionCall(
|
|
377
|
-
name=block.name,
|
|
378
|
-
arguments=json.dumps(block.input, indent=2),
|
|
379
|
-
),
|
|
380
|
-
)
|
|
381
|
-
]
|
|
382
|
-
else:
|
|
383
|
-
# inner mono only
|
|
384
|
-
content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag)
|
|
356
|
+
for content_part in response.content:
|
|
357
|
+
if content_part.type == "text":
|
|
358
|
+
content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
|
|
359
|
+
if content_part.type == "tool_use":
|
|
360
|
+
tool_calls = [
|
|
361
|
+
ToolCall(
|
|
362
|
+
id=content_part.id,
|
|
363
|
+
type="function",
|
|
364
|
+
function=FunctionCall(
|
|
365
|
+
name=content_part.name,
|
|
366
|
+
arguments=json.dumps(content_part.input, indent=2),
|
|
367
|
+
),
|
|
368
|
+
)
|
|
369
|
+
]
|
|
370
|
+
if content_part.type == "thinking":
|
|
371
|
+
reasoning_content = content_part.thinking
|
|
372
|
+
reasoning_content_signature = content_part.signature
|
|
373
|
+
if content_part.type == "redacted_thinking":
|
|
374
|
+
redacted_reasoning_content = content_part.data
|
|
375
|
+
|
|
385
376
|
else:
|
|
386
377
|
raise RuntimeError("Unexpected empty content in response")
|
|
387
378
|
|
|
@@ -392,6 +383,9 @@ def convert_anthropic_response_to_chatcompletion(
|
|
|
392
383
|
message=ChoiceMessage(
|
|
393
384
|
role=response.role,
|
|
394
385
|
content=content,
|
|
386
|
+
reasoning_content=reasoning_content,
|
|
387
|
+
reasoning_content_signature=reasoning_content_signature,
|
|
388
|
+
redacted_reasoning_content=redacted_reasoning_content,
|
|
395
389
|
tool_calls=tool_calls,
|
|
396
390
|
),
|
|
397
391
|
)
|
|
@@ -462,7 +456,31 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
462
456
|
"""
|
|
463
457
|
# Get finish reason
|
|
464
458
|
finish_reason = None
|
|
465
|
-
|
|
459
|
+
completion_chunk_tokens = 0
|
|
460
|
+
|
|
461
|
+
# Get content and tool calls
|
|
462
|
+
content = None
|
|
463
|
+
reasoning_content = None
|
|
464
|
+
reasoning_content_signature = None
|
|
465
|
+
redacted_reasoning_content = None # NOTE called "data" in the stream
|
|
466
|
+
tool_calls = None
|
|
467
|
+
if isinstance(event, BetaRawMessageStartEvent):
|
|
468
|
+
"""
|
|
469
|
+
BetaRawMessageStartEvent(
|
|
470
|
+
message=BetaMessage(
|
|
471
|
+
content=[],
|
|
472
|
+
usage=BetaUsage(
|
|
473
|
+
input_tokens=3086,
|
|
474
|
+
output_tokens=1,
|
|
475
|
+
),
|
|
476
|
+
...,
|
|
477
|
+
),
|
|
478
|
+
type='message_start'
|
|
479
|
+
)
|
|
480
|
+
"""
|
|
481
|
+
completion_chunk_tokens += event.message.usage.output_tokens
|
|
482
|
+
|
|
483
|
+
elif isinstance(event, BetaRawMessageDeltaEvent):
|
|
466
484
|
"""
|
|
467
485
|
BetaRawMessageDeltaEvent(
|
|
468
486
|
delta=Delta(
|
|
@@ -474,11 +492,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
474
492
|
)
|
|
475
493
|
"""
|
|
476
494
|
finish_reason = remap_finish_reason(event.delta.stop_reason)
|
|
495
|
+
completion_chunk_tokens += event.usage.output_tokens
|
|
477
496
|
|
|
478
|
-
|
|
479
|
-
content = None
|
|
480
|
-
tool_calls = None
|
|
481
|
-
if isinstance(event, BetaRawContentBlockDeltaEvent):
|
|
497
|
+
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
|
482
498
|
"""
|
|
483
499
|
BetaRawContentBlockDeltaEvent(
|
|
484
500
|
delta=BetaInputJSONDelta(
|
|
@@ -501,9 +517,24 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
501
517
|
)
|
|
502
518
|
|
|
503
519
|
"""
|
|
520
|
+
# ReACT COT
|
|
504
521
|
if event.delta.type == "text_delta":
|
|
505
522
|
content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
|
|
506
523
|
|
|
524
|
+
# Extended thought COT
|
|
525
|
+
elif event.delta.type == "thinking_delta":
|
|
526
|
+
# Redacted doesn't come in the delta chunks, comes all at once
|
|
527
|
+
# "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
|
|
528
|
+
# Thinking might start with ""
|
|
529
|
+
if len(event.delta.thinking) > 0:
|
|
530
|
+
reasoning_content = event.delta.thinking
|
|
531
|
+
|
|
532
|
+
# Extended thought COT signature
|
|
533
|
+
elif event.delta.type == "signature_delta":
|
|
534
|
+
if len(event.delta.signature) > 0:
|
|
535
|
+
reasoning_content_signature = event.delta.signature
|
|
536
|
+
|
|
537
|
+
# Tool calling
|
|
507
538
|
elif event.delta.type == "input_json_delta":
|
|
508
539
|
tool_calls = [
|
|
509
540
|
ToolCallDelta(
|
|
@@ -514,6 +545,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
514
545
|
),
|
|
515
546
|
)
|
|
516
547
|
]
|
|
548
|
+
else:
|
|
549
|
+
warnings.warn("Unexpected delta type: " + event.delta.type)
|
|
550
|
+
|
|
517
551
|
elif isinstance(event, BetaRawContentBlockStartEvent):
|
|
518
552
|
"""
|
|
519
553
|
BetaRawContentBlockStartEvent(
|
|
@@ -551,6 +585,15 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
551
585
|
]
|
|
552
586
|
elif isinstance(event.content_block, BetaTextBlock):
|
|
553
587
|
content = event.content_block.text
|
|
588
|
+
elif isinstance(event.content_block, BetaThinkingBlock):
|
|
589
|
+
reasoning_content = event.content_block.thinking
|
|
590
|
+
elif isinstance(event.content_block, BetaRedactedThinkingBlock):
|
|
591
|
+
redacted_reasoning_content = event.content_block.data
|
|
592
|
+
else:
|
|
593
|
+
warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
|
|
594
|
+
|
|
595
|
+
else:
|
|
596
|
+
warnings.warn("Unexpected event type: " + event.type)
|
|
554
597
|
|
|
555
598
|
# Initialize base response
|
|
556
599
|
choice = ChunkChoice(
|
|
@@ -558,6 +601,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
558
601
|
finish_reason=finish_reason,
|
|
559
602
|
delta=MessageDelta(
|
|
560
603
|
content=content,
|
|
604
|
+
reasoning_content=reasoning_content,
|
|
605
|
+
reasoning_content_signature=reasoning_content_signature,
|
|
606
|
+
redacted_reasoning_content=redacted_reasoning_content,
|
|
561
607
|
tool_calls=tool_calls,
|
|
562
608
|
),
|
|
563
609
|
)
|
|
@@ -566,6 +612,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
|
|
566
612
|
choices=[choice],
|
|
567
613
|
created=get_utc_time(),
|
|
568
614
|
model=model,
|
|
615
|
+
output_tokens=completion_chunk_tokens,
|
|
569
616
|
)
|
|
570
617
|
|
|
571
618
|
|
|
@@ -577,8 +624,20 @@ def _prepare_anthropic_request(
|
|
|
577
624
|
# if true, put COT inside the tool calls instead of inside the content
|
|
578
625
|
put_inner_thoughts_in_kwargs: bool = False,
|
|
579
626
|
bedrock: bool = False,
|
|
627
|
+
# extended thinking related fields
|
|
628
|
+
# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
|
|
629
|
+
extended_thinking: bool = False,
|
|
630
|
+
max_reasoning_tokens: Optional[int] = None,
|
|
580
631
|
) -> dict:
|
|
581
632
|
"""Prepare the request data for Anthropic API format."""
|
|
633
|
+
if extended_thinking:
|
|
634
|
+
assert (
|
|
635
|
+
max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
|
|
636
|
+
), "max tokens must be greater than thinking budget"
|
|
637
|
+
assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
|
|
638
|
+
# assert not prefix_fill, "extended thinking not compatible with prefix_fill"
|
|
639
|
+
# Silently disable prefix_fill for now
|
|
640
|
+
prefix_fill = False
|
|
582
641
|
|
|
583
642
|
# if needed, put inner thoughts as a kwarg for all tools
|
|
584
643
|
if data.tools and put_inner_thoughts_in_kwargs:
|
|
@@ -595,6 +654,14 @@ def _prepare_anthropic_request(
|
|
|
595
654
|
# pydantic -> dict
|
|
596
655
|
data = data.model_dump(exclude_none=True)
|
|
597
656
|
|
|
657
|
+
if extended_thinking:
|
|
658
|
+
data["thinking"] = {
|
|
659
|
+
"type": "enabled",
|
|
660
|
+
"budget_tokens": max_reasoning_tokens,
|
|
661
|
+
}
|
|
662
|
+
# Anthropic only allows `temperature` to be 1 when extended thinking is enabled, so force it here. See https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
|
663
|
+
data["temperature"] = 1.0
|
|
664
|
+
|
|
598
665
|
if "functions" in data:
|
|
599
666
|
raise ValueError(f"'functions' unexpected in Anthropic API payload")
|
|
600
667
|
|
|
@@ -665,6 +732,8 @@ def anthropic_chat_completions_request(
|
|
|
665
732
|
data: ChatCompletionRequest,
|
|
666
733
|
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
|
667
734
|
put_inner_thoughts_in_kwargs: bool = False,
|
|
735
|
+
extended_thinking: bool = False,
|
|
736
|
+
max_reasoning_tokens: Optional[int] = None,
|
|
668
737
|
betas: List[str] = ["tools-2024-04-04"],
|
|
669
738
|
) -> ChatCompletionResponse:
|
|
670
739
|
"""https://docs.anthropic.com/claude/docs/tool-use"""
|
|
@@ -678,6 +747,8 @@ def anthropic_chat_completions_request(
|
|
|
678
747
|
data=data,
|
|
679
748
|
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
|
680
749
|
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
|
750
|
+
extended_thinking=extended_thinking,
|
|
751
|
+
max_reasoning_tokens=max_reasoning_tokens,
|
|
681
752
|
)
|
|
682
753
|
log_event(name="llm_request_sent", attributes=data)
|
|
683
754
|
response = anthropic_client.beta.messages.create(
|
|
@@ -717,6 +788,8 @@ def anthropic_chat_completions_request_stream(
|
|
|
717
788
|
data: ChatCompletionRequest,
|
|
718
789
|
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
|
719
790
|
put_inner_thoughts_in_kwargs: bool = False,
|
|
791
|
+
extended_thinking: bool = False,
|
|
792
|
+
max_reasoning_tokens: Optional[int] = None,
|
|
720
793
|
betas: List[str] = ["tools-2024-04-04"],
|
|
721
794
|
) -> Generator[ChatCompletionChunkResponse, None, None]:
|
|
722
795
|
"""Stream chat completions from Anthropic API.
|
|
@@ -728,6 +801,8 @@ def anthropic_chat_completions_request_stream(
|
|
|
728
801
|
data=data,
|
|
729
802
|
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
|
730
803
|
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
|
804
|
+
extended_thinking=extended_thinking,
|
|
805
|
+
max_reasoning_tokens=max_reasoning_tokens,
|
|
731
806
|
)
|
|
732
807
|
|
|
733
808
|
anthropic_override_key = ProviderManager().get_anthropic_override_key()
|
|
@@ -777,6 +852,8 @@ def anthropic_chat_completions_process_stream(
|
|
|
777
852
|
stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
|
|
778
853
|
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
|
779
854
|
put_inner_thoughts_in_kwargs: bool = False,
|
|
855
|
+
extended_thinking: bool = False,
|
|
856
|
+
max_reasoning_tokens: Optional[int] = None,
|
|
780
857
|
create_message_id: bool = True,
|
|
781
858
|
create_message_datetime: bool = True,
|
|
782
859
|
betas: List[str] = ["tools-2024-04-04"],
|
|
@@ -839,7 +916,6 @@ def anthropic_chat_completions_process_stream(
|
|
|
839
916
|
created=dummy_message.created_at,
|
|
840
917
|
model=chat_completion_request.model,
|
|
841
918
|
usage=UsageStatistics(
|
|
842
|
-
completion_tokens=0,
|
|
843
919
|
prompt_tokens=prompt_tokens,
|
|
844
920
|
total_tokens=prompt_tokens,
|
|
845
921
|
),
|
|
@@ -850,13 +926,15 @@ def anthropic_chat_completions_process_stream(
|
|
|
850
926
|
if stream_interface:
|
|
851
927
|
stream_interface.stream_start()
|
|
852
928
|
|
|
853
|
-
|
|
929
|
+
completion_tokens = 0
|
|
854
930
|
try:
|
|
855
931
|
for chunk_idx, chat_completion_chunk in enumerate(
|
|
856
932
|
anthropic_chat_completions_request_stream(
|
|
857
933
|
data=chat_completion_request,
|
|
858
934
|
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
|
859
935
|
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
|
936
|
+
extended_thinking=extended_thinking,
|
|
937
|
+
max_reasoning_tokens=max_reasoning_tokens,
|
|
860
938
|
betas=betas,
|
|
861
939
|
)
|
|
862
940
|
):
|
|
@@ -868,6 +946,9 @@ def anthropic_chat_completions_process_stream(
|
|
|
868
946
|
chat_completion_chunk,
|
|
869
947
|
message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
|
|
870
948
|
message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
|
|
949
|
+
# if extended_thinking is on, then reasoning_content will be flowing as chunks
|
|
950
|
+
# TODO handle emitting redacted reasoning content (e.g. as concat?)
|
|
951
|
+
expect_reasoning_content=extended_thinking,
|
|
871
952
|
)
|
|
872
953
|
elif isinstance(stream_interface, AgentRefreshStreamingInterface):
|
|
873
954
|
stream_interface.process_refresh(chat_completion_response)
|
|
@@ -908,6 +989,30 @@ def anthropic_chat_completions_process_stream(
|
|
|
908
989
|
else:
|
|
909
990
|
accum_message.content += content_delta
|
|
910
991
|
|
|
992
|
+
# NOTE: for extended_thinking mode
|
|
993
|
+
if extended_thinking and message_delta.reasoning_content is not None:
|
|
994
|
+
reasoning_content_delta = message_delta.reasoning_content
|
|
995
|
+
if accum_message.reasoning_content is None:
|
|
996
|
+
accum_message.reasoning_content = reasoning_content_delta
|
|
997
|
+
else:
|
|
998
|
+
accum_message.reasoning_content += reasoning_content_delta
|
|
999
|
+
|
|
1000
|
+
# NOTE: extended_thinking sends a signature
|
|
1001
|
+
if extended_thinking and message_delta.reasoning_content_signature is not None:
|
|
1002
|
+
reasoning_content_signature_delta = message_delta.reasoning_content_signature
|
|
1003
|
+
if accum_message.reasoning_content_signature is None:
|
|
1004
|
+
accum_message.reasoning_content_signature = reasoning_content_signature_delta
|
|
1005
|
+
else:
|
|
1006
|
+
accum_message.reasoning_content_signature += reasoning_content_signature_delta
|
|
1007
|
+
|
|
1008
|
+
# NOTE: extended_thinking also has the potential for redacted_reasoning_content
|
|
1009
|
+
if extended_thinking and message_delta.redacted_reasoning_content is not None:
|
|
1010
|
+
redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
|
|
1011
|
+
if accum_message.redacted_reasoning_content is None:
|
|
1012
|
+
accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
|
|
1013
|
+
else:
|
|
1014
|
+
accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
|
|
1015
|
+
|
|
911
1016
|
# TODO(charles) make sure this works for parallel tool calling?
|
|
912
1017
|
if message_delta.tool_calls is not None:
|
|
913
1018
|
tool_calls_delta = message_delta.tool_calls
|
|
@@ -966,7 +1071,8 @@ def anthropic_chat_completions_process_stream(
|
|
|
966
1071
|
chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
|
|
967
1072
|
|
|
968
1073
|
# increment chunk counter
|
|
969
|
-
|
|
1074
|
+
if chat_completion_chunk.output_tokens is not None:
|
|
1075
|
+
completion_tokens += chat_completion_chunk.output_tokens
|
|
970
1076
|
|
|
971
1077
|
except Exception as e:
|
|
972
1078
|
if stream_interface:
|
|
@@ -990,8 +1096,8 @@ def anthropic_chat_completions_process_stream(
|
|
|
990
1096
|
|
|
991
1097
|
# compute token usage before returning
|
|
992
1098
|
# TODO: compute the actual number of tokens instead of assuming it equals the number of chunks
|
|
993
|
-
chat_completion_response.usage.completion_tokens =
|
|
994
|
-
chat_completion_response.usage.total_tokens = prompt_tokens +
|
|
1099
|
+
chat_completion_response.usage.completion_tokens = completion_tokens
|
|
1100
|
+
chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
|
|
995
1101
|
|
|
996
1102
|
assert len(chat_completion_response.choices) > 0, chat_completion_response
|
|
997
1103
|
|
letta/llm_api/aws_bedrock.py
CHANGED
|
@@ -3,17 +3,19 @@ from typing import Any, Dict, List
|
|
|
3
3
|
|
|
4
4
|
from anthropic import AnthropicBedrock
|
|
5
5
|
|
|
6
|
-
from letta.settings import model_settings
|
|
7
|
-
|
|
8
6
|
from letta.log import get_logger
|
|
7
|
+
from letta.settings import model_settings
|
|
9
8
|
|
|
10
9
|
logger = get_logger(__name__)
|
|
11
10
|
|
|
11
|
+
|
|
12
12
|
def has_valid_aws_credentials() -> bool:
|
|
13
13
|
"""
|
|
14
14
|
Check if AWS credentials are properly configured.
|
|
15
15
|
"""
|
|
16
|
-
valid_aws_credentials =
|
|
16
|
+
valid_aws_credentials = (
|
|
17
|
+
os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
|
|
18
|
+
)
|
|
17
19
|
return valid_aws_credentials
|
|
18
20
|
|
|
19
21
|
|
letta/llm_api/azure_openai.py
CHANGED
|
@@ -3,7 +3,6 @@ from collections import defaultdict
|
|
|
3
3
|
import requests
|
|
4
4
|
from openai import AzureOpenAI
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
from letta.llm_api.openai import prepare_openai_payload
|
|
8
7
|
from letta.schemas.llm_config import LLMConfig
|
|
9
8
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
letta/llm_api/llm_api_tools.py
CHANGED
|
@@ -406,6 +406,8 @@ def create(
|
|
|
406
406
|
chat_completion_request=chat_completion_request,
|
|
407
407
|
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
|
408
408
|
stream_interface=stream_interface,
|
|
409
|
+
extended_thinking=llm_config.enable_reasoner,
|
|
410
|
+
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
|
409
411
|
)
|
|
410
412
|
|
|
411
413
|
else:
|
|
@@ -413,6 +415,8 @@ def create(
|
|
|
413
415
|
response = anthropic_chat_completions_request(
|
|
414
416
|
data=chat_completion_request,
|
|
415
417
|
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
|
418
|
+
extended_thinking=llm_config.enable_reasoner,
|
|
419
|
+
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
|
416
420
|
)
|
|
417
421
|
|
|
418
422
|
if llm_config.put_inner_thoughts_in_kwargs:
|
letta/orm/organization.py
CHANGED
|
@@ -23,6 +23,7 @@ class Organization(SqlalchemyBase):
|
|
|
23
23
|
__pydantic_model__ = PydanticOrganization
|
|
24
24
|
|
|
25
25
|
name: Mapped[str] = mapped_column(doc="The display name of the organization.")
|
|
26
|
+
privileged_tools: Mapped[bool] = mapped_column(doc="Whether the organization has access to privileged tools.")
|
|
26
27
|
|
|
27
28
|
# relationships
|
|
28
29
|
users: Mapped[List["User"]] = relationship("User", back_populates="organization", cascade="all, delete-orphan")
|
letta/orm/sqlalchemy_base.py
CHANGED
|
@@ -361,14 +361,12 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
|
|
|
361
361
|
if identifier_set != results_set:
|
|
362
362
|
# Construct a detailed error message based on query conditions
|
|
363
363
|
conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
|
|
364
|
-
logger.
|
|
365
|
-
f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}"
|
|
366
|
-
)
|
|
364
|
+
logger.debug(f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}")
|
|
367
365
|
return results
|
|
368
366
|
|
|
369
367
|
# Construct a detailed error message based on query conditions
|
|
370
368
|
conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
|
|
371
|
-
logger.
|
|
369
|
+
logger.debug(f"{cls.__name__} not found with {conditions_str}")
|
|
372
370
|
return []
|
|
373
371
|
|
|
374
372
|
@handle_db_timeout
|
letta/schemas/agent.py
CHANGED
|
@@ -147,6 +147,14 @@ class CreateAgent(BaseModel, validate_assignment=True): #
|
|
|
147
147
|
)
|
|
148
148
|
context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
|
|
149
149
|
embedding_chunk_size: Optional[int] = Field(DEFAULT_EMBEDDING_CHUNK_SIZE, description="The embedding chunk size used by the agent.")
|
|
150
|
+
max_tokens: Optional[int] = Field(
|
|
151
|
+
None,
|
|
152
|
+
description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
|
|
153
|
+
)
|
|
154
|
+
max_reasoning_tokens: Optional[int] = Field(
|
|
155
|
+
None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
|
|
156
|
+
)
|
|
157
|
+
enable_reasoner: Optional[bool] = Field(False, description="Whether to enable internal extended thinking step for a reasoner model.")
|
|
150
158
|
from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
|
|
151
159
|
template: bool = Field(False, description="Whether the agent is a template")
|
|
152
160
|
project: Optional[str] = Field(
|
letta/schemas/letta_message.py
CHANGED
|
@@ -88,11 +88,13 @@ class ReasoningMessage(LettaMessage):
|
|
|
88
88
|
source (Literal["reasoner_model", "non_reasoner_model"]): Whether the reasoning
|
|
89
89
|
content was generated natively by a reasoner model or derived via prompting
|
|
90
90
|
reasoning (str): The internal reasoning of the agent
|
|
91
|
+
signature (Optional[str]): The model-generated signature of the reasoning step
|
|
91
92
|
"""
|
|
92
93
|
|
|
93
94
|
message_type: Literal["reasoning_message"] = "reasoning_message"
|
|
94
95
|
source: Literal["reasoner_model", "non_reasoner_model"] = "non_reasoner_model"
|
|
95
96
|
reasoning: str
|
|
97
|
+
signature: Optional[str] = None
|
|
96
98
|
|
|
97
99
|
|
|
98
100
|
class HiddenReasoningMessage(LettaMessage):
|
|
@@ -106,12 +108,12 @@ class HiddenReasoningMessage(LettaMessage):
|
|
|
106
108
|
name (Optional[str]): The name of the sender of the message
|
|
107
109
|
state (Literal["redacted", "omitted"]): Whether the reasoning
|
|
108
110
|
content was redacted by the provider or simply omitted by the API
|
|
109
|
-
|
|
111
|
+
hidden_reasoning (Optional[str]): The internal reasoning of the agent
|
|
110
112
|
"""
|
|
111
113
|
|
|
112
|
-
message_type: Literal["
|
|
114
|
+
message_type: Literal["hidden_reasoning_message"] = "hidden_reasoning_message"
|
|
113
115
|
state: Literal["redacted", "omitted"]
|
|
114
|
-
|
|
116
|
+
hidden_reasoning: Optional[str] = None
|
|
115
117
|
|
|
116
118
|
|
|
117
119
|
class ToolCall(BaseModel):
|
|
@@ -229,7 +231,7 @@ class AssistantMessage(LettaMessage):
|
|
|
229
231
|
|
|
230
232
|
# NOTE: use Pydantic's discriminated unions feature: https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions
|
|
231
233
|
LettaMessageUnion = Annotated[
|
|
232
|
-
Union[SystemMessage, UserMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
|
|
234
|
+
Union[SystemMessage, UserMessage, ReasoningMessage, HiddenReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
|
|
233
235
|
Field(discriminator="message_type"),
|
|
234
236
|
]
|
|
235
237
|
|
|
@@ -240,6 +242,7 @@ def create_letta_message_union_schema():
|
|
|
240
242
|
{"$ref": "#/components/schemas/SystemMessage"},
|
|
241
243
|
{"$ref": "#/components/schemas/UserMessage"},
|
|
242
244
|
{"$ref": "#/components/schemas/ReasoningMessage"},
|
|
245
|
+
{"$ref": "#/components/schemas/HiddenReasoningMessage"},
|
|
243
246
|
{"$ref": "#/components/schemas/ToolCallMessage"},
|
|
244
247
|
{"$ref": "#/components/schemas/ToolReturnMessage"},
|
|
245
248
|
{"$ref": "#/components/schemas/AssistantMessage"},
|
|
@@ -250,6 +253,7 @@ def create_letta_message_union_schema():
|
|
|
250
253
|
"system_message": "#/components/schemas/SystemMessage",
|
|
251
254
|
"user_message": "#/components/schemas/UserMessage",
|
|
252
255
|
"reasoning_message": "#/components/schemas/ReasoningMessage",
|
|
256
|
+
"hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
|
|
253
257
|
"tool_call_message": "#/components/schemas/ToolCallMessage",
|
|
254
258
|
"tool_return_message": "#/components/schemas/ToolReturnMessage",
|
|
255
259
|
"assistant_message": "#/components/schemas/AssistantMessage",
|
letta/schemas/llm_config.py
CHANGED
|
@@ -60,6 +60,12 @@ class LLMConfig(BaseModel):
|
|
|
60
60
|
4096,
|
|
61
61
|
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
|
|
62
62
|
)
|
|
63
|
+
enable_reasoner: bool = Field(
|
|
64
|
+
False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
|
|
65
|
+
)
|
|
66
|
+
max_reasoning_tokens: int = Field(
|
|
67
|
+
0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
|
|
68
|
+
)
|
|
63
69
|
|
|
64
70
|
# FIXME hack to silence pydantic protected namespace warning
|
|
65
71
|
model_config = ConfigDict(protected_namespaces=())
|