letta-nightly 0.6.43.dev20250324104208__py3-none-any.whl → 0.6.44.dev20250325050316__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (42)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +106 -104
  3. letta/agents/voice_agent.py +1 -1
  4. letta/client/streaming.py +3 -1
  5. letta/functions/function_sets/base.py +2 -1
  6. letta/functions/function_sets/multi_agent.py +51 -40
  7. letta/functions/helpers.py +26 -22
  8. letta/helpers/message_helper.py +41 -0
  9. letta/llm_api/anthropic.py +150 -44
  10. letta/llm_api/aws_bedrock.py +5 -3
  11. letta/llm_api/azure_openai.py +0 -1
  12. letta/llm_api/llm_api_tools.py +4 -0
  13. letta/orm/organization.py +1 -0
  14. letta/orm/sqlalchemy_base.py +2 -4
  15. letta/schemas/agent.py +8 -0
  16. letta/schemas/letta_message.py +8 -4
  17. letta/schemas/llm_config.py +6 -0
  18. letta/schemas/message.py +143 -24
  19. letta/schemas/openai/chat_completion_response.py +5 -0
  20. letta/schemas/organization.py +7 -0
  21. letta/schemas/providers.py +17 -0
  22. letta/schemas/tool.py +5 -1
  23. letta/schemas/usage.py +5 -1
  24. letta/serialize_schemas/pydantic_agent_schema.py +1 -1
  25. letta/server/rest_api/interface.py +44 -7
  26. letta/server/rest_api/routers/v1/agents.py +13 -2
  27. letta/server/rest_api/routers/v1/organizations.py +19 -1
  28. letta/server/rest_api/utils.py +1 -1
  29. letta/server/server.py +49 -70
  30. letta/services/agent_manager.py +6 -2
  31. letta/services/helpers/agent_manager_helper.py +24 -38
  32. letta/services/message_manager.py +7 -6
  33. letta/services/organization_manager.py +13 -0
  34. letta/services/tool_execution_sandbox.py +5 -1
  35. letta/services/tool_executor/__init__.py +0 -0
  36. letta/services/tool_executor/tool_execution_manager.py +74 -0
  37. letta/services/tool_executor/tool_executor.py +380 -0
  38. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/METADATA +2 -3
  39. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/RECORD +42 -38
  40. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/LICENSE +0 -0
  41. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/WHEEL +0 -0
  42. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic.py CHANGED
@@ -13,7 +13,9 @@ from anthropic.types.beta import (
     BetaRawMessageDeltaEvent,
     BetaRawMessageStartEvent,
     BetaRawMessageStopEvent,
+    BetaRedactedThinkingBlock,
     BetaTextBlock,
+    BetaThinkingBlock,
     BetaToolUseBlock,
 )
 
@@ -345,43 +347,32 @@ def convert_anthropic_response_to_chatcompletion(
     finish_reason = remap_finish_reason(response.stop_reason)
 
     content = None
+    reasoning_content = None
+    reasoning_content_signature = None
+    redacted_reasoning_content = None
     tool_calls = None
 
     if len(response.content) > 1:
-        # inner mono + function call
-        assert len(response.content) == 2
-        text_block = response.content[0]
-        tool_block = response.content[1]
-        assert text_block.type == "text"
-        assert tool_block.type == "tool_use"
-        content = strip_xml_tags(string=text_block.text, tag=inner_thoughts_xml_tag)
-        tool_calls = [
-            ToolCall(
-                id=tool_block.id,
-                type="function",
-                function=FunctionCall(
-                    name=tool_block.name,
-                    arguments=json.dumps(tool_block.input, indent=2),
-                ),
-            )
-        ]
-    elif len(response.content) == 1:
-        block = response.content[0]
-        if block.type == "tool_use":
-            # function call only
-            tool_calls = [
-                ToolCall(
-                    id=block.id,
-                    type="function",
-                    function=FunctionCall(
-                        name=block.name,
-                        arguments=json.dumps(block.input, indent=2),
-                    ),
-                )
-            ]
-        else:
-            # inner mono only
-            content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag)
+        for content_part in response.content:
+            if content_part.type == "text":
+                content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
+            if content_part.type == "tool_use":
+                tool_calls = [
+                    ToolCall(
+                        id=content_part.id,
+                        type="function",
+                        function=FunctionCall(
+                            name=content_part.name,
+                            arguments=json.dumps(content_part.input, indent=2),
+                        ),
+                    )
+                ]
+            if content_part.type == "thinking":
+                reasoning_content = content_part.thinking
+                reasoning_content_signature = content_part.signature
+            if content_part.type == "redacted_thinking":
+                redacted_reasoning_content = content_part.data
+
     else:
         raise RuntimeError("Unexpected empty content in response")
 
@@ -392,6 +383,9 @@ def convert_anthropic_response_to_chatcompletion(
         message=ChoiceMessage(
             role=response.role,
             content=content,
+            reasoning_content=reasoning_content,
+            reasoning_content_signature=reasoning_content_signature,
+            redacted_reasoning_content=redacted_reasoning_content,
             tool_calls=tool_calls,
         ),
     )
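For illustration, a minimal sketch of how the new loop maps a mixed Anthropic response (thinking, redacted thinking, then a tool call) onto the new fields. The SimpleNamespace blocks below are stand-ins for the SDK's content-block objects, and all values are invented:

    from types import SimpleNamespace as Block

    # Hypothetical response.content for a turn that thinks, has part of its
    # reasoning redacted by the provider, and then calls a tool.
    blocks = [
        Block(type="thinking", thinking="The user wants a greeting...", signature="sig-abc"),
        Block(type="redacted_thinking", data="opaque-encrypted-blob"),
        Block(type="tool_use", id="toolu_01", name="send_message", input={"message": "hi"}),
    ]

    reasoning_content = reasoning_content_signature = redacted_reasoning_content = None
    for content_part in blocks:  # mirrors the loop in the hunk above
        if content_part.type == "thinking":
            reasoning_content = content_part.thinking
            reasoning_content_signature = content_part.signature
        if content_part.type == "redacted_thinking":
            redacted_reasoning_content = content_part.data

    assert reasoning_content == "The user wants a greeting..."
    assert redacted_reasoning_content == "opaque-encrypted-blob"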
@@ -462,7 +456,31 @@ def convert_anthropic_stream_event_to_chatcompletion(
     """
     # Get finish reason
     finish_reason = None
-    if isinstance(event, BetaRawMessageDeltaEvent):
+    completion_chunk_tokens = 0
+
+    # Get content and tool calls
+    content = None
+    reasoning_content = None
+    reasoning_content_signature = None
+    redacted_reasoning_content = None  # NOTE called "data" in the stream
+    tool_calls = None
+    if isinstance(event, BetaRawMessageStartEvent):
+        """
+        BetaRawMessageStartEvent(
+            message=BetaMessage(
+                content=[],
+                usage=BetaUsage(
+                    input_tokens=3086,
+                    output_tokens=1,
+                ),
+                ...,
+            ),
+            type='message_start'
+        )
+        """
+        completion_chunk_tokens += event.message.usage.output_tokens
+
+    elif isinstance(event, BetaRawMessageDeltaEvent):
         """
         BetaRawMessageDeltaEvent(
             delta=Delta(
@@ -474,11 +492,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
         )
         """
         finish_reason = remap_finish_reason(event.delta.stop_reason)
+        completion_chunk_tokens += event.usage.output_tokens
 
-    # Get content and tool calls
-    content = None
-    tool_calls = None
-    if isinstance(event, BetaRawContentBlockDeltaEvent):
+    elif isinstance(event, BetaRawContentBlockDeltaEvent):
         """
         BetaRawContentBlockDeltaEvent(
             delta=BetaInputJSONDelta(
@@ -501,9 +517,24 @@ def convert_anthropic_stream_event_to_chatcompletion(
         )
 
         """
+        # ReACT COT
         if event.delta.type == "text_delta":
             content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
 
+        # Extended thought COT
+        elif event.delta.type == "thinking_delta":
+            # Redacted doesn't come in the delta chunks, comes all at once
+            # "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
+            # Thinking might start with ""
+            if len(event.delta.thinking) > 0:
+                reasoning_content = event.delta.thinking
+
+        # Extended thought COT signature
+        elif event.delta.type == "signature_delta":
+            if len(event.delta.signature) > 0:
+                reasoning_content_signature = event.delta.signature
+
+        # Tool calling
         elif event.delta.type == "input_json_delta":
             tool_calls = [
                 ToolCallDelta(
@@ -514,6 +545,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
                     ),
                 )
             ]
+        else:
+            warnings.warn("Unexpected delta type: " + event.delta.type)
+
     elif isinstance(event, BetaRawContentBlockStartEvent):
         """
         BetaRawContentBlockStartEvent(
@@ -551,6 +585,15 @@ def convert_anthropic_stream_event_to_chatcompletion(
             ]
         elif isinstance(event.content_block, BetaTextBlock):
             content = event.content_block.text
+        elif isinstance(event.content_block, BetaThinkingBlock):
+            reasoning_content = event.content_block.thinking
+        elif isinstance(event.content_block, BetaRedactedThinkingBlock):
+            redacted_reasoning_content = event.content_block.data
+        else:
+            warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
+
+    else:
+        warnings.warn("Unexpected event type: " + event.type)
 
     # Initialize base response
     choice = ChunkChoice(
@@ -558,6 +601,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
         finish_reason=finish_reason,
         delta=MessageDelta(
             content=content,
+            reasoning_content=reasoning_content,
+            reasoning_content_signature=reasoning_content_signature,
+            redacted_reasoning_content=redacted_reasoning_content,
             tool_calls=tool_calls,
         ),
     )
@@ -566,6 +612,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
         choices=[choice],
         created=get_utc_time(),
         model=model,
+        output_tokens=completion_chunk_tokens,
     )
 
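Since each chunk response now carries output_tokens, a consumer can total completion tokens instead of counting chunks. A rough sketch of that accumulation (SimpleNamespace stands in for ChatCompletionChunkResponse; the token counts are invented):

    from types import SimpleNamespace

    chunks = [
        SimpleNamespace(output_tokens=1),    # from the BetaRawMessageStartEvent usage
        SimpleNamespace(output_tokens=137),  # from the BetaRawMessageDeltaEvent usage
        SimpleNamespace(output_tokens=0),    # content-block events carry no usage
    ]
    completion_tokens = sum(c.output_tokens for c in chunks if c.output_tokens is not None)
    assert completion_tokens == 138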
 
@@ -577,8 +624,20 @@ def _prepare_anthropic_request(
     # if true, put COT inside the tool calls instead of inside the content
     put_inner_thoughts_in_kwargs: bool = False,
     bedrock: bool = False,
+    # extended thinking related fields
+    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
 ) -> dict:
     """Prepare the request data for Anthropic API format."""
+    if extended_thinking:
+        assert (
+            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
+        ), "max tokens must be greater than thinking budget"
+        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
+        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
+        # Silently disable prefix_fill for now
+        prefix_fill = False
 
     # if needed, put inner thoughts as a kwarg for all tools
     if data.tools and put_inner_thoughts_in_kwargs:
@@ -595,6 +654,14 @@ def _prepare_anthropic_request(
     # pydantic -> dict
     data = data.model_dump(exclude_none=True)
 
+    if extended_thinking:
+        data["thinking"] = {
+            "type": "enabled",
+            "budget_tokens": max_reasoning_tokens,
+        }
+        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
+        data["temperature"] = 1.0
+
     if "functions" in data:
         raise ValueError(f"'functions' unexpected in Anthropic API payload")
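With extended thinking enabled, the prepared request body ends up looking roughly like the sketch below. The model name and all values are illustrative, not taken from the diff; the diff only guarantees the "thinking" block, the forced temperature, and that the budget stays below max_tokens:

    payload = {
        "model": "claude-3-7-sonnet-20250219",  # illustrative model choice
        "max_tokens": 4096,
        "temperature": 1.0,  # forced to 1.0 whenever thinking is enabled
        "thinking": {"type": "enabled", "budget_tokens": 1024},  # budget < max_tokens
        "messages": [{"role": "user", "content": "Hello"}],
    }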
 
@@ -665,6 +732,8 @@ def anthropic_chat_completions_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
@@ -678,6 +747,8 @@ def anthropic_chat_completions_request(
         data=data,
         inner_thoughts_xml_tag=inner_thoughts_xml_tag,
         put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+        extended_thinking=extended_thinking,
+        max_reasoning_tokens=max_reasoning_tokens,
     )
     log_event(name="llm_request_sent", attributes=data)
     response = anthropic_client.beta.messages.create(
@@ -717,6 +788,8 @@ def anthropic_chat_completions_request_stream(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
     """Stream chat completions from Anthropic API.
@@ -728,6 +801,8 @@ def anthropic_chat_completions_request_stream(
         data=data,
         inner_thoughts_xml_tag=inner_thoughts_xml_tag,
         put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+        extended_thinking=extended_thinking,
+        max_reasoning_tokens=max_reasoning_tokens,
     )
 
     anthropic_override_key = ProviderManager().get_anthropic_override_key()
@@ -777,6 +852,8 @@ def anthropic_chat_completions_process_stream(
     stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     betas: List[str] = ["tools-2024-04-04"],
@@ -839,7 +916,6 @@ def anthropic_chat_completions_process_stream(
         created=dummy_message.created_at,
         model=chat_completion_request.model,
         usage=UsageStatistics(
-            completion_tokens=0,
             prompt_tokens=prompt_tokens,
             total_tokens=prompt_tokens,
         ),
@@ -850,13 +926,15 @@ def anthropic_chat_completions_process_stream(
     if stream_interface:
         stream_interface.stream_start()
 
-    n_chunks = 0
+    completion_tokens = 0
     try:
         for chunk_idx, chat_completion_chunk in enumerate(
             anthropic_chat_completions_request_stream(
                 data=chat_completion_request,
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
                 put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                extended_thinking=extended_thinking,
+                max_reasoning_tokens=max_reasoning_tokens,
                 betas=betas,
             )
         ):
@@ -868,6 +946,9 @@ def anthropic_chat_completions_process_stream(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                     message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                    # if extended_thinking is on, then reasoning_content will be flowing as chunks
+                    # TODO handle emitting redacted reasoning content (e.g. as concat?)
+                    expect_reasoning_content=extended_thinking,
                 )
             elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                 stream_interface.process_refresh(chat_completion_response)
@@ -908,6 +989,30 @@ def anthropic_chat_completions_process_stream(
             else:
                 accum_message.content += content_delta
 
+            # NOTE: for extended_thinking mode
+            if extended_thinking and message_delta.reasoning_content is not None:
+                reasoning_content_delta = message_delta.reasoning_content
+                if accum_message.reasoning_content is None:
+                    accum_message.reasoning_content = reasoning_content_delta
+                else:
+                    accum_message.reasoning_content += reasoning_content_delta
+
+            # NOTE: extended_thinking sends a signature
+            if extended_thinking and message_delta.reasoning_content_signature is not None:
+                reasoning_content_signature_delta = message_delta.reasoning_content_signature
+                if accum_message.reasoning_content_signature is None:
+                    accum_message.reasoning_content_signature = reasoning_content_signature_delta
+                else:
+                    accum_message.reasoning_content_signature += reasoning_content_signature_delta
+
+            # NOTE: extended_thinking also has the potential for redacted_reasoning_content
+            if extended_thinking and message_delta.redacted_reasoning_content is not None:
+                redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
+                if accum_message.redacted_reasoning_content is None:
+                    accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
+                else:
+                    accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
+
             # TODO(charles) make sure this works for parallel tool calling?
             if message_delta.tool_calls is not None:
                 tool_calls_delta = message_delta.tool_calls
@@ -966,7 +1071,8 @@ def anthropic_chat_completions_process_stream(
             chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
 
             # increment chunk counter
-            n_chunks += 1
+            if chat_completion_chunk.output_tokens is not None:
+                completion_tokens += chat_completion_chunk.output_tokens
 
     except Exception as e:
         if stream_interface:
@@ -990,8 +1096,8 @@ def anthropic_chat_completions_process_stream(
 
     # compute token usage before returning
     # TODO try actually computing the #tokens instead of assuming the chunks is the same
-    chat_completion_response.usage.completion_tokens = n_chunks
-    chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
+    chat_completion_response.usage.completion_tokens = completion_tokens
+    chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
 
     assert len(chat_completion_response.choices) > 0, chat_completion_response
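A worked example of the revised usage arithmetic, reusing the prompt count from the BetaRawMessageStartEvent docstring earlier (the completion count is invented):

    prompt_tokens = 3086       # reported when the stream starts
    completion_tokens = 138    # accumulated from chunk.output_tokens, not chunk count
    total_tokens = prompt_tokens + completion_tokens
    assert total_tokens == 3224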
 
letta/llm_api/aws_bedrock.py CHANGED
@@ -3,17 +3,19 @@ from typing import Any, Dict, List
 
 from anthropic import AnthropicBedrock
 
-from letta.settings import model_settings
-
 from letta.log import get_logger
+from letta.settings import model_settings
 
 logger = get_logger(__name__)
 
+
 def has_valid_aws_credentials() -> bool:
     """
     Check if AWS credentials are properly configured.
     """
-    valid_aws_credentials = os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
+    valid_aws_credentials = (
+        os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
+    )
     return valid_aws_credentials
 
letta/llm_api/azure_openai.py CHANGED
@@ -3,7 +3,6 @@ from collections import defaultdict
 import requests
 from openai import AzureOpenAI
 
-
 from letta.llm_api.openai import prepare_openai_payload
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
letta/llm_api/llm_api_tools.py CHANGED
@@ -406,6 +406,8 @@ def create(
                 chat_completion_request=chat_completion_request,
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
                 stream_interface=stream_interface,
+                extended_thinking=llm_config.enable_reasoner,
+                max_reasoning_tokens=llm_config.max_reasoning_tokens,
             )
 
         else:
@@ -413,6 +415,8 @@ def create(
             response = anthropic_chat_completions_request(
                 data=chat_completion_request,
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+                extended_thinking=llm_config.enable_reasoner,
+                max_reasoning_tokens=llm_config.max_reasoning_tokens,
             )
 
         if llm_config.put_inner_thoughts_in_kwargs:
letta/orm/organization.py CHANGED
@@ -23,6 +23,7 @@ class Organization(SqlalchemyBase):
     __pydantic_model__ = PydanticOrganization
 
     name: Mapped[str] = mapped_column(doc="The display name of the organization.")
+    privileged_tools: Mapped[bool] = mapped_column(doc="Whether the organization has access to privileged tools.")
 
     # relationships
     users: Mapped[List["User"]] = relationship("User", back_populates="organization", cascade="all, delete-orphan")
letta/orm/sqlalchemy_base.py CHANGED
@@ -361,14 +361,12 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
         if identifier_set != results_set:
             # Construct a detailed error message based on query conditions
             conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
-            logger.warning(
-                f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}"
-            )
+            logger.debug(f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}")
             return results
 
         # Construct a detailed error message based on query conditions
         conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
-        logger.warning(f"{cls.__name__} not found with {conditions_str}")
+        logger.debug(f"{cls.__name__} not found with {conditions_str}")
         return []
 
     @handle_db_timeout
letta/schemas/agent.py CHANGED
@@ -147,6 +147,14 @@ class CreateAgent(BaseModel, validate_assignment=True):
     )
     context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
     embedding_chunk_size: Optional[int] = Field(DEFAULT_EMBEDDING_CHUNK_SIZE, description="The embedding chunk size used by the agent.")
+    max_tokens: Optional[int] = Field(
+        None,
+        description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
+    )
+    max_reasoning_tokens: Optional[int] = Field(
+        None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
+    )
+    enable_reasoner: Optional[bool] = Field(False, description="Whether to enable internal extended thinking step for a reasoner model.")
     from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
     template: bool = Field(False, description="Whether the agent is a template")
     project: Optional[str] = Field(
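For illustration, an agent-creation payload exercising the three new fields might look like the sketch below. The values are invented and every other CreateAgent field behaves as before:

    create_agent_payload = {
        "enable_reasoner": True,
        "max_reasoning_tokens": 1024,  # thinking budget, must stay below max_tokens
        "max_tokens": 4096,            # total generation budget, including the reasoning step
    }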
letta/schemas/letta_message.py CHANGED
@@ -88,11 +88,13 @@ class ReasoningMessage(LettaMessage):
         source (Literal["reasoner_model", "non_reasoner_model"]): Whether the reasoning
             content was generated natively by a reasoner model or derived via prompting
         reasoning (str): The internal reasoning of the agent
+        signature (Optional[str]): The model-generated signature of the reasoning step
     """
 
     message_type: Literal["reasoning_message"] = "reasoning_message"
     source: Literal["reasoner_model", "non_reasoner_model"] = "non_reasoner_model"
     reasoning: str
+    signature: Optional[str] = None
 
 
 class HiddenReasoningMessage(LettaMessage):
@@ -106,12 +108,12 @@ class HiddenReasoningMessage(LettaMessage):
         name (Optional[str]): The name of the sender of the message
         state (Literal["redacted", "omitted"]): Whether the reasoning
             content was redacted by the provider or simply omitted by the API
-        reasoning (str): The internal reasoning of the agent
+        hidden_reasoning (Optional[str]): The internal reasoning of the agent
     """
 
-    message_type: Literal["reasoning_message"] = "reasoning_message"
+    message_type: Literal["hidden_reasoning_message"] = "hidden_reasoning_message"
     state: Literal["redacted", "omitted"]
-    reasoning: str
+    hidden_reasoning: Optional[str] = None
 
 
 class ToolCall(BaseModel):
@@ -229,7 +231,7 @@ class AssistantMessage(LettaMessage):
 
 # NOTE: use Pydantic's discriminated unions feature: https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions
 LettaMessageUnion = Annotated[
-    Union[SystemMessage, UserMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
+    Union[SystemMessage, UserMessage, ReasoningMessage, HiddenReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
     Field(discriminator="message_type"),
 ]
 
@@ -240,6 +242,7 @@ def create_letta_message_union_schema():
             {"$ref": "#/components/schemas/SystemMessage"},
             {"$ref": "#/components/schemas/UserMessage"},
             {"$ref": "#/components/schemas/ReasoningMessage"},
+            {"$ref": "#/components/schemas/HiddenReasoningMessage"},
             {"$ref": "#/components/schemas/ToolCallMessage"},
             {"$ref": "#/components/schemas/ToolReturnMessage"},
             {"$ref": "#/components/schemas/AssistantMessage"},
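A small sketch of the extended discriminated union in action, assuming pydantic v2's TypeAdapter; the id/date values are placeholders and any additional required base fields of LettaMessage are not shown:

    from pydantic import TypeAdapter

    from letta.schemas.letta_message import HiddenReasoningMessage, LettaMessageUnion

    adapter = TypeAdapter(LettaMessageUnion)
    msg = adapter.validate_python(
        {
            "id": "message-123",                  # placeholder id
            "date": "2025-03-25T00:00:00+00:00",  # placeholder timestamp
            "message_type": "hidden_reasoning_message",
            "state": "redacted",
        }
    )
    assert isinstance(msg, HiddenReasoningMessage)
    assert msg.hidden_reasoning is None  # optional now, unlike the old required `reasoning`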
@@ -250,6 +253,7 @@ def create_letta_message_union_schema():
         "system_message": "#/components/schemas/SystemMessage",
         "user_message": "#/components/schemas/UserMessage",
         "reasoning_message": "#/components/schemas/ReasoningMessage",
+        "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
         "tool_call_message": "#/components/schemas/ToolCallMessage",
         "tool_return_message": "#/components/schemas/ToolReturnMessage",
         "assistant_message": "#/components/schemas/AssistantMessage",
letta/schemas/llm_config.py CHANGED
@@ -60,6 +60,12 @@ class LLMConfig(BaseModel):
         4096,
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )
+    enable_reasoner: bool = Field(
+        False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
+    )
+    max_reasoning_tokens: int = Field(
+        0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
+    )
 
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
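Taken together with the llm_api_tools.py changes above, enabling the feature comes down to setting the two new LLMConfig fields, which create() forwards as extended_thinking and max_reasoning_tokens. A sketch, where the model name and context window are illustrative rather than taken from the diff:

    from letta.schemas.llm_config import LLMConfig

    config = LLMConfig(
        model="claude-3-7-sonnet-20250219",  # illustrative Anthropic model
        model_endpoint_type="anthropic",
        context_window=200000,
        max_tokens=4096,
        enable_reasoner=True,                # forwarded as extended_thinking
        max_reasoning_tokens=1024,           # the documented minimum budget
        put_inner_thoughts_in_kwargs=False,  # required when extended thinking is on
    )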