letta-nightly 0.6.37.dev20250311104150__py3-none-any.whl → 0.6.39.dev20250313104142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (58)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +83 -23
  3. letta/agents/low_latency_agent.py +3 -2
  4. letta/client/client.py +1 -50
  5. letta/constants.py +4 -1
  6. letta/functions/function_sets/base.py +1 -1
  7. letta/functions/function_sets/multi_agent.py +9 -8
  8. letta/functions/helpers.py +47 -6
  9. letta/functions/schema_generator.py +47 -0
  10. letta/helpers/mcp_helpers.py +108 -0
  11. letta/llm_api/cohere.py +1 -1
  12. letta/llm_api/google_ai_client.py +332 -0
  13. letta/llm_api/google_vertex_client.py +214 -0
  14. letta/llm_api/helpers.py +1 -2
  15. letta/llm_api/llm_api_tools.py +0 -1
  16. letta/llm_api/llm_client.py +48 -0
  17. letta/llm_api/llm_client_base.py +129 -0
  18. letta/local_llm/utils.py +30 -20
  19. letta/log.py +1 -1
  20. letta/memory.py +1 -1
  21. letta/orm/__init__.py +1 -0
  22. letta/orm/block.py +8 -0
  23. letta/orm/enums.py +2 -0
  24. letta/orm/identities_blocks.py +13 -0
  25. letta/orm/identity.py +9 -0
  26. letta/orm/sqlalchemy_base.py +4 -4
  27. letta/orm/step.py +1 -0
  28. letta/schemas/block.py +4 -48
  29. letta/schemas/identity.py +3 -0
  30. letta/schemas/letta_message.py +26 -0
  31. letta/schemas/message.py +69 -63
  32. letta/schemas/step.py +1 -0
  33. letta/schemas/tool.py +39 -2
  34. letta/serialize_schemas/agent.py +8 -1
  35. letta/server/rest_api/app.py +15 -0
  36. letta/server/rest_api/chat_completions_interface.py +2 -0
  37. letta/server/rest_api/interface.py +46 -13
  38. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  39. letta/server/rest_api/routers/v1/agents.py +14 -10
  40. letta/server/rest_api/routers/v1/blocks.py +5 -1
  41. letta/server/rest_api/routers/v1/steps.py +2 -0
  42. letta/server/rest_api/routers/v1/tools.py +71 -1
  43. letta/server/rest_api/routers/v1/voice.py +3 -6
  44. letta/server/server.py +102 -5
  45. letta/services/agent_manager.py +58 -3
  46. letta/services/block_manager.py +10 -1
  47. letta/services/helpers/agent_manager_helper.py +12 -1
  48. letta/services/identity_manager.py +61 -15
  49. letta/services/message_manager.py +40 -0
  50. letta/services/step_manager.py +8 -1
  51. letta/services/summarizer/summarizer.py +1 -1
  52. letta/services/tool_manager.py +6 -0
  53. letta/settings.py +11 -12
  54. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/METADATA +20 -18
  55. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/RECORD +58 -52
  56. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/LICENSE +0 -0
  57. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/WHEEL +0 -0
  58. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -74,7 +74,7 @@ class MessageUpdate(BaseModel):
     """Request to update a message"""
 
     role: Optional[MessageRole] = Field(None, description="The role of the participant.")
-    content: Optional[Union[str, List[MessageContentUnion]]] = Field(..., description="The content of the message.")
+    content: Optional[Union[str, List[MessageContentUnion]]] = Field(None, description="The content of the message.")
     # NOTE: probably doesn't make sense to allow remapping user_id or agent_id (vs creating a new message)
     # user_id: Optional[str] = Field(None, description="The unique identifier of the user.")
     # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
@@ -158,19 +158,6 @@ class Message(BaseMessage):
         del data["content"]
         return data
 
-    @property
-    def text(self) -> Optional[str]:
-        """
-        Retrieve the first text content's text.
-
-        Returns:
-            str: The text content, or None if no text content exists
-        """
-        if not self.content:
-            return None
-        text_content = [content.text for content in self.content if content.type == MessageContentType.text]
-        return text_content[0] if text_content else None
-
     def to_json(self):
         json_message = vars(self)
         if json_message["tool_calls"] is not None:
@@ -227,17 +214,21 @@ class Message(BaseMessage):
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
     ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
         messages = []
 
         if self.role == MessageRole.assistant:
-            if self.text is not None:
+            if text_content is not None:
                 # This is type InnerThoughts
                 messages.append(
                     ReasoningMessage(
                         id=self.id,
                         date=self.created_at,
-                        reasoning=self.text,
+                        reasoning=text_content,
                     )
                 )
             if self.tool_calls is not None:
@@ -281,9 +272,9 @@ class Message(BaseMessage):
             #     "message": response_string,
             #     "time": formatted_time,
             # }
-            assert self.text is not None, self
+            assert text_content is not None, self
             try:
-                function_return = json.loads(self.text)
+                function_return = json.loads(text_content)
                 status = function_return["status"]
                 if status == "OK":
                     status_enum = "success"
@@ -292,7 +283,7 @@ class Message(BaseMessage):
                 else:
                     raise ValueError(f"Invalid status: {status}")
             except json.JSONDecodeError:
-                raise ValueError(f"Failed to decode function return: {self.text}")
+                raise ValueError(f"Failed to decode function return: {text_content}")
             assert self.tool_call_id is not None
             messages.append(
                 # TODO make sure this is what the API returns
@@ -300,7 +291,7 @@ class Message(BaseMessage):
                 ToolReturnMessage(
                     id=self.id,
                     date=self.created_at,
-                    tool_return=self.text,
+                    tool_return=text_content,
                     status=self.tool_returns[0].status if self.tool_returns else status_enum,
                     tool_call_id=self.tool_call_id,
                     stdout=self.tool_returns[0].stdout if self.tool_returns else None,
@@ -309,23 +300,23 @@ class Message(BaseMessage):
             )
         elif self.role == MessageRole.user:
             # This is type UserMessage
-            assert self.text is not None, self
-            message_str = unpack_message(self.text)
+            assert text_content is not None, self
+            message_str = unpack_message(text_content)
             messages.append(
                 UserMessage(
                     id=self.id,
                     date=self.created_at,
-                    content=message_str or self.text,
+                    content=message_str or text_content,
                 )
             )
         elif self.role == MessageRole.system:
             # This is type SystemMessage
-            assert self.text is not None, self
+            assert text_content is not None, self
             messages.append(
                 SystemMessage(
                     id=self.id,
                     date=self.created_at,
-                    content=self.text,
+                    content=text_content,
                 )
             )
         else:
@@ -494,11 +485,15 @@ class Message(BaseMessage):
         """Go from Message class to ChatCompletion message object"""
 
         # TODO change to pydantic casting, eg `return SystemMessageModel(self)`
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
         if self.role == "system":
             assert all([v is not None for v in [self.role]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
             }
             # Optional field, do not include if null
@@ -506,9 +501,9 @@ class Message(BaseMessage):
                 openai_message["name"] = self.name
 
         elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
             }
             # Optional field, do not include if null
@@ -516,9 +511,9 @@ class Message(BaseMessage):
                 openai_message["name"] = self.name
 
         elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
             openai_message = {
-                "content": None if put_inner_thoughts_in_kwargs else self.text,
+                "content": None if put_inner_thoughts_in_kwargs else text_content,
                 "role": self.role,
             }
             # Optional fields, do not include if null
@@ -530,7 +525,7 @@ class Message(BaseMessage):
                 openai_message["tool_calls"] = [
                     add_inner_thoughts_to_tool_call(
                         tool_call,
-                        inner_thoughts=self.text,
+                        inner_thoughts=text_content,
                         inner_thoughts_key=INNER_THOUGHTS_KWARG,
                     ).model_dump()
                     for tool_call in self.tool_calls
@@ -544,7 +539,7 @@ class Message(BaseMessage):
         elif self.role == "tool":
             assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
                 "tool_call_id": self.tool_call_id[:max_tool_id_length] if max_tool_id_length else self.tool_call_id,
             }
@@ -565,6 +560,10 @@ class Message(BaseMessage):
         Args:
            inner_thoughts_xml_tag (str): The XML tag to wrap around inner thoughts
        """
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
        def add_xml_tag(string: str, xml_tag: Optional[str]):
            # NOTE: Anthropic docs recommends using <thinking> tag when using CoT + tool use
@@ -573,34 +572,34 @@ class Message(BaseMessage):
        if self.role == "system":
            # NOTE: this is not for system instructions, but instead system "events"
 
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            # Two options here, we would use system.package_system_message,
            # or use a more Anthropic-specific packaging ie xml tags
-            user_system_event = add_xml_tag(string=f"SYSTEM ALERT: {self.text}", xml_tag="event")
+            user_system_event = add_xml_tag(string=f"SYSTEM ALERT: {text_content}", xml_tag="event")
            anthropic_message = {
                "content": user_system_event,
                "role": "user",
            }
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            anthropic_message = {
-                "content": self.text,
+                "content": text_content,
                "role": self.role,
            }
 
        elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
            anthropic_message = {
                "role": self.role,
            }
            content = []
            # COT / reasoning / thinking
-            if self.text is not None and not put_inner_thoughts_in_kwargs:
+            if text_content is not None and not put_inner_thoughts_in_kwargs:
                content.append(
                    {
                        "type": "text",
-                        "text": add_xml_tag(string=self.text, xml_tag=inner_thoughts_xml_tag),
+                        "text": add_xml_tag(string=text_content, xml_tag=inner_thoughts_xml_tag),
                    }
                )
            # Tool calling
@@ -610,7 +609,7 @@ class Message(BaseMessage):
                if put_inner_thoughts_in_kwargs:
                    tool_call_input = add_inner_thoughts_to_tool_call(
                        tool_call,
-                        inner_thoughts=self.text,
+                        inner_thoughts=text_content,
                        inner_thoughts_key=INNER_THOUGHTS_KWARG,
                    ).model_dump()
                else:
@@ -639,7 +638,7 @@ class Message(BaseMessage):
                    {
                        "type": "tool_result",
                        "tool_use_id": self.tool_call_id,
-                        "content": self.text,
+                        "content": text_content,
                    }
                ],
            }
@@ -656,6 +655,10 @@ class Message(BaseMessage):
        # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
        # parts[]: Part
        # role: str ('user' or 'model')
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
        if self.role != "tool" and self.name is not None:
            warnings.warn(f"Using Google AI with non-null 'name' field ({self.name}) not yet supported.")
@@ -665,18 +668,18 @@ class Message(BaseMessage):
            # https://www.reddit.com/r/Bard/comments/1b90i8o/does_gemini_have_a_system_prompt_option_while/
            google_ai_message = {
                "role": "user", # NOTE: no 'system'
-                "parts": [{"text": self.text}],
+                "parts": [{"text": text_content}],
            }
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            google_ai_message = {
                "role": "user",
-                "parts": [{"text": self.text}],
+                "parts": [{"text": text_content}],
            }
 
        elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
            google_ai_message = {
                "role": "model", # NOTE: different
            }
@@ -684,10 +687,10 @@ class Message(BaseMessage):
            # NOTE: Google AI API doesn't allow non-null content + function call
            # To get around this, just two a two part message, inner thoughts first then
            parts = []
-            if not put_inner_thoughts_in_kwargs and self.text is not None:
+            if not put_inner_thoughts_in_kwargs and text_content is not None:
                # NOTE: ideally we do multi-part for CoT / inner thoughts + function call, but Google AI API doesn't allow it
                raise NotImplementedError
-                parts.append({"text": self.text})
+                parts.append({"text": text_content})
 
            if self.tool_calls is not None:
                # NOTE: implied support for multiple calls
@@ -701,10 +704,10 @@ class Message(BaseMessage):
                        raise UserWarning(f"Failed to parse JSON function args: {function_args}")
                        function_args = {"args": function_args}
 
-                    if put_inner_thoughts_in_kwargs and self.text is not None:
+                    if put_inner_thoughts_in_kwargs and text_content is not None:
                        assert "inner_thoughts" not in function_args, function_args
                        assert len(self.tool_calls) == 1
-                        function_args[INNER_THOUGHTS_KWARG] = self.text
+                        function_args[INNER_THOUGHTS_KWARG] = text_content
 
                    parts.append(
                        {
@@ -715,8 +718,8 @@ class Message(BaseMessage):
                        }
                    )
            else:
-                assert self.text is not None
-                parts.append({"text": self.text})
+                assert text_content is not None
+                parts.append({"text": text_content})
            google_ai_message["parts"] = parts
 
        elif self.role == "tool":
@@ -731,9 +734,9 @@ class Message(BaseMessage):
 
            # NOTE: Google AI API wants the function response as JSON only, no string
            try:
-                function_response = json.loads(self.text)
+                function_response = json.loads(text_content)
            except:
-                function_response = {"function_response": self.text}
+                function_response = {"function_response": text_content}
 
            google_ai_message = {
                "role": "function",
@@ -778,7 +781,10 @@ class Message(BaseMessage):
 
        # TODO: update this prompt style once guidance from Cohere on
        # embedded function calls in multi-turn conversation become more clear
-
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
        if self.role == "system":
            """
            The chat_history parameter should not be used for SYSTEM messages in most cases.
@@ -787,26 +793,26 @@ class Message(BaseMessage):
            raise UserWarning(f"role 'system' messages should go in 'preamble' field for Cohere API")
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            cohere_message = [
                {
                    "role": "USER",
-                    "message": self.text,
+                    "message": text_content,
                }
            ]
 
        elif self.role == "assistant":
            # NOTE: we may break this into two message - an inner thought and a function call
            # Optionally, we could just make this a function call with the inner thought inside
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
 
-            if self.text and self.tool_calls:
+            if text_content and self.tool_calls:
                if inner_thoughts_as_kwarg:
                    raise NotImplementedError
                cohere_message = [
                    {
                        "role": "CHATBOT",
-                        "message": self.text,
+                        "message": text_content,
                    },
                ]
                for tc in self.tool_calls:
@@ -820,7 +826,7 @@ class Message(BaseMessage):
                            "message": f"{function_call_prefix} {function_call_text}",
                        }
                    )
-            elif not self.text and self.tool_calls:
+            elif not text_content and self.tool_calls:
                cohere_message = []
                for tc in self.tool_calls:
                    # TODO better way to pack?
@@ -831,11 +837,11 @@ class Message(BaseMessage):
                            "message": f"{function_call_prefix} {function_call_text}",
                        }
                    )
-            elif self.text and not self.tool_calls:
+            elif text_content and not self.tool_calls:
                cohere_message = [
                    {
                        "role": "CHATBOT",
-                        "message": self.text,
+                        "message": text_content,
                    }
                ]
            else:
@@ -843,7 +849,7 @@ class Message(BaseMessage):
 
        elif self.role == "tool":
            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
-            function_response_text = self.text
+            function_response_text = text_content
            cohere_message = [
                {
                    "role": function_response_role,
letta/schemas/step.py CHANGED
@@ -18,6 +18,7 @@ class Step(StepBase):
     job_id: Optional[str] = Field(
         None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
     )
+    agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
     provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
     model: Optional[str] = Field(None, description="The name of the model used for this step.")
     model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
letta/schemas/tool.py CHANGED
@@ -7,11 +7,22 @@ from letta.constants import (
     FUNCTION_RETURN_CHAR_LIMIT,
     LETTA_CORE_TOOL_MODULE_NAME,
     LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
+    MCP_TOOL_TAG_NAME_PREFIX,
 )
 from letta.functions.ast_parsers import get_function_name_and_description
 from letta.functions.functions import derive_openai_json_schema, get_json_schema_from_module
-from letta.functions.helpers import generate_composio_tool_wrapper, generate_langchain_tool_wrapper, generate_model_from_args_json_schema
-from letta.functions.schema_generator import generate_schema_from_args_schema_v2, generate_tool_schema_for_composio
+from letta.functions.helpers import (
+    generate_composio_tool_wrapper,
+    generate_langchain_tool_wrapper,
+    generate_mcp_tool_wrapper,
+    generate_model_from_args_json_schema,
+)
+from letta.functions.schema_generator import (
+    generate_schema_from_args_schema_v2,
+    generate_tool_schema_for_composio,
+    generate_tool_schema_for_mcp,
+)
+from letta.helpers.mcp_helpers import MCPTool
 from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.schemas.letta_base import LettaBase
@@ -121,6 +132,32 @@ class ToolCreate(LettaBase):
     args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")
     return_char_limit: int = Field(FUNCTION_RETURN_CHAR_LIMIT, description="The maximum number of characters in the response.")
 
+    # TODO should we put the HTTP / API fetch inside from_mcp?
+    # async def from_mcp(cls, mcp_server: str, mcp_tool_name: str) -> "ToolCreate":
+
+    @classmethod
+    def from_mcp(cls, mcp_server_name: str, mcp_tool: MCPTool) -> "ToolCreate":
+
+        # Get the MCP tool from the MCP server
+        # NVM
+
+        # Pass the MCP tool to the schema generator
+        json_schema = generate_tool_schema_for_mcp(mcp_tool=mcp_tool)
+
+        # Return a ToolCreate instance
+        description = mcp_tool.description
+        source_type = "python"
+        tags = [f"{MCP_TOOL_TAG_NAME_PREFIX}:{mcp_server_name}"]
+        wrapper_func_name, wrapper_function_str = generate_mcp_tool_wrapper(mcp_tool.name)
+
+        return cls(
+            description=description,
+            source_type=source_type,
+            tags=tags,
+            source_code=wrapper_function_str,
+            json_schema=json_schema,
+        )
+
     @classmethod
     def from_composio(cls, action_name: str) -> "ToolCreate":
         """
letta/serialize_schemas/agent.py CHANGED
@@ -70,4 +70,11 @@ class SerializedAgentSchema(BaseSchema):
     class Meta(BaseSchema.Meta):
         model = Agent
         # TODO: Serialize these as well...
-        exclude = BaseSchema.Meta.exclude + ("sources", "source_passages", "agent_passages")
+        exclude = BaseSchema.Meta.exclude + (
+            "project_id",
+            "template_id",
+            "base_template_id",
+            "sources",
+            "source_passages",
+            "agent_passages",
+        )
letta/server/rest_api/app.py CHANGED
@@ -136,6 +136,21 @@ def create_application() -> "FastAPI":
         debug=debug_mode,  # if True, the stack trace will be printed in the response
     )
 
+    @app.on_event("shutdown")
+    def shutdown_mcp_clients():
+        global server
+        import threading
+
+        def cleanup_clients():
+            if hasattr(server, "mcp_clients"):
+                for client in server.mcp_clients.values():
+                    client.cleanup()
+                server.mcp_clients.clear()
+
+        t = threading.Thread(target=cleanup_clients)
+        t.start()
+        t.join()
+
     @app.exception_handler(Exception)
     async def generic_error_handler(request: Request, exc: Exception):
         # Log the actual error for debugging
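The shutdown hook drives cleanup through a short-lived helper thread and joins it immediately, so the `on_event("shutdown")` callback still blocks until every MCP client is torn down. A minimal standalone sketch of the same pattern, assuming each client exposes a blocking `cleanup()` method as the diff implies:

```python
# Standalone sketch of the shutdown pattern above; FakeMCPClient is a stand-in
# for whatever letta's MCP client exposes (only a blocking cleanup() is assumed).
import threading


class FakeMCPClient:
    def cleanup(self) -> None:
        print("closing transport")


mcp_clients = {"docs-server": FakeMCPClient()}


def cleanup_clients() -> None:
    for client in mcp_clients.values():
        client.cleanup()
    mcp_clients.clear()


# Running cleanup on a fresh thread keeps any thread-local or event-loop state
# the clients hold away from the caller's context; join() makes shutdown wait.
t = threading.Thread(target=cleanup_clients)
t.start()
t.join()
```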
letta/server/rest_api/chat_completions_interface.py CHANGED
@@ -267,3 +267,5 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
     """Clears internal buffers for function call name/args."""
     self.current_function_name = ""
     self.current_function_arguments = []
+    self.current_json_parse_result = {}
+    self._found_message_tool_kwarg = False
letta/server/rest_api/interface.py CHANGED
@@ -24,6 +24,7 @@
 )
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
 from letta.streaming_interface import AgentChunkStreamingInterface
 from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 
@@ -282,6 +283,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # turn function argument to send_message into a normal text stream
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
 
+        # @matt's changes here, adopting new optimistic json parser
+        self.current_function_arguments = []
+        self.optimistic_json_parser = OptimisticJSONParser()
+        self.current_json_parse_result = {}
+
         # Store metadata passed from server
         self.metadata = {}
 
@@ -374,6 +380,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_start(self):
         """Initialize streaming by activating the generator and clearing any old chunks."""
         self.streaming_chat_completion_mode_function_name = None
+        self.current_function_arguments = []
+        self.current_json_parse_result = {}
 
         if not self._active:
             self._active = True
@@ -383,6 +391,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_end(self):
         """Clean up the stream by deactivating and clearing chunks."""
         self.streaming_chat_completion_mode_function_name = None
+        self.current_function_arguments = []
+        self.current_json_parse_result = {}
 
         # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
         #     self._push_to_buffer(self.multi_step_gen_indicator)
@@ -568,20 +578,27 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     self.streaming_chat_completion_json_reader.reset()
                     # early exit to turn into content mode
                     return None
+            if tool_call.function.arguments:
+                self.current_function_arguments.append(tool_call.function.arguments)
 
             # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
             if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
                 # Strip out any extras tokens
-                cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
                 # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
-                if cleaned_func_args is None:
-                    return None
+                combined_args = "".join(self.current_function_arguments)
+                parsed_args = self.optimistic_json_parser.parse(combined_args)
+
+                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                    self.assistant_message_tool_kwarg
+                ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                    new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                    prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                    # TODO: Assumes consistent state and that prev_content is subset of new_content
+                    diff = new_content.replace(prev_content, "", 1)
+                    self.current_json_parse_result = parsed_args
+                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff)
                 else:
-                    processed_chunk = AssistantMessage(
-                        id=message_id,
-                        date=message_date,
-                        content=cleaned_func_args,
-                    )
+                    return None
 
             # otherwise we just do a regular passthrough of a ToolCallDelta via a ToolCallMessage
             else:
@@ -637,6 +654,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             #     updates_inner_thoughts = ""
             # else:  # OpenAI
             #     updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+            self.current_function_arguments.append(tool_call.function.arguments)
             updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
 
             # If we have inner thoughts, we should output them as a chunk
@@ -731,6 +749,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     if self.function_args_buffer:
                         # In this case, we should release the buffer + new data at once
                         combined_chunk = self.function_args_buffer + updates_main_json
+
                         processed_chunk = AssistantMessage(
                             id=message_id,
                             date=message_date,
@@ -745,11 +764,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
                     else:
                         # If there's no buffer to clear, just output a new chunk with new data
-                        processed_chunk = AssistantMessage(
-                            id=message_id,
-                            date=message_date,
-                            content=updates_main_json,
-                        )
+                        # TODO: THIS IS HORRIBLE
+                        # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
+                        # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
+                        combined_args = "".join(self.current_function_arguments)
+                        parsed_args = self.optimistic_json_parser.parse(combined_args)
+
+                        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                            self.assistant_message_tool_kwarg
+                        ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                            new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                            prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                            # TODO: Assumes consistent state and that prev_content is subset of new_content
+                            diff = new_content.replace(prev_content, "", 1)
+                            self.current_json_parse_result = parsed_args
+                            processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff)
+                        else:
+                            return None
+
                     # Store the ID of the tool call so allow skipping the corresponding response
                     if self.function_id_buffer:
                         self.prev_assistant_message_id = self.function_id_buffer
@@ -1018,6 +1050,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 message_date=message_date,
                 expect_reasoning_content=expect_reasoning_content,
             )
+
            if processed_chunk is None:
                return
 
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py CHANGED
@@ -24,7 +24,7 @@ logger = get_logger(__name__)
 
 
 @router.post(
-    "/chat/completions",
+    "/{agent_id}/chat/completions",
     response_model=None,
     operation_id="create_chat_completions",
     responses={
@@ -37,6 +37,7 @@ logger = get_logger(__name__)
     },
 )
 async def create_chat_completions(
+    agent_id: str,
     completion_request: CompletionCreateParams = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),
@@ -51,12 +52,6 @@ async def create_chat_completions(
 
     actor = server.user_manager.get_user_or_default(user_id=user_id)
 
-    agent_id = str(completion_request.get("user", None))
-    if agent_id is None:
-        error_msg = "Must pass agent_id in the 'user' field"
-        logger.error(error_msg)
-        raise HTTPException(status_code=400, detail=error_msg)
-
     letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
     llm_config = letta_agent.agent_state.llm_config
     if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
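With the agent ID moved into the path, an OpenAI-compatible client can target the agent via `base_url` instead of overloading the `user` field. A hedged sketch; the host, port, and route prefix below are assumptions about a local Letta server, not confirmed by this diff:

```python
# Illustrative call against the new path-parameterized route. The base_url
# (host, port, and "/openai/v1" prefix) is an assumption; only the
# "/{agent_id}/chat/completions" suffix comes from the diff above.
from openai import OpenAI

agent_id = "agent-00000000-0000-0000-0000-000000000000"  # placeholder ID

client = OpenAI(
    base_url=f"http://localhost:8283/openai/v1/{agent_id}",  # prefix assumed
    api_key="unused-locally",
)

resp = client.chat.completions.create(
    model="gpt-4o-mini",  # the server routes to the agent's configured model
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in resp:
    print(chunk.choices[0].delta.content or "", end="")
```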