letta-nightly 0.5.0.dev20241021104213__py3-none-any.whl → 0.5.0.dev20241023104105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (33) hide show
  1. letta/__init__.py +7 -2
  2. letta/agent_store/db.py +4 -2
  3. letta/cli/cli_config.py +2 -2
  4. letta/client/client.py +13 -0
  5. letta/constants.py +4 -1
  6. letta/embeddings.py +34 -16
  7. letta/llm_api/azure_openai.py +44 -4
  8. letta/llm_api/helpers.py +45 -19
  9. letta/llm_api/openai.py +24 -5
  10. letta/metadata.py +1 -59
  11. letta/orm/__all__.py +0 -0
  12. letta/orm/__init__.py +0 -0
  13. letta/orm/base.py +75 -0
  14. letta/orm/enums.py +8 -0
  15. letta/orm/errors.py +2 -0
  16. letta/orm/mixins.py +40 -0
  17. letta/orm/organization.py +35 -0
  18. letta/orm/sqlalchemy_base.py +214 -0
  19. letta/schemas/organization.py +3 -3
  20. letta/server/rest_api/interface.py +245 -98
  21. letta/server/rest_api/routers/v1/agents.py +11 -3
  22. letta/server/rest_api/routers/v1/organizations.py +4 -5
  23. letta/server/server.py +10 -25
  24. letta/services/__init__.py +0 -0
  25. letta/services/organization_manager.py +66 -0
  26. letta/streaming_utils.py +270 -0
  27. {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/METADATA +2 -1
  28. {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/RECORD +31 -22
  29. letta/base.py +0 -3
  30. letta/client/admin.py +0 -171
  31. {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/LICENSE +0 -0
  32. {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/WHEEL +0 -0
  33. {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/entry_points.txt +0 -0
@@ -8,6 +8,7 @@ from typing import AsyncGenerator, Literal, Optional, Union
8
8
 
9
9
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
10
10
  from letta.interface import AgentInterface
11
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
11
12
  from letta.schemas.enums import MessageStreamStatus
12
13
  from letta.schemas.letta_message import (
13
14
  AssistantMessage,
@@ -23,9 +24,14 @@ from letta.schemas.letta_message import (
23
24
  from letta.schemas.message import Message
24
25
  from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
25
26
  from letta.streaming_interface import AgentChunkStreamingInterface
27
+ from letta.streaming_utils import (
28
+ FunctionArgumentsStreamHandler,
29
+ JSONInnerThoughtsExtractor,
30
+ )
26
31
  from letta.utils import is_utc_datetime
27
32
 
28
33
 
34
+ # TODO strip from code / deprecate
29
35
  class QueuingInterface(AgentInterface):
30
36
  """Messages are queued inside an internal buffer and manually flushed"""
31
37
 
@@ -248,58 +254,6 @@ class QueuingInterface(AgentInterface):
248
254
  self._queue_push(message_api=new_message, message_obj=msg_obj)
249
255
 
250
256
 
251
- class FunctionArgumentsStreamHandler:
252
- """State machine that can process a stream of"""
253
-
254
- def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
255
- self.json_key = json_key
256
- self.reset()
257
-
258
- def reset(self):
259
- self.in_message = False
260
- self.key_buffer = ""
261
- self.accumulating = False
262
- self.message_started = False
263
-
264
- def process_json_chunk(self, chunk: str) -> Optional[str]:
265
- """Process a chunk from the function arguments and return the plaintext version"""
266
-
267
- # Use strip to handle only leading and trailing whitespace in control structures
268
- if self.accumulating:
269
- clean_chunk = chunk.strip()
270
- if self.json_key in self.key_buffer:
271
- if ":" in clean_chunk:
272
- self.in_message = True
273
- self.accumulating = False
274
- return None
275
- self.key_buffer += clean_chunk
276
- return None
277
-
278
- if self.in_message:
279
- if chunk.strip() == '"' and self.message_started:
280
- self.in_message = False
281
- self.message_started = False
282
- return None
283
- if not self.message_started and chunk.strip() == '"':
284
- self.message_started = True
285
- return None
286
- if self.message_started:
287
- if chunk.strip().endswith('"'):
288
- self.in_message = False
289
- return chunk.rstrip('"\n')
290
- return chunk
291
-
292
- if chunk.strip() == "{":
293
- self.key_buffer = ""
294
- self.accumulating = True
295
- return None
296
- if chunk.strip() == "}":
297
- self.in_message = False
298
- self.message_started = False
299
- return None
300
- return None
301
-
302
-
303
257
  class StreamingServerInterface(AgentChunkStreamingInterface):
304
258
  """Maintain a generator that is a proxy for self.process_chunk()
305
259
 
@@ -316,9 +270,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
316
270
  def __init__(
317
271
  self,
318
272
  multi_step=True,
273
+ # Related to if we want to try and pass back the AssistantMessage as a special case function
319
274
  use_assistant_message=False,
320
275
  assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
321
276
  assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
277
+ # Related to if we expect inner_thoughts to be in the kwargs
278
+ inner_thoughts_in_kwargs=True,
279
+ inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
322
280
  ):
323
281
  # If streaming mode, ignores base interface calls like .assistant_message, etc
324
282
  self.streaming_mode = False
@@ -346,10 +304,28 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
346
304
  self.assistant_message_function_name = assistant_message_function_name
347
305
  self.assistant_message_function_kwarg = assistant_message_function_kwarg
348
306
 
307
+ # Support for inner_thoughts_in_kwargs
308
+ self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
309
+ self.inner_thoughts_kwarg = inner_thoughts_kwarg
310
+ # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
311
+ self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=inner_thoughts_kwarg, wait_for_first_key=True)
312
+ # Buffers (name, args, id) used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
313
+ self.function_name_buffer = None
314
+ self.function_args_buffer = None
315
+ self.function_id_buffer = None
316
+
349
317
  # extra prints
350
318
  self.debug = False
351
319
  self.timeout = 30
352
320
 
321
+ def _reset_inner_thoughts_json_reader(self):
322
+ # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
323
+ self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=self.inner_thoughts_kwarg, wait_for_first_key=True)
324
+ # Buffers (name, args, id) used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
325
+ self.function_name_buffer = None
326
+ self.function_args_buffer = None
327
+ self.function_id_buffer = None
328
+
353
329
  async def _create_generator(self) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
354
330
  """An asynchronous generator that yields chunks as they become available."""
355
331
  while self._active:
@@ -365,16 +341,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
365
341
  # Reset the event until a new item is pushed
366
342
  self._event.clear()
367
343
 
368
- # while self._active:
369
- # # Wait until there is an item in the deque or the stream is deactivated
370
- # await self._event.wait()
371
-
372
- # while self._chunks:
373
- # yield self._chunks.popleft()
374
-
375
- # # Reset the event until a new item is pushed
376
- # self._event.clear()
377
-
378
344
  def get_generator(self) -> AsyncGenerator:
379
345
  """Get the generator that yields processed chunks."""
380
346
  if not self._active:
@@ -419,17 +385,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
419
385
  if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
420
386
  self._push_to_buffer(self.multi_step_gen_indicator)
421
387
 
422
- # self._active = False
423
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
424
-
425
- # if not self.multi_step:
426
- # # end the stream
427
- # self._active = False
428
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
429
- # else:
430
- # # signal that a new step has started in the stream
431
- # self._chunks.append(self.multi_step_indicator)
432
- # self._event.set() # Signal that new data is available
388
+ # Wipe the inner thoughts buffers
389
+ self._reset_inner_thoughts_json_reader()
433
390
 
434
391
  def step_complete(self):
435
392
  """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
@@ -441,10 +398,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
441
398
  # signal that a new step has started in the stream
442
399
  self._push_to_buffer(self.multi_step_indicator)
443
400
 
401
+ # Wipe the inner thoughts buffers
402
+ self._reset_inner_thoughts_json_reader()
403
+
444
404
  def step_yield(self):
445
405
  """If multi_step, this is the true 'stream_end' function."""
446
- # if self.multi_step:
447
- # end the stream
448
406
  self._active = False
449
407
  self._event.set() # Unblock the generator if it's waiting to allow it to complete
450
408
 
@@ -479,8 +437,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
479
437
  elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
480
438
  tool_call = message_delta.tool_calls[0]
481
439
 
440
+ # TODO(charles) merge into logic for internal_monologue
482
441
  # special case for trapping `send_message`
483
442
  if self.use_assistant_message and tool_call.function:
443
+ if self.inner_thoughts_in_kwargs:
444
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
484
445
 
485
446
  # If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode
486
447
 
@@ -538,6 +499,204 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
538
499
  ),
539
500
  )
540
501
 
502
+ elif self.inner_thoughts_in_kwargs and tool_call.function:
503
+ if self.use_assistant_message:
504
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
505
+
506
+ processed_chunk = None
507
+
508
+ if tool_call.function.name:
509
+ # If we're waiting for the first key, then we should hold back the name
510
+ # ie add it to a buffer instead of returning it as a chunk
511
+ if self.function_name_buffer is None:
512
+ self.function_name_buffer = tool_call.function.name
513
+ else:
514
+ self.function_name_buffer += tool_call.function.name
515
+
516
+ if tool_call.id:
517
+ # Buffer until next time
518
+ if self.function_id_buffer is None:
519
+ self.function_id_buffer = tool_call.id
520
+ else:
521
+ self.function_id_buffer += tool_call.id
522
+
523
+ if tool_call.function.arguments:
524
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
525
+
526
+ # If we have inner thoughts, we should output them as a chunk
527
+ if updates_inner_thoughts:
528
+ processed_chunk = InternalMonologue(
529
+ id=message_id,
530
+ date=message_date,
531
+ internal_monologue=updates_inner_thoughts,
532
+ )
533
+ # Additionally inner thoughts may stream back with a chunk of main JSON
534
+ # In that case, since we can only return a chunk at a time, we should buffer it
535
+ if updates_main_json:
536
+ if self.function_args_buffer is None:
537
+ self.function_args_buffer = updates_main_json
538
+ else:
539
+ self.function_args_buffer += updates_main_json
540
+
541
+ # If we have main_json, we should output a FunctionCallMessage
542
+ elif updates_main_json:
543
+
544
+ # If there's something in the function_name buffer, we should release it first
545
+ # NOTE: we could output it as part of a chunk that has both name and args,
546
+ # however the frontend may expect name first, then args, so to be
547
+ # safe we'll output name first in a separate chunk
548
+ if self.function_name_buffer:
549
+ processed_chunk = FunctionCallMessage(
550
+ id=message_id,
551
+ date=message_date,
552
+ function_call=FunctionCallDelta(
553
+ name=self.function_name_buffer,
554
+ arguments=None,
555
+ function_call_id=self.function_id_buffer,
556
+ ),
557
+ )
558
+ # Clear the buffer
559
+ self.function_name_buffer = None
560
+ self.function_id_buffer = None
561
+ # Since we're clearing the name buffer, we should store
562
+ # any updates to the arguments inside a separate buffer
563
+
564
+ # Add any main_json updates to the arguments buffer
565
+ if self.function_args_buffer is None:
566
+ self.function_args_buffer = updates_main_json
567
+ else:
568
+ self.function_args_buffer += updates_main_json
569
+
570
+ # If there was nothing in the name buffer, we can proceed to
571
+ # output the arguments chunk as a FunctionCallMessage
572
+ else:
573
+ # There may be a buffer from a previous chunk, for example
574
+ # if the previous chunk had arguments but we needed to flush name
575
+ if self.function_args_buffer:
576
+ # In this case, we should release the buffer + new data at once
577
+ combined_chunk = self.function_args_buffer + updates_main_json
578
+ processed_chunk = FunctionCallMessage(
579
+ id=message_id,
580
+ date=message_date,
581
+ function_call=FunctionCallDelta(
582
+ name=None,
583
+ arguments=combined_chunk,
584
+ function_call_id=self.function_id_buffer,
585
+ ),
586
+ )
587
+ # clear buffer
588
+ self.function_args_buffer = None
589
+ self.function_id_buffer = None
590
+ else:
591
+ # If there's no buffer to clear, just output a new chunk with new data
592
+ processed_chunk = FunctionCallMessage(
593
+ id=message_id,
594
+ date=message_date,
595
+ function_call=FunctionCallDelta(
596
+ name=None,
597
+ arguments=updates_main_json,
598
+ function_call_id=self.function_id_buffer,
599
+ ),
600
+ )
601
+ self.function_id_buffer = None
602
+
603
+ # # If there's something in the main_json buffer, we should add if to the arguments and release it together
604
+ # tool_call_delta = {}
605
+ # if tool_call.id:
606
+ # tool_call_delta["id"] = tool_call.id
607
+ # if tool_call.function:
608
+ # if tool_call.function.arguments:
609
+ # # tool_call_delta["arguments"] = tool_call.function.arguments
610
+ # # NOTE: using the stripped one
611
+ # tool_call_delta["arguments"] = updates_main_json
612
+ # # We use the buffered name
613
+ # if self.function_name_buffer:
614
+ # tool_call_delta["name"] = self.function_name_buffer
615
+ # # if tool_call.function.name:
616
+ # # tool_call_delta["name"] = tool_call.function.name
617
+
618
+ # processed_chunk = FunctionCallMessage(
619
+ # id=message_id,
620
+ # date=message_date,
621
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
622
+ # )
623
+
624
+ else:
625
+ processed_chunk = None
626
+
627
+ return processed_chunk
628
+
629
+ # # NOTE: this is a simplified version of the parsing code that:
630
+ # # (1) assumes that the inner_thoughts key will always come first
631
+ # # (2) assumes that there's no extra spaces in the stringified JSON
632
+ # # i.e., the prefix will look exactly like: "{\"variable\":\"}"
633
+ # if tool_call.function.arguments:
634
+ # self.function_args_buffer += tool_call.function.arguments
635
+
636
+ # # prefix_str = f'{{"\\"{self.inner_thoughts_kwarg}\\":\\"}}'
637
+ # prefix_str = f'{{"{self.inner_thoughts_kwarg}":'
638
+ # if self.function_args_buffer.startswith(prefix_str):
639
+ # print(f"Found prefix!!!: {self.function_args_buffer}")
640
+ # else:
641
+ # print(f"No prefix found: {self.function_args_buffer}")
642
+
643
+ # tool_call_delta = {}
644
+ # if tool_call.id:
645
+ # tool_call_delta["id"] = tool_call.id
646
+ # if tool_call.function:
647
+ # if tool_call.function.arguments:
648
+ # tool_call_delta["arguments"] = tool_call.function.arguments
649
+ # if tool_call.function.name:
650
+ # tool_call_delta["name"] = tool_call.function.name
651
+
652
+ # processed_chunk = FunctionCallMessage(
653
+ # id=message_id,
654
+ # date=message_date,
655
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
656
+ # )
657
+
658
+ # elif False and self.inner_thoughts_in_kwargs and tool_call.function:
659
+ # if self.use_assistant_message:
660
+ # raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
661
+
662
+ # if tool_call.function.arguments:
663
+
664
+ # Maintain a state machine to track if we're reading a key vs reading a value
665
+ # Technically we can we pre-key, post-key, pre-value, post-value
666
+
667
+ # for c in tool_call.function.arguments:
668
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.PRE_KEY:
669
+ # if c == '"':
670
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.READING_KEY
671
+ # elif self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
672
+ # if c == '"':
673
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.POST_KEY
674
+
675
+ # If we're reading a key:
676
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
677
+
678
+ # We need to buffer the function arguments until we get complete keys
679
+ # We are reading stringified-JSON, so we need to check for keys in data that looks like:
680
+ # "arguments":"{\""
681
+ # "arguments":"inner"
682
+ # "arguments":"_th"
683
+ # "arguments":"ought"
684
+ # "arguments":"s"
685
+ # "arguments":"\":\""
686
+
687
+ # Once we get a complete key, check if the key matches
688
+
689
+ # If it does match, start processing the value (stringified-JSON string
690
+ # And with each new chunk, output it as a chunk of type InternalMonologue
691
+
692
+ # If the key doesn't match, then flush the buffer as a single FunctionCallMessage chunk
693
+
694
+ # If we're reading a value
695
+
696
+ # If we're reading the inner thoughts value, we output chunks of type InternalMonologue
697
+
698
+ # Otherwise, do simple chunks of FunctionCallMessage
699
+
541
700
  else:
542
701
 
543
702
  tool_call_delta = {}
@@ -563,7 +722,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
563
722
  # skip if there's a finish
564
723
  return None
565
724
  else:
566
- raise ValueError(f"Couldn't find delta in chunk: {chunk}")
725
+ # Example case that would trigger here:
726
+ # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
727
+ # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
728
+ # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
729
+ # model='gpt-4o-mini-2024-07-18'
730
+ # object='chat.completion.chunk'
731
+ warnings.warn(f"Couldn't find delta in chunk: {chunk}")
732
+ return None
567
733
 
568
734
  return processed_chunk
569
735
 
@@ -663,6 +829,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
663
829
  # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
664
830
  # "id": str(msg_obj.id) if msg_obj is not None else None,
665
831
  # }
832
+ assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
666
833
  processed_chunk = InternalMonologue(
667
834
  id=msg_obj.id,
668
835
  date=msg_obj.created_at,
@@ -676,18 +843,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
676
843
  def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
677
844
  """Letta uses send_message"""
678
845
 
679
- # if not self.streaming_mode and self.send_message_special_case:
680
-
681
- # # create a fake "chunk" of a stream
682
- # processed_chunk = {
683
- # "assistant_message": msg,
684
- # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
685
- # "id": str(msg_obj.id) if msg_obj is not None else None,
686
- # }
687
-
688
- # self._chunks.append(processed_chunk)
689
- # self._event.set() # Signal that new data is available
690
-
846
+ # NOTE: this is a no-op, we handle this special case in function_message instead
691
847
  return
692
848
 
693
849
  def function_message(self, msg: str, msg_obj: Optional[Message] = None):
@@ -699,6 +855,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
699
855
  if msg.startswith("Running "):
700
856
  if not self.streaming_mode:
701
857
  # create a fake "chunk" of a stream
858
+ assert msg_obj.tool_calls is not None and len(msg_obj.tool_calls) > 0, "Function call required for function_message"
702
859
  function_call = msg_obj.tool_calls[0]
703
860
 
704
861
  if self.nonstreaming_legacy_mode:
@@ -784,13 +941,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
784
941
  return
785
942
  else:
786
943
  return
787
- # msg = msg.replace("Running ", "")
788
- # new_message = {"function_call": msg}
789
944
 
790
945
  elif msg.startswith("Ran "):
791
946
  return
792
- # msg = msg.replace("Ran ", "Function call returned: ")
793
- # new_message = {"function_call": msg}
794
947
 
795
948
  elif msg.startswith("Success: "):
796
949
  msg = msg.replace("Success: ", "")
@@ -821,10 +974,4 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
821
974
  raise ValueError(msg)
822
975
  new_message = {"function_message": msg}
823
976
 
824
- # add extra metadata
825
- # if msg_obj is not None:
826
- # new_message["id"] = str(msg_obj.id)
827
- # assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
828
- # new_message["date"] = msg_obj.created_at.isoformat()
829
-
830
977
  self._push_to_buffer(new_message)
@@ -430,9 +430,6 @@ async def send_message_to_agent(
430
430
  # Get the generator object off of the agent's streaming interface
431
431
  # This will be attached to the POST SSE request used under-the-hood
432
432
  letta_agent = server._get_or_load_agent(agent_id=agent_id)
433
- streaming_interface = letta_agent.interface
434
- if not isinstance(streaming_interface, StreamingServerInterface):
435
- raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
436
433
 
437
434
  # Disable token streaming if not OpenAI
438
435
  # TODO: cleanup this logic
@@ -441,6 +438,12 @@ async def send_message_to_agent(
441
438
  print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
442
439
  stream_tokens = False
443
440
 
441
+ # Create a new interface per request
442
+ letta_agent.interface = StreamingServerInterface()
443
+ streaming_interface = letta_agent.interface
444
+ if not isinstance(streaming_interface, StreamingServerInterface):
445
+ raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
446
+
444
447
  # Enable token-streaming within the request if desired
445
448
  streaming_interface.streaming_mode = stream_tokens
446
449
  # "chatcompletion mode" does some remapping and ignores inner thoughts
@@ -454,6 +457,11 @@ async def send_message_to_agent(
454
457
  streaming_interface.assistant_message_function_name = assistant_message_function_name
455
458
  streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
456
459
 
460
+ # Related to JSON buffer reader
461
+ streaming_interface.inner_thoughts_in_kwargs = (
462
+ llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
463
+ )
464
+
457
465
  # Offload the synchronous message_func to a separate thread
458
466
  streaming_interface.stream_start()
459
467
  task = asyncio.create_task(
@@ -22,7 +22,7 @@ def get_all_orgs(
22
22
  Get a list of all orgs in the database
23
23
  """
24
24
  try:
25
- next_cursor, orgs = server.ms.list_organizations(cursor=cursor, limit=limit)
25
+ next_cursor, orgs = server.organization_manager.list_organizations(cursor=cursor, limit=limit)
26
26
  except HTTPException:
27
27
  raise
28
28
  except Exception as e:
@@ -38,8 +38,7 @@ def create_org(
38
38
  """
39
39
  Create a new org in the database
40
40
  """
41
-
42
- org = server.create_organization(request)
41
+ org = server.organization_manager.create_organization(request)
43
42
  return org
44
43
 
45
44
 
@@ -50,10 +49,10 @@ def delete_org(
50
49
  ):
51
50
  # TODO make a soft deletion, instead of a hard deletion
52
51
  try:
53
- org = server.ms.get_organization(org_id=org_id)
52
+ org = server.organization_manager.get_organization_by_id(org_id=org_id)
54
53
  if org is None:
55
54
  raise HTTPException(status_code=404, detail=f"Organization does not exist")
56
- server.ms.delete_organization(org_id=org_id)
55
+ server.organization_manager.delete_organization(org_id=org_id)
57
56
  except HTTPException:
58
57
  raise
59
58
  except Exception as e:
letta/server/server.py CHANGED
@@ -44,6 +44,7 @@ from letta.log import get_logger
44
44
  from letta.memory import get_memory_functions
45
45
  from letta.metadata import Base, MetadataStore
46
46
  from letta.o1_agent import O1Agent
47
+ from letta.orm.errors import NoResultFound
47
48
  from letta.prompts import gpt_system
48
49
  from letta.providers import (
49
50
  AnthropicProvider,
@@ -80,12 +81,12 @@ from letta.schemas.memory import (
80
81
  RecallMemorySummary,
81
82
  )
82
83
  from letta.schemas.message import Message, MessageCreate, MessageRole, UpdateMessage
83
- from letta.schemas.organization import Organization, OrganizationCreate
84
84
  from letta.schemas.passage import Passage
85
85
  from letta.schemas.source import Source, SourceCreate, SourceUpdate
86
86
  from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
87
87
  from letta.schemas.usage import LettaUsageStatistics
88
88
  from letta.schemas.user import User, UserCreate
89
+ from letta.services.organization_manager import OrganizationManager
89
90
  from letta.utils import create_random_username, json_dumps, json_loads
90
91
 
91
92
  # from letta.llm_api_tools import openai_get_model_list, azure_openai_get_model_list, smart_urljoin
@@ -245,6 +246,9 @@ class SyncServer(Server):
245
246
  self.config = config
246
247
  self.ms = MetadataStore(self.config)
247
248
 
249
+ # Managers that interface with data models
250
+ self.organization_manager = OrganizationManager()
251
+
248
252
  # TODO: this should be removed
249
253
  # add global default tools (for admin)
250
254
  self.add_default_tools(module_name="base")
@@ -773,20 +777,6 @@ class SyncServer(Server):
773
777
 
774
778
  return user
775
779
 
776
- def create_organization(self, request: OrganizationCreate) -> Organization:
777
- """Create a new org using a config"""
778
- if not request.name:
779
- # auto-generate a name
780
- request.name = create_random_username()
781
- org = Organization(name=request.name)
782
- self.ms.create_organization(org)
783
- logger.info(f"Created new org from config: {org}")
784
-
785
- # add default for the org
786
- # TODO: add default data
787
-
788
- return org
789
-
790
780
  def create_agent(
791
781
  self,
792
782
  request: CreateAgent,
@@ -2125,18 +2115,13 @@ class SyncServer(Server):
2125
2115
 
2126
2116
  def get_default_user(self) -> User:
2127
2117
 
2128
- from letta.constants import (
2129
- DEFAULT_ORG_ID,
2130
- DEFAULT_ORG_NAME,
2131
- DEFAULT_USER_ID,
2132
- DEFAULT_USER_NAME,
2133
- )
2118
+ from letta.constants import DEFAULT_ORG_ID, DEFAULT_USER_ID, DEFAULT_USER_NAME
2134
2119
 
2135
2120
  # check if default org exists
2136
- default_org = self.ms.get_organization(DEFAULT_ORG_ID)
2137
- if not default_org:
2138
- org = Organization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID)
2139
- self.ms.create_organization(org)
2121
+ try:
2122
+ self.organization_manager.get_organization_by_id(DEFAULT_ORG_ID)
2123
+ except NoResultFound:
2124
+ self.organization_manager.create_default_organization()
2140
2125
 
2141
2126
  # check if default user exists
2142
2127
  try:
File without changes
@@ -0,0 +1,66 @@
1
+ from typing import List, Optional
2
+
3
+ from sqlalchemy.exc import NoResultFound
4
+
5
+ from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
6
+ from letta.orm.organization import Organization
7
+ from letta.schemas.organization import Organization as PydanticOrganization
8
+ from letta.utils import create_random_username
9
+
10
+
11
+ class OrganizationManager:
12
+ """Manager class to handle business logic related to Organizations."""
13
+
14
+ def __init__(self):
15
+ # This is probably horrible but we reuse this technique from metadata.py
16
+ # TODO: Please refactor this out
17
+ # I am currently working on an ORM refactor and would like to make a more minimal set of changes
18
+ # - Matt
19
+ from letta.server.server import db_context
20
+
21
+ self.session_maker = db_context
22
+
23
+ def get_organization_by_id(self, org_id: str) -> PydanticOrganization:
24
+ """Fetch an organization by ID."""
25
+ with self.session_maker() as session:
26
+ try:
27
+ organization = Organization.read(db_session=session, identifier=org_id)
28
+ return organization.to_pydantic()
29
+ except NoResultFound:
30
+ raise ValueError(f"Organization with id {org_id} not found.")
31
+
32
+ def create_organization(self, name: Optional[str] = None) -> PydanticOrganization:
33
+ """Create a new organization. If a name is provided, it is used, otherwise, a random one is generated."""
34
+ with self.session_maker() as session:
35
+ org = Organization(name=name if name else create_random_username())
36
+ org.create(session)
37
+ return org.to_pydantic()
38
+
39
+ def create_default_organization(self) -> PydanticOrganization:
40
+ """Create the default organization."""
41
+ with self.session_maker() as session:
42
+ org = Organization(name=DEFAULT_ORG_NAME)
43
+ org.id = DEFAULT_ORG_ID
44
+ org.create(session)
45
+ return org.to_pydantic()
46
+
47
+ def update_organization_name_using_id(self, org_id: str, name: Optional[str] = None) -> PydanticOrganization:
48
+ """Update an organization."""
49
+ with self.session_maker() as session:
50
+ organization = Organization.read(db_session=session, identifier=org_id)
51
+ if name:
52
+ organization.name = name
53
+ organization.update(session)
54
+ return organization.to_pydantic()
55
+
56
+ def delete_organization(self, org_id: str):
57
+ """Delete an organization by marking it as deleted."""
58
+ with self.session_maker() as session:
59
+ organization = Organization.read(db_session=session, identifier=org_id)
60
+ organization.delete(session)
61
+
62
+ def list_organizations(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[PydanticOrganization]:
63
+ """List organizations with pagination based on cursor (org_id) and limit."""
64
+ with self.session_maker() as session:
65
+ results = Organization.list(db_session=session, cursor=cursor, limit=limit)
66
+ return [org.to_pydantic() for org in results]