letta-nightly 0.5.0.dev20241021104213__py3-none-any.whl → 0.5.0.dev20241023104105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +7 -2
- letta/agent_store/db.py +4 -2
- letta/cli/cli_config.py +2 -2
- letta/client/client.py +13 -0
- letta/constants.py +4 -1
- letta/embeddings.py +34 -16
- letta/llm_api/azure_openai.py +44 -4
- letta/llm_api/helpers.py +45 -19
- letta/llm_api/openai.py +24 -5
- letta/metadata.py +1 -59
- letta/orm/__all__.py +0 -0
- letta/orm/__init__.py +0 -0
- letta/orm/base.py +75 -0
- letta/orm/enums.py +8 -0
- letta/orm/errors.py +2 -0
- letta/orm/mixins.py +40 -0
- letta/orm/organization.py +35 -0
- letta/orm/sqlalchemy_base.py +214 -0
- letta/schemas/organization.py +3 -3
- letta/server/rest_api/interface.py +245 -98
- letta/server/rest_api/routers/v1/agents.py +11 -3
- letta/server/rest_api/routers/v1/organizations.py +4 -5
- letta/server/server.py +10 -25
- letta/services/__init__.py +0 -0
- letta/services/organization_manager.py +66 -0
- letta/streaming_utils.py +270 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/METADATA +2 -1
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/RECORD +31 -22
- letta/base.py +0 -3
- letta/client/admin.py +0 -171
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/LICENSE +0 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/WHEEL +0 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241023104105.dist-info}/entry_points.txt +0 -0
|
@@ -8,6 +8,7 @@ from typing import AsyncGenerator, Literal, Optional, Union
|
|
|
8
8
|
|
|
9
9
|
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
|
10
10
|
from letta.interface import AgentInterface
|
|
11
|
+
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
|
11
12
|
from letta.schemas.enums import MessageStreamStatus
|
|
12
13
|
from letta.schemas.letta_message import (
|
|
13
14
|
AssistantMessage,
|
|
@@ -23,9 +24,14 @@ from letta.schemas.letta_message import (
|
|
|
23
24
|
from letta.schemas.message import Message
|
|
24
25
|
from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
|
|
25
26
|
from letta.streaming_interface import AgentChunkStreamingInterface
|
|
27
|
+
from letta.streaming_utils import (
|
|
28
|
+
FunctionArgumentsStreamHandler,
|
|
29
|
+
JSONInnerThoughtsExtractor,
|
|
30
|
+
)
|
|
26
31
|
from letta.utils import is_utc_datetime
|
|
27
32
|
|
|
28
33
|
|
|
34
|
+
# TODO strip from code / deprecate
|
|
29
35
|
class QueuingInterface(AgentInterface):
|
|
30
36
|
"""Messages are queued inside an internal buffer and manually flushed"""
|
|
31
37
|
|
|
@@ -248,58 +254,6 @@ class QueuingInterface(AgentInterface):
|
|
|
248
254
|
self._queue_push(message_api=new_message, message_obj=msg_obj)
|
|
249
255
|
|
|
250
256
|
|
|
251
|
-
class FunctionArgumentsStreamHandler:
|
|
252
|
-
"""State machine that can process a stream of"""
|
|
253
|
-
|
|
254
|
-
def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
|
|
255
|
-
self.json_key = json_key
|
|
256
|
-
self.reset()
|
|
257
|
-
|
|
258
|
-
def reset(self):
|
|
259
|
-
self.in_message = False
|
|
260
|
-
self.key_buffer = ""
|
|
261
|
-
self.accumulating = False
|
|
262
|
-
self.message_started = False
|
|
263
|
-
|
|
264
|
-
def process_json_chunk(self, chunk: str) -> Optional[str]:
|
|
265
|
-
"""Process a chunk from the function arguments and return the plaintext version"""
|
|
266
|
-
|
|
267
|
-
# Use strip to handle only leading and trailing whitespace in control structures
|
|
268
|
-
if self.accumulating:
|
|
269
|
-
clean_chunk = chunk.strip()
|
|
270
|
-
if self.json_key in self.key_buffer:
|
|
271
|
-
if ":" in clean_chunk:
|
|
272
|
-
self.in_message = True
|
|
273
|
-
self.accumulating = False
|
|
274
|
-
return None
|
|
275
|
-
self.key_buffer += clean_chunk
|
|
276
|
-
return None
|
|
277
|
-
|
|
278
|
-
if self.in_message:
|
|
279
|
-
if chunk.strip() == '"' and self.message_started:
|
|
280
|
-
self.in_message = False
|
|
281
|
-
self.message_started = False
|
|
282
|
-
return None
|
|
283
|
-
if not self.message_started and chunk.strip() == '"':
|
|
284
|
-
self.message_started = True
|
|
285
|
-
return None
|
|
286
|
-
if self.message_started:
|
|
287
|
-
if chunk.strip().endswith('"'):
|
|
288
|
-
self.in_message = False
|
|
289
|
-
return chunk.rstrip('"\n')
|
|
290
|
-
return chunk
|
|
291
|
-
|
|
292
|
-
if chunk.strip() == "{":
|
|
293
|
-
self.key_buffer = ""
|
|
294
|
-
self.accumulating = True
|
|
295
|
-
return None
|
|
296
|
-
if chunk.strip() == "}":
|
|
297
|
-
self.in_message = False
|
|
298
|
-
self.message_started = False
|
|
299
|
-
return None
|
|
300
|
-
return None
|
|
301
|
-
|
|
302
|
-
|
|
303
257
|
class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
304
258
|
"""Maintain a generator that is a proxy for self.process_chunk()
|
|
305
259
|
|
|
@@ -316,9 +270,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
316
270
|
def __init__(
|
|
317
271
|
self,
|
|
318
272
|
multi_step=True,
|
|
273
|
+
# Related to if we want to try and pass back the AssistantMessage as a special case function
|
|
319
274
|
use_assistant_message=False,
|
|
320
275
|
assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
|
|
321
276
|
assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
|
|
277
|
+
# Related to if we expect inner_thoughts to be in the kwargs
|
|
278
|
+
inner_thoughts_in_kwargs=True,
|
|
279
|
+
inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
|
|
322
280
|
):
|
|
323
281
|
# If streaming mode, ignores base interface calls like .assistant_message, etc
|
|
324
282
|
self.streaming_mode = False
|
|
@@ -346,10 +304,28 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
346
304
|
self.assistant_message_function_name = assistant_message_function_name
|
|
347
305
|
self.assistant_message_function_kwarg = assistant_message_function_kwarg
|
|
348
306
|
|
|
307
|
+
# Support for inner_thoughts_in_kwargs
|
|
308
|
+
self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
|
|
309
|
+
self.inner_thoughts_kwarg = inner_thoughts_kwarg
|
|
310
|
+
# A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
|
|
311
|
+
self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=inner_thoughts_kwarg, wait_for_first_key=True)
|
|
312
|
+
# Two buffers used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
|
|
313
|
+
self.function_name_buffer = None
|
|
314
|
+
self.function_args_buffer = None
|
|
315
|
+
self.function_id_buffer = None
|
|
316
|
+
|
|
349
317
|
# extra prints
|
|
350
318
|
self.debug = False
|
|
351
319
|
self.timeout = 30
|
|
352
320
|
|
|
321
|
+
def _reset_inner_thoughts_json_reader(self):
    """Discard the per-step state used while parsing streamed function-call arguments.

    Installs a fresh ``JSONInnerThoughtsExtractor`` keyed on
    ``self.inner_thoughts_kwarg`` and empties the name / arguments / id
    hold-back buffers.
    """
    # Fresh reader for accumulating function arguments (keys are buffered and checked one at a time)
    self.function_args_reader = JSONInnerThoughtsExtractor(
        wait_for_first_key=True,
        inner_thoughts_key=self.inner_thoughts_kwarg,
    )
    # Hold-back buffers that make sure the function 'name' is emitted after the
    # inner thoughts stream (relevant when inner_thoughts_in_kwargs is enabled)
    self.function_name_buffer = self.function_args_buffer = self.function_id_buffer = None
|
|
328
|
+
|
|
353
329
|
async def _create_generator(self) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
|
|
354
330
|
"""An asynchronous generator that yields chunks as they become available."""
|
|
355
331
|
while self._active:
|
|
@@ -365,16 +341,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
365
341
|
# Reset the event until a new item is pushed
|
|
366
342
|
self._event.clear()
|
|
367
343
|
|
|
368
|
-
# while self._active:
|
|
369
|
-
# # Wait until there is an item in the deque or the stream is deactivated
|
|
370
|
-
# await self._event.wait()
|
|
371
|
-
|
|
372
|
-
# while self._chunks:
|
|
373
|
-
# yield self._chunks.popleft()
|
|
374
|
-
|
|
375
|
-
# # Reset the event until a new item is pushed
|
|
376
|
-
# self._event.clear()
|
|
377
|
-
|
|
378
344
|
def get_generator(self) -> AsyncGenerator:
|
|
379
345
|
"""Get the generator that yields processed chunks."""
|
|
380
346
|
if not self._active:
|
|
@@ -419,17 +385,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
419
385
|
if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
|
|
420
386
|
self._push_to_buffer(self.multi_step_gen_indicator)
|
|
421
387
|
|
|
422
|
-
#
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
# if not self.multi_step:
|
|
426
|
-
# # end the stream
|
|
427
|
-
# self._active = False
|
|
428
|
-
# self._event.set() # Unblock the generator if it's waiting to allow it to complete
|
|
429
|
-
# else:
|
|
430
|
-
# # signal that a new step has started in the stream
|
|
431
|
-
# self._chunks.append(self.multi_step_indicator)
|
|
432
|
-
# self._event.set() # Signal that new data is available
|
|
388
|
+
# Wipe the inner thoughts buffers
|
|
389
|
+
self._reset_inner_thoughts_json_reader()
|
|
433
390
|
|
|
434
391
|
def step_complete(self):
|
|
435
392
|
"""Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
|
|
@@ -441,10 +398,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
441
398
|
# signal that a new step has started in the stream
|
|
442
399
|
self._push_to_buffer(self.multi_step_indicator)
|
|
443
400
|
|
|
401
|
+
# Wipe the inner thoughts buffers
|
|
402
|
+
self._reset_inner_thoughts_json_reader()
|
|
403
|
+
|
|
444
404
|
def step_yield(self):
|
|
445
405
|
"""If multi_step, this is the true 'stream_end' function."""
|
|
446
|
-
# if self.multi_step:
|
|
447
|
-
# end the stream
|
|
448
406
|
self._active = False
|
|
449
407
|
self._event.set() # Unblock the generator if it's waiting to allow it to complete
|
|
450
408
|
|
|
@@ -479,8 +437,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
479
437
|
elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
|
|
480
438
|
tool_call = message_delta.tool_calls[0]
|
|
481
439
|
|
|
440
|
+
# TODO(charles) merge into logic for internal_monologue
|
|
482
441
|
# special case for trapping `send_message`
|
|
483
442
|
if self.use_assistant_message and tool_call.function:
|
|
443
|
+
if self.inner_thoughts_in_kwargs:
|
|
444
|
+
raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
|
|
484
445
|
|
|
485
446
|
# If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode
|
|
486
447
|
|
|
@@ -538,6 +499,204 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
538
499
|
),
|
|
539
500
|
)
|
|
540
501
|
|
|
502
|
+
elif self.inner_thoughts_in_kwargs and tool_call.function:
|
|
503
|
+
if self.use_assistant_message:
|
|
504
|
+
raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
|
|
505
|
+
|
|
506
|
+
processed_chunk = None
|
|
507
|
+
|
|
508
|
+
if tool_call.function.name:
|
|
509
|
+
# If we're waiting for the first key, then we should hold back the name
|
|
510
|
+
# ie add it to a buffer instead of returning it as a chunk
|
|
511
|
+
if self.function_name_buffer is None:
|
|
512
|
+
self.function_name_buffer = tool_call.function.name
|
|
513
|
+
else:
|
|
514
|
+
self.function_name_buffer += tool_call.function.name
|
|
515
|
+
|
|
516
|
+
if tool_call.id:
|
|
517
|
+
# Buffer until next time
|
|
518
|
+
if self.function_id_buffer is None:
|
|
519
|
+
self.function_id_buffer = tool_call.id
|
|
520
|
+
else:
|
|
521
|
+
self.function_id_buffer += tool_call.id
|
|
522
|
+
|
|
523
|
+
if tool_call.function.arguments:
|
|
524
|
+
updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
|
|
525
|
+
|
|
526
|
+
# If we have inner thoughts, we should output them as a chunk
|
|
527
|
+
if updates_inner_thoughts:
|
|
528
|
+
processed_chunk = InternalMonologue(
|
|
529
|
+
id=message_id,
|
|
530
|
+
date=message_date,
|
|
531
|
+
internal_monologue=updates_inner_thoughts,
|
|
532
|
+
)
|
|
533
|
+
# Additionally inner thoughts may stream back with a chunk of main JSON
|
|
534
|
+
# In that case, since we can only return a chunk at a time, we should buffer it
|
|
535
|
+
if updates_main_json:
|
|
536
|
+
if self.function_args_buffer is None:
|
|
537
|
+
self.function_args_buffer = updates_main_json
|
|
538
|
+
else:
|
|
539
|
+
self.function_args_buffer += updates_main_json
|
|
540
|
+
|
|
541
|
+
# If we have main_json, we should output a FunctionCallMessage
|
|
542
|
+
elif updates_main_json:
|
|
543
|
+
|
|
544
|
+
# If there's something in the function_name buffer, we should release it first
|
|
545
|
+
# NOTE: we could output it as part of a chunk that has both name and args,
|
|
546
|
+
# however the frontend may expect name first, then args, so to be
|
|
547
|
+
# safe we'll output name first in a separate chunk
|
|
548
|
+
if self.function_name_buffer:
|
|
549
|
+
processed_chunk = FunctionCallMessage(
|
|
550
|
+
id=message_id,
|
|
551
|
+
date=message_date,
|
|
552
|
+
function_call=FunctionCallDelta(
|
|
553
|
+
name=self.function_name_buffer,
|
|
554
|
+
arguments=None,
|
|
555
|
+
function_call_id=self.function_id_buffer,
|
|
556
|
+
),
|
|
557
|
+
)
|
|
558
|
+
# Clear the buffer
|
|
559
|
+
self.function_name_buffer = None
|
|
560
|
+
self.function_id_buffer = None
|
|
561
|
+
# Since we're clearing the name buffer, we should store
|
|
562
|
+
# any updates to the arguments inside a separate buffer
|
|
563
|
+
|
|
564
|
+
# Add any main_json updates to the arguments buffer
|
|
565
|
+
if self.function_args_buffer is None:
|
|
566
|
+
self.function_args_buffer = updates_main_json
|
|
567
|
+
else:
|
|
568
|
+
self.function_args_buffer += updates_main_json
|
|
569
|
+
|
|
570
|
+
# If there was nothing in the name buffer, we can proceed to
|
|
571
|
+
# output the arguments chunk as a FunctionCallMessage
|
|
572
|
+
else:
|
|
573
|
+
# There may be a buffer from a previous chunk, for example
|
|
574
|
+
# if the previous chunk had arguments but we needed to flush name
|
|
575
|
+
if self.function_args_buffer:
|
|
576
|
+
# In this case, we should release the buffer + new data at once
|
|
577
|
+
combined_chunk = self.function_args_buffer + updates_main_json
|
|
578
|
+
processed_chunk = FunctionCallMessage(
|
|
579
|
+
id=message_id,
|
|
580
|
+
date=message_date,
|
|
581
|
+
function_call=FunctionCallDelta(
|
|
582
|
+
name=None,
|
|
583
|
+
arguments=combined_chunk,
|
|
584
|
+
function_call_id=self.function_id_buffer,
|
|
585
|
+
),
|
|
586
|
+
)
|
|
587
|
+
# clear buffer
|
|
588
|
+
self.function_args_buffer = None
|
|
589
|
+
self.function_id_buffer = None
|
|
590
|
+
else:
|
|
591
|
+
# If there's no buffer to clear, just output a new chunk with new data
|
|
592
|
+
processed_chunk = FunctionCallMessage(
|
|
593
|
+
id=message_id,
|
|
594
|
+
date=message_date,
|
|
595
|
+
function_call=FunctionCallDelta(
|
|
596
|
+
name=None,
|
|
597
|
+
arguments=updates_main_json,
|
|
598
|
+
function_call_id=self.function_id_buffer,
|
|
599
|
+
),
|
|
600
|
+
)
|
|
601
|
+
self.function_id_buffer = None
|
|
602
|
+
|
|
603
|
+
# # If there's something in the main_json buffer, we should add it to the arguments and release it together
|
|
604
|
+
# tool_call_delta = {}
|
|
605
|
+
# if tool_call.id:
|
|
606
|
+
# tool_call_delta["id"] = tool_call.id
|
|
607
|
+
# if tool_call.function:
|
|
608
|
+
# if tool_call.function.arguments:
|
|
609
|
+
# # tool_call_delta["arguments"] = tool_call.function.arguments
|
|
610
|
+
# # NOTE: using the stripped one
|
|
611
|
+
# tool_call_delta["arguments"] = updates_main_json
|
|
612
|
+
# # We use the buffered name
|
|
613
|
+
# if self.function_name_buffer:
|
|
614
|
+
# tool_call_delta["name"] = self.function_name_buffer
|
|
615
|
+
# # if tool_call.function.name:
|
|
616
|
+
# # tool_call_delta["name"] = tool_call.function.name
|
|
617
|
+
|
|
618
|
+
# processed_chunk = FunctionCallMessage(
|
|
619
|
+
# id=message_id,
|
|
620
|
+
# date=message_date,
|
|
621
|
+
# function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
|
|
622
|
+
# )
|
|
623
|
+
|
|
624
|
+
else:
|
|
625
|
+
processed_chunk = None
|
|
626
|
+
|
|
627
|
+
return processed_chunk
|
|
628
|
+
|
|
629
|
+
# # NOTE: this is a simplified version of the parsing code that:
|
|
630
|
+
# # (1) assumes that the inner_thoughts key will always come first
|
|
631
|
+
# # (2) assumes that there's no extra spaces in the stringified JSON
|
|
632
|
+
# # i.e., the prefix will look exactly like: "{\"variable\":\"}"
|
|
633
|
+
# if tool_call.function.arguments:
|
|
634
|
+
# self.function_args_buffer += tool_call.function.arguments
|
|
635
|
+
|
|
636
|
+
# # prefix_str = f'{{"\\"{self.inner_thoughts_kwarg}\\":\\"}}'
|
|
637
|
+
# prefix_str = f'{{"{self.inner_thoughts_kwarg}":'
|
|
638
|
+
# if self.function_args_buffer.startswith(prefix_str):
|
|
639
|
+
# print(f"Found prefix!!!: {self.function_args_buffer}")
|
|
640
|
+
# else:
|
|
641
|
+
# print(f"No prefix found: {self.function_args_buffer}")
|
|
642
|
+
|
|
643
|
+
# tool_call_delta = {}
|
|
644
|
+
# if tool_call.id:
|
|
645
|
+
# tool_call_delta["id"] = tool_call.id
|
|
646
|
+
# if tool_call.function:
|
|
647
|
+
# if tool_call.function.arguments:
|
|
648
|
+
# tool_call_delta["arguments"] = tool_call.function.arguments
|
|
649
|
+
# if tool_call.function.name:
|
|
650
|
+
# tool_call_delta["name"] = tool_call.function.name
|
|
651
|
+
|
|
652
|
+
# processed_chunk = FunctionCallMessage(
|
|
653
|
+
# id=message_id,
|
|
654
|
+
# date=message_date,
|
|
655
|
+
# function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
|
|
656
|
+
# )
|
|
657
|
+
|
|
658
|
+
# elif False and self.inner_thoughts_in_kwargs and tool_call.function:
|
|
659
|
+
# if self.use_assistant_message:
|
|
660
|
+
# raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
|
|
661
|
+
|
|
662
|
+
# if tool_call.function.arguments:
|
|
663
|
+
|
|
664
|
+
# Maintain a state machine to track if we're reading a key vs reading a value
|
|
665
|
+
# Technically we can be pre-key, post-key, pre-value, or post-value
|
|
666
|
+
|
|
667
|
+
# for c in tool_call.function.arguments:
|
|
668
|
+
# if self.function_chunks_parsing_state == FunctionChunksParsingState.PRE_KEY:
|
|
669
|
+
# if c == '"':
|
|
670
|
+
# self.function_chunks_parsing_state = FunctionChunksParsingState.READING_KEY
|
|
671
|
+
# elif self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
|
|
672
|
+
# if c == '"':
|
|
673
|
+
# self.function_chunks_parsing_state = FunctionChunksParsingState.POST_KEY
|
|
674
|
+
|
|
675
|
+
# If we're reading a key:
|
|
676
|
+
# if self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
|
|
677
|
+
|
|
678
|
+
# We need to buffer the function arguments until we get complete keys
|
|
679
|
+
# We are reading stringified-JSON, so we need to check for keys in data that looks like:
|
|
680
|
+
# "arguments":"{\""
|
|
681
|
+
# "arguments":"inner"
|
|
682
|
+
# "arguments":"_th"
|
|
683
|
+
# "arguments":"ought"
|
|
684
|
+
# "arguments":"s"
|
|
685
|
+
# "arguments":"\":\""
|
|
686
|
+
|
|
687
|
+
# Once we get a complete key, check if the key matches
|
|
688
|
+
|
|
689
|
+
# If it does match, start processing the value (a stringified-JSON string)
|
|
690
|
+
# And with each new chunk, output it as a chunk of type InternalMonologue
|
|
691
|
+
|
|
692
|
+
# If the key doesn't match, then flush the buffer as a single FunctionCallMessage chunk
|
|
693
|
+
|
|
694
|
+
# If we're reading a value
|
|
695
|
+
|
|
696
|
+
# If we're reading the inner thoughts value, we output chunks of type InternalMonologue
|
|
697
|
+
|
|
698
|
+
# Otherwise, do simple chunks of FunctionCallMessage
|
|
699
|
+
|
|
541
700
|
else:
|
|
542
701
|
|
|
543
702
|
tool_call_delta = {}
|
|
@@ -563,7 +722,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
563
722
|
# skip if there's a finish
|
|
564
723
|
return None
|
|
565
724
|
else:
|
|
566
|
-
|
|
725
|
+
# Example case that would trigger here:
|
|
726
|
+
# id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
|
|
727
|
+
# choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
|
|
728
|
+
# created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
|
|
729
|
+
# model='gpt-4o-mini-2024-07-18'
|
|
730
|
+
# object='chat.completion.chunk'
|
|
731
|
+
warnings.warn(f"Couldn't find delta in chunk: {chunk}")
|
|
732
|
+
return None
|
|
567
733
|
|
|
568
734
|
return processed_chunk
|
|
569
735
|
|
|
@@ -663,6 +829,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
663
829
|
# "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
|
|
664
830
|
# "id": str(msg_obj.id) if msg_obj is not None else None,
|
|
665
831
|
# }
|
|
832
|
+
assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
|
|
666
833
|
processed_chunk = InternalMonologue(
|
|
667
834
|
id=msg_obj.id,
|
|
668
835
|
date=msg_obj.created_at,
|
|
@@ -676,18 +843,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
676
843
|
def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
|
|
677
844
|
"""Letta uses send_message"""
|
|
678
845
|
|
|
679
|
-
#
|
|
680
|
-
|
|
681
|
-
# # create a fake "chunk" of a stream
|
|
682
|
-
# processed_chunk = {
|
|
683
|
-
# "assistant_message": msg,
|
|
684
|
-
# "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
|
|
685
|
-
# "id": str(msg_obj.id) if msg_obj is not None else None,
|
|
686
|
-
# }
|
|
687
|
-
|
|
688
|
-
# self._chunks.append(processed_chunk)
|
|
689
|
-
# self._event.set() # Signal that new data is available
|
|
690
|
-
|
|
846
|
+
# NOTE: this is a no-op, we handle this special case in function_message instead
|
|
691
847
|
return
|
|
692
848
|
|
|
693
849
|
def function_message(self, msg: str, msg_obj: Optional[Message] = None):
|
|
@@ -699,6 +855,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
699
855
|
if msg.startswith("Running "):
|
|
700
856
|
if not self.streaming_mode:
|
|
701
857
|
# create a fake "chunk" of a stream
|
|
858
|
+
assert msg_obj.tool_calls is not None and len(msg_obj.tool_calls) > 0, "Function call required for function_message"
|
|
702
859
|
function_call = msg_obj.tool_calls[0]
|
|
703
860
|
|
|
704
861
|
if self.nonstreaming_legacy_mode:
|
|
@@ -784,13 +941,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
784
941
|
return
|
|
785
942
|
else:
|
|
786
943
|
return
|
|
787
|
-
# msg = msg.replace("Running ", "")
|
|
788
|
-
# new_message = {"function_call": msg}
|
|
789
944
|
|
|
790
945
|
elif msg.startswith("Ran "):
|
|
791
946
|
return
|
|
792
|
-
# msg = msg.replace("Ran ", "Function call returned: ")
|
|
793
|
-
# new_message = {"function_call": msg}
|
|
794
947
|
|
|
795
948
|
elif msg.startswith("Success: "):
|
|
796
949
|
msg = msg.replace("Success: ", "")
|
|
@@ -821,10 +974,4 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
821
974
|
raise ValueError(msg)
|
|
822
975
|
new_message = {"function_message": msg}
|
|
823
976
|
|
|
824
|
-
# add extra metadata
|
|
825
|
-
# if msg_obj is not None:
|
|
826
|
-
# new_message["id"] = str(msg_obj.id)
|
|
827
|
-
# assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
|
|
828
|
-
# new_message["date"] = msg_obj.created_at.isoformat()
|
|
829
|
-
|
|
830
977
|
self._push_to_buffer(new_message)
|
|
@@ -430,9 +430,6 @@ async def send_message_to_agent(
|
|
|
430
430
|
# Get the generator object off of the agent's streaming interface
|
|
431
431
|
# This will be attached to the POST SSE request used under-the-hood
|
|
432
432
|
letta_agent = server._get_or_load_agent(agent_id=agent_id)
|
|
433
|
-
streaming_interface = letta_agent.interface
|
|
434
|
-
if not isinstance(streaming_interface, StreamingServerInterface):
|
|
435
|
-
raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
|
|
436
433
|
|
|
437
434
|
# Disable token streaming if not OpenAI
|
|
438
435
|
# TODO: cleanup this logic
|
|
@@ -441,6 +438,12 @@ async def send_message_to_agent(
|
|
|
441
438
|
print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
|
|
442
439
|
stream_tokens = False
|
|
443
440
|
|
|
441
|
+
# Create a new interface per request
|
|
442
|
+
letta_agent.interface = StreamingServerInterface()
|
|
443
|
+
streaming_interface = letta_agent.interface
|
|
444
|
+
if not isinstance(streaming_interface, StreamingServerInterface):
|
|
445
|
+
raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
|
|
446
|
+
|
|
444
447
|
# Enable token-streaming within the request if desired
|
|
445
448
|
streaming_interface.streaming_mode = stream_tokens
|
|
446
449
|
# "chatcompletion mode" does some remapping and ignores inner thoughts
|
|
@@ -454,6 +457,11 @@ async def send_message_to_agent(
|
|
|
454
457
|
streaming_interface.assistant_message_function_name = assistant_message_function_name
|
|
455
458
|
streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
|
|
456
459
|
|
|
460
|
+
# Related to JSON buffer reader
|
|
461
|
+
streaming_interface.inner_thoughts_in_kwargs = (
|
|
462
|
+
llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
|
|
463
|
+
)
|
|
464
|
+
|
|
457
465
|
# Offload the synchronous message_func to a separate thread
|
|
458
466
|
streaming_interface.stream_start()
|
|
459
467
|
task = asyncio.create_task(
|
|
@@ -22,7 +22,7 @@ def get_all_orgs(
|
|
|
22
22
|
Get a list of all orgs in the database
|
|
23
23
|
"""
|
|
24
24
|
try:
|
|
25
|
-
next_cursor, orgs = server.
|
|
25
|
+
next_cursor, orgs = server.organization_manager.list_organizations(cursor=cursor, limit=limit)
|
|
26
26
|
except HTTPException:
|
|
27
27
|
raise
|
|
28
28
|
except Exception as e:
|
|
@@ -38,8 +38,7 @@ def create_org(
|
|
|
38
38
|
"""
|
|
39
39
|
Create a new org in the database
|
|
40
40
|
"""
|
|
41
|
-
|
|
42
|
-
org = server.create_organization(request)
|
|
41
|
+
org = server.organization_manager.create_organization(request)
|
|
43
42
|
return org
|
|
44
43
|
|
|
45
44
|
|
|
@@ -50,10 +49,10 @@ def delete_org(
|
|
|
50
49
|
):
|
|
51
50
|
# TODO make a soft deletion, instead of a hard deletion
|
|
52
51
|
try:
|
|
53
|
-
org = server.
|
|
52
|
+
org = server.organization_manager.get_organization_by_id(org_id=org_id)
|
|
54
53
|
if org is None:
|
|
55
54
|
raise HTTPException(status_code=404, detail=f"Organization does not exist")
|
|
56
|
-
server.
|
|
55
|
+
server.organization_manager.delete_organization(org_id=org_id)
|
|
57
56
|
except HTTPException:
|
|
58
57
|
raise
|
|
59
58
|
except Exception as e:
|
letta/server/server.py
CHANGED
|
@@ -44,6 +44,7 @@ from letta.log import get_logger
|
|
|
44
44
|
from letta.memory import get_memory_functions
|
|
45
45
|
from letta.metadata import Base, MetadataStore
|
|
46
46
|
from letta.o1_agent import O1Agent
|
|
47
|
+
from letta.orm.errors import NoResultFound
|
|
47
48
|
from letta.prompts import gpt_system
|
|
48
49
|
from letta.providers import (
|
|
49
50
|
AnthropicProvider,
|
|
@@ -80,12 +81,12 @@ from letta.schemas.memory import (
|
|
|
80
81
|
RecallMemorySummary,
|
|
81
82
|
)
|
|
82
83
|
from letta.schemas.message import Message, MessageCreate, MessageRole, UpdateMessage
|
|
83
|
-
from letta.schemas.organization import Organization, OrganizationCreate
|
|
84
84
|
from letta.schemas.passage import Passage
|
|
85
85
|
from letta.schemas.source import Source, SourceCreate, SourceUpdate
|
|
86
86
|
from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
|
|
87
87
|
from letta.schemas.usage import LettaUsageStatistics
|
|
88
88
|
from letta.schemas.user import User, UserCreate
|
|
89
|
+
from letta.services.organization_manager import OrganizationManager
|
|
89
90
|
from letta.utils import create_random_username, json_dumps, json_loads
|
|
90
91
|
|
|
91
92
|
# from letta.llm_api_tools import openai_get_model_list, azure_openai_get_model_list, smart_urljoin
|
|
@@ -245,6 +246,9 @@ class SyncServer(Server):
|
|
|
245
246
|
self.config = config
|
|
246
247
|
self.ms = MetadataStore(self.config)
|
|
247
248
|
|
|
249
|
+
# Managers that interface with data models
|
|
250
|
+
self.organization_manager = OrganizationManager()
|
|
251
|
+
|
|
248
252
|
# TODO: this should be removed
|
|
249
253
|
# add global default tools (for admin)
|
|
250
254
|
self.add_default_tools(module_name="base")
|
|
@@ -773,20 +777,6 @@ class SyncServer(Server):
|
|
|
773
777
|
|
|
774
778
|
return user
|
|
775
779
|
|
|
776
|
-
def create_organization(self, request: OrganizationCreate) -> Organization:
|
|
777
|
-
"""Create a new org using a config"""
|
|
778
|
-
if not request.name:
|
|
779
|
-
# auto-generate a name
|
|
780
|
-
request.name = create_random_username()
|
|
781
|
-
org = Organization(name=request.name)
|
|
782
|
-
self.ms.create_organization(org)
|
|
783
|
-
logger.info(f"Created new org from config: {org}")
|
|
784
|
-
|
|
785
|
-
# add default for the org
|
|
786
|
-
# TODO: add default data
|
|
787
|
-
|
|
788
|
-
return org
|
|
789
|
-
|
|
790
780
|
def create_agent(
|
|
791
781
|
self,
|
|
792
782
|
request: CreateAgent,
|
|
@@ -2125,18 +2115,13 @@ class SyncServer(Server):
|
|
|
2125
2115
|
|
|
2126
2116
|
def get_default_user(self) -> User:
|
|
2127
2117
|
|
|
2128
|
-
from letta.constants import
|
|
2129
|
-
DEFAULT_ORG_ID,
|
|
2130
|
-
DEFAULT_ORG_NAME,
|
|
2131
|
-
DEFAULT_USER_ID,
|
|
2132
|
-
DEFAULT_USER_NAME,
|
|
2133
|
-
)
|
|
2118
|
+
from letta.constants import DEFAULT_ORG_ID, DEFAULT_USER_ID, DEFAULT_USER_NAME
|
|
2134
2119
|
|
|
2135
2120
|
# check if default org exists
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
self.
|
|
2121
|
+
try:
|
|
2122
|
+
self.organization_manager.get_organization_by_id(DEFAULT_ORG_ID)
|
|
2123
|
+
except NoResultFound:
|
|
2124
|
+
self.organization_manager.create_default_organization()
|
|
2140
2125
|
|
|
2141
2126
|
# check if default user exists
|
|
2142
2127
|
try:
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from sqlalchemy.exc import NoResultFound
|
|
4
|
+
|
|
5
|
+
from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
|
|
6
|
+
from letta.orm.organization import Organization
|
|
7
|
+
from letta.schemas.organization import Organization as PydanticOrganization
|
|
8
|
+
from letta.utils import create_random_username
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class OrganizationManager:
    """Manager class to handle business logic related to Organizations.

    Each public method opens its own session via ``self.session_maker`` and
    hands back Pydantic schema objects (``to_pydantic()``) instead of ORM rows.
    """

    def __init__(self):
        # This is probably horrible but we reuse this technique from metadata.py
        # TODO: Please refactor this out
        # I am currently working on a ORM refactor and would like to make a more minimal set of changes
        # - Matt
        # Imported lazily: letta.server.server imports this module at its top
        # level, so importing db_context at module scope would be circular.
        from letta.server.server import db_context

        self.session_maker = db_context

    def get_organization_by_id(self, org_id: str) -> PydanticOrganization:
        """Fetch an organization by ID.

        Args:
            org_id: identifier handed to ``Organization.read``.

        Returns:
            The matching organization converted with ``to_pydantic()``.

        Raises:
            ValueError: if the lookup raises sqlalchemy's ``NoResultFound``.
        """
        # NOTE(review): SyncServer.get_default_user wraps this call in
        # `except NoResultFound` using letta.orm.errors.NoResultFound, not
        # ValueError -- confirm which exception Organization.read really raises.
        with self.session_maker() as session:
            try:
                organization = Organization.read(db_session=session, identifier=org_id)
            except NoResultFound as err:
                # Chain explicitly so the underlying lookup failure stays visible.
                raise ValueError(f"Organization with id {org_id} not found.") from err
            return organization.to_pydantic()

    def create_organization(self, name: Optional[str] = None) -> PydanticOrganization:
        """Create a new organization.

        Args:
            name: name to use; when falsy (``None`` or ``""``) a random one is
                generated with ``create_random_username()``.

        Returns:
            The newly created organization as a Pydantic object.
        """
        # NOTE(review): the REST router appears to call this as
        # `create_organization(request)`, i.e. with a request object in the
        # `name` position -- confirm the caller passes a plain string.
        with self.session_maker() as session:
            org = Organization(name=name or create_random_username())
            org.create(session)
            return org.to_pydantic()

    def create_default_organization(self) -> PydanticOrganization:
        """Create the default organization (``DEFAULT_ORG_NAME`` with id ``DEFAULT_ORG_ID``)."""
        with self.session_maker() as session:
            org = Organization(name=DEFAULT_ORG_NAME)
            # Pin the well-known id instead of whatever the ORM would assign.
            org.id = DEFAULT_ORG_ID
            org.create(session)
            return org.to_pydantic()

    def update_organization_name_using_id(self, org_id: str, name: Optional[str] = None) -> PydanticOrganization:
        """Rename an organization.

        Args:
            org_id: identifier handed to ``Organization.read``.
            name: new name; when falsy the stored name is left untouched, but
                ``update`` is still called on the row.

        Returns:
            The organization, after the update, as a Pydantic object.
        """
        with self.session_maker() as session:
            organization = Organization.read(db_session=session, identifier=org_id)
            if name:
                organization.name = name
            organization.update(session)
            return organization.to_pydantic()

    def delete_organization(self, org_id: str):
        """Delete the organization with the given id.

        Presumably a soft delete (row marked as deleted) -- that depends on
        ``Organization.delete``, which is not visible here; confirm.
        """
        with self.session_maker() as session:
            organization = Organization.read(db_session=session, identifier=org_id)
            organization.delete(session)

    def list_organizations(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[PydanticOrganization]:
        """List organizations, forwarding ``cursor`` and ``limit`` to ``Organization.list``.

        Args:
            cursor: pagination cursor (presumably an organization id -- confirm
                against ``Organization.list``).
            limit: maximum number of rows requested.

        Returns:
            A flat list of Pydantic organizations; no next-cursor is returned.
        """
        # NOTE(review): the REST router unpacks this result as
        # `next_cursor, orgs = ...`, which only works for a two-element list --
        # confirm the caller's expectation.
        with self.session_maker() as session:
            results = Organization.list(db_session=session, cursor=cursor, limit=limit)
            return [org.to_pydantic() for org in results]
|