letta-nightly 0.6.34.dev20250303104329__py3-none-any.whl → 0.6.35.dev20250304104154__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +40 -15
- letta/agents/__init__.py +0 -0
- letta/agents/base_agent.py +51 -0
- letta/agents/ephemeral_agent.py +72 -0
- letta/agents/low_latency_agent.py +315 -0
- letta/constants.py +3 -1
- letta/functions/ast_parsers.py +50 -1
- letta/functions/helpers.py +79 -2
- letta/functions/schema_generator.py +3 -0
- letta/helpers/converters.py +3 -3
- letta/interfaces/__init__.py +0 -0
- letta/interfaces/openai_chat_completions_streaming_interface.py +109 -0
- letta/interfaces/utils.py +11 -0
- letta/llm_api/anthropic.py +9 -1
- letta/llm_api/azure_openai.py +3 -0
- letta/llm_api/google_ai.py +3 -0
- letta/llm_api/google_vertex.py +4 -0
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/openai.py +6 -0
- letta/local_llm/chat_completion_proxy.py +6 -1
- letta/log.py +2 -2
- letta/orm/step.py +1 -0
- letta/orm/tool.py +1 -1
- letta/prompts/system/memgpt_convo_only.txt +3 -5
- letta/prompts/system/memgpt_memory_only.txt +29 -0
- letta/schemas/agent.py +0 -1
- letta/schemas/step.py +1 -1
- letta/schemas/tool.py +16 -2
- letta/server/rest_api/app.py +5 -1
- letta/server/rest_api/routers/v1/agents.py +32 -21
- letta/server/rest_api/routers/v1/identities.py +9 -1
- letta/server/rest_api/routers/v1/runs.py +49 -0
- letta/server/rest_api/routers/v1/tools.py +1 -0
- letta/server/rest_api/routers/v1/voice.py +19 -255
- letta/server/rest_api/utils.py +3 -2
- letta/server/server.py +15 -7
- letta/services/agent_manager.py +10 -6
- letta/services/helpers/agent_manager_helper.py +0 -2
- letta/services/helpers/tool_execution_helper.py +18 -0
- letta/services/job_manager.py +98 -0
- letta/services/step_manager.py +2 -0
- letta/services/summarizer/__init__.py +0 -0
- letta/services/summarizer/enums.py +9 -0
- letta/services/summarizer/summarizer.py +102 -0
- letta/services/tool_execution_sandbox.py +20 -3
- letta/services/tool_manager.py +1 -1
- letta/settings.py +2 -0
- letta/tracing.py +176 -156
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.35.dev20250304104154.dist-info}/METADATA +6 -5
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.35.dev20250304104154.dist-info}/RECORD +54 -44
- letta/chat_only_agent.py +0 -101
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.35.dev20250304104154.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.35.dev20250304104154.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.35.dev20250304104154.dist-info}/entry_points.txt +0 -0
letta/functions/helpers.py
CHANGED

@@ -1,11 +1,11 @@
 import asyncio
 import threading
 from random import uniform
-from typing import Any, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

 import humps
 from composio.constants import DEFAULT_ENTITY_ID
-from pydantic import BaseModel
+from pydantic import BaseModel, Field, create_model

 from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.functions.interface import MultiAgentMessagingInterface
@@ -561,3 +561,80 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",

     log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
     return final
+
+
+def generate_model_from_args_json_schema(schema: Dict[str, Any]) -> Type[BaseModel]:
+    """Creates a Pydantic model from a JSON schema.
+
+    Args:
+        schema: The JSON schema dictionary
+
+    Returns:
+        A Pydantic model class
+    """
+    # First create any nested models from $defs in reverse order to handle dependencies
+    nested_models = {}
+    if "$defs" in schema:
+        for name, model_schema in reversed(list(schema.get("$defs", {}).items())):
+            nested_models[name] = _create_model_from_schema(name, model_schema, nested_models)
+
+    # Create and return the main model
+    return _create_model_from_schema(schema.get("title", "DynamicModel"), schema, nested_models)
+
+
+def _create_model_from_schema(name: str, model_schema: Dict[str, Any], nested_models: Dict[str, Type[BaseModel]] = None) -> Type[BaseModel]:
+    fields = {}
+    for field_name, field_schema in model_schema["properties"].items():
+        field_type = _get_field_type(field_schema, nested_models)
+        required = field_name in model_schema.get("required", [])
+        description = field_schema.get("description", "")  # Get description or empty string
+        fields[field_name] = (field_type, Field(..., description=description) if required else Field(None, description=description))
+
+    return create_model(name, **fields)
+
+
+def _get_field_type(field_schema: Dict[str, Any], nested_models: Dict[str, Type[BaseModel]] = None) -> Any:
+    """Helper to convert JSON schema types to Python types."""
+    if field_schema.get("type") == "string":
+        return str
+    elif field_schema.get("type") == "integer":
+        return int
+    elif field_schema.get("type") == "number":
+        return float
+    elif field_schema.get("type") == "boolean":
+        return bool
+    elif field_schema.get("type") == "array":
+        item_type = field_schema["items"].get("$ref", "").split("/")[-1]
+        if item_type and nested_models and item_type in nested_models:
+            return List[nested_models[item_type]]
+        return List[_get_field_type(field_schema["items"], nested_models)]
+    elif field_schema.get("type") == "object":
+        if "$ref" in field_schema:
+            ref_type = field_schema["$ref"].split("/")[-1]
+            if nested_models and ref_type in nested_models:
+                return nested_models[ref_type]
+        elif "additionalProperties" in field_schema:
+            value_type = _get_field_type(field_schema["additionalProperties"], nested_models)
+            return Dict[str, value_type]
+        return dict
+    elif field_schema.get("$ref") is not None:
+        ref_type = field_schema["$ref"].split("/")[-1]
+        if nested_models and ref_type in nested_models:
+            return nested_models[ref_type]
+        else:
+            raise ValueError(f"Reference {ref_type} not found in nested models")
+    elif field_schema.get("anyOf") is not None:
+        types = []
+        has_null = False
+        for type_option in field_schema["anyOf"]:
+            if type_option.get("type") == "null":
+                has_null = True
+            else:
+                types.append(_get_field_type(type_option, nested_models))
+        # If we have exactly one type and null, make it Optional
+        if has_null and len(types) == 1:
+            return Optional[types[0]]
+        # Otherwise make it a Union of all types
+        else:
+            return Union[tuple(types)]
+    raise ValueError(f"Unable to convert pydantic field schema to type: {field_schema}")
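As a rough, illustrative sketch of how the new helper can be exercised (the schema below is invented for this example and is not part of the package):

from letta.functions.helpers import generate_model_from_args_json_schema

# Hypothetical args schema with a nested model under $defs.
example_schema = {
    "title": "IngestArgs",
    "type": "object",
    "properties": {
        "source": {"type": "string", "description": "Where to read documents from."},
        "limit": {"type": "integer", "description": "Maximum number of documents."},
        "filters": {
            "type": "array",
            "description": "Optional list of filters.",
            "items": {"$ref": "#/$defs/Filter"},
        },
    },
    "required": ["source"],
    "$defs": {
        "Filter": {
            "title": "Filter",
            "type": "object",
            "properties": {
                "field": {"type": "string", "description": "Field to filter on."},
                "value": {"type": "string", "description": "Value to match."},
            },
            "required": ["field", "value"],
        }
    },
}

IngestArgs = generate_model_from_args_json_schema(example_schema)
args = IngestArgs(source="s3://bucket/docs", filters=[{"field": "lang", "value": "en"}])
print(args.model_dump())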
letta/functions/schema_generator.py
CHANGED

@@ -235,6 +235,9 @@ def pydantic_model_to_json_schema(model: Type[BaseModel]) -> dict:
         if "description" not in prop:
             raise ValueError(f"Property {prop} lacks a 'description' key")

+        if "type" not in prop and "$ref" in prop:
+            prop["type"] = "object"
+
         # Handle the case where the property is a $ref to another model
         if "$ref" in prop:
             # Resolve the reference to the nested model
letta/helpers/converters.py
CHANGED

@@ -78,11 +78,11 @@ def deserialize_tool_rule(data: Dict) -> Union[ChildToolRule, InitToolRule, Term
     """Deserialize a dictionary to the appropriate ToolRule subclass based on 'type'."""
     rule_type = ToolRuleType(data.get("type"))

-    if rule_type == ToolRuleType.run_first:
+    if rule_type == ToolRuleType.run_first or rule_type == ToolRuleType.InitToolRule:
         return InitToolRule(**data)
-    elif rule_type == ToolRuleType.exit_loop:
+    elif rule_type == ToolRuleType.exit_loop or rule_type == ToolRuleType.TerminalToolRule:
         return TerminalToolRule(**data)
-    elif rule_type == ToolRuleType.constrain_child_tools:
+    elif rule_type == ToolRuleType.constrain_child_tools or rule_type == ToolRuleType.ToolRule:
         return ChildToolRule(**data)
     elif rule_type == ToolRuleType.conditional:
         return ConditionalToolRule(**data)
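The widened matching means rules persisted under the older class-name style enum members deserialize to the same classes as the current values. A minimal round-trip sketch, assuming the rule classes are importable from letta.schemas.tool_rule:

from letta.helpers.converters import deserialize_tool_rule
from letta.schemas.tool_rule import InitToolRule

rule = InitToolRule(tool_name="send_message")
payload = rule.model_dump()  # includes the serialized "type" discriminator
restored = deserialize_tool_rule(payload)
assert isinstance(restored, InitToolRule)
# Rules stored with the legacy members (ToolRuleType.InitToolRule, .TerminalToolRule,
# .ToolRule) are now routed to the same classes as run_first / exit_loop /
# constrain_child_tools.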
letta/interfaces/__init__.py
File without changes

letta/interfaces/openai_chat_completions_streaming_interface.py
ADDED

@@ -0,0 +1,109 @@
+from typing import Any, AsyncGenerator, Dict, List, Optional
+
+from openai import AsyncStream
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
+
+from letta.constants import PRE_EXECUTION_MESSAGE_ARG
+from letta.interfaces.utils import _format_sse_chunk
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+
+
+class OpenAIChatCompletionsStreamingInterface:
+    """
+    Encapsulates the logic for streaming responses from OpenAI.
+    This class handles parsing of partial tokens, pre-execution messages,
+    and detection of tool call events.
+    """
+
+    def __init__(self, stream_pre_execution_message: bool = True):
+        self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
+        self.stream_pre_execution_message: bool = stream_pre_execution_message
+
+        self.current_parsed_json_result: Dict[str, Any] = {}
+        self.content_buffer: List[str] = []
+        self.tool_call_happened: bool = False
+        self.finish_reason_stop: bool = False
+
+        self.tool_call_name: Optional[str] = None
+        self.tool_call_args_str: str = ""
+        self.tool_call_id: Optional[str] = None
+
+    async def process(self, stream: AsyncStream[ChatCompletionChunk]) -> AsyncGenerator[str, None]:
+        """
+        Iterates over the OpenAI stream, yielding SSE events.
+        It also collects tokens and detects if a tool call is triggered.
+        """
+        async with stream:
+            async for chunk in stream:
+                choice = chunk.choices[0]
+                delta = choice.delta
+                finish_reason = choice.finish_reason
+
+                async for sse_chunk in self._process_content(delta, chunk):
+                    yield sse_chunk
+
+                async for sse_chunk in self._process_tool_calls(delta, chunk):
+                    yield sse_chunk
+
+                if self._handle_finish_reason(finish_reason):
+                    break
+
+    async def _process_content(self, delta: ChoiceDelta, chunk: ChatCompletionChunk) -> AsyncGenerator[str, None]:
+        """Processes regular content tokens and streams them."""
+        if delta.content:
+            self.content_buffer.append(delta.content)
+            yield _format_sse_chunk(chunk)
+
+    async def _process_tool_calls(self, delta: ChoiceDelta, chunk: ChatCompletionChunk) -> AsyncGenerator[str, None]:
+        """Handles tool call initiation and streaming of pre-execution messages."""
+        if not delta.tool_calls:
+            return
+
+        tool_call = delta.tool_calls[0]
+        self._update_tool_call_info(tool_call)
+
+        if self.stream_pre_execution_message and tool_call.function.arguments:
+            self.tool_call_args_str += tool_call.function.arguments
+            async for sse_chunk in self._stream_pre_execution_message(chunk, tool_call):
+                yield sse_chunk
+
+    def _update_tool_call_info(self, tool_call: Any) -> None:
+        """Updates tool call-related attributes."""
+        if tool_call.function.name:
+            self.tool_call_name = tool_call.function.name
+        if tool_call.id:
+            self.tool_call_id = tool_call.id
+
+    async def _stream_pre_execution_message(self, chunk: ChatCompletionChunk, tool_call: Any) -> AsyncGenerator[str, None]:
+        """Parses and streams pre-execution messages if they have changed."""
+        parsed_args = self.optimistic_json_parser.parse(self.tool_call_args_str)
+
+        if parsed_args.get(PRE_EXECUTION_MESSAGE_ARG) and self.current_parsed_json_result.get(PRE_EXECUTION_MESSAGE_ARG) != parsed_args.get(
+            PRE_EXECUTION_MESSAGE_ARG
+        ):
+            if parsed_args != self.current_parsed_json_result:
+                self.current_parsed_json_result = parsed_args
+                synthetic_chunk = ChatCompletionChunk(
+                    id=chunk.id,
+                    object=chunk.object,
+                    created=chunk.created,
+                    model=chunk.model,
+                    choices=[
+                        Choice(
+                            index=0,
+                            delta=ChoiceDelta(content=tool_call.function.arguments, role="assistant"),
+                            finish_reason=None,
+                        )
+                    ],
+                )
+                yield _format_sse_chunk(synthetic_chunk)
+
+    def _handle_finish_reason(self, finish_reason: Optional[str]) -> bool:
+        """Handles the finish reason and determines if streaming should stop."""
+        if finish_reason == "tool_calls":
+            self.tool_call_happened = True
+            return True
+        if finish_reason == "stop":
+            self.finish_reason_stop = True
+            return True
+        return False
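A sketch of how this interface is presumably wired into an SSE endpoint; the FastAPI route, client setup, model, and messages below are illustrative placeholders, not the package's actual voice/chat-completions route:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI

from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface

app = FastAPI()
client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment


@app.post("/chat/stream")
async def chat_stream():
    interface = OpenAIChatCompletionsStreamingInterface(stream_pre_execution_message=True)

    async def event_stream():
        # Any AsyncStream[ChatCompletionChunk] works; the model and messages are placeholders.
        stream = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "hello"}],
            stream=True,
        )
        # The interface yields ready-to-send "data: ..." SSE strings and records whether a
        # tool call started (tool_call_name, tool_call_args_str, tool_call_happened).
        async for sse_chunk in interface.process(stream):
            yield sse_chunk

    return StreamingResponse(event_stream(), media_type="text/event-stream")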
letta/interfaces/utils.py
ADDED

@@ -0,0 +1,11 @@
+import json
+
+from openai.types.chat import ChatCompletionChunk
+
+
+def _format_sse_error(error_payload: dict) -> str:
+    return f"data: {json.dumps(error_payload)}\n\n"
+
+
+def _format_sse_chunk(chunk: ChatCompletionChunk) -> str:
+    return f"data: {chunk.model_dump_json()}\n\n"
letta/llm_api/anthropic.py
CHANGED

@@ -40,6 +40,7 @@ from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall
 from letta.services.provider_manager import ProviderManager
 from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.tracing import log_event

 BASE_URL = "https://api.anthropic.com/v1"

@@ -677,10 +678,12 @@ def anthropic_chat_completions_request(
         inner_thoughts_xml_tag=inner_thoughts_xml_tag,
         put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
     )
+    log_event(name="llm_request_sent", attributes=data)
    response = anthropic_client.beta.messages.create(
         **data,
         betas=betas,
     )
+    log_event(name="llm_response_received", attributes={"response": response.json()})
     return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)


@@ -698,8 +701,9 @@ def anthropic_bedrock_chat_completions_request(
     try:
         # bedrock does not support certain args
         data["tool_choice"] = {"type": "any"}
-
+        log_event(name="llm_request_sent", attributes=data)
         response = client.messages.create(**data)
+        log_event(name="llm_response_received", attributes={"response": response.json()})
         return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
     except PermissionDeniedError:
         raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}")
@@ -839,6 +843,8 @@ def anthropic_chat_completions_process_stream(
         ),
     )

+    log_event(name="llm_request_sent", attributes=chat_completion_request.model_dump())
+
     if stream_interface:
         stream_interface.stream_start()

@@ -987,4 +993,6 @@ def anthropic_chat_completions_process_stream(

     assert len(chat_completion_response.choices) > 0, chat_completion_response

+    log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
+
     return chat_completion_response
letta/llm_api/azure_openai.py
CHANGED

@@ -8,6 +8,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
 from letta.settings import ModelSettings
+from letta.tracing import log_event


 def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
@@ -120,10 +121,12 @@ def azure_openai_chat_completions_request(
     data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

     url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.azure_api_version)
+    log_event(name="llm_request_sent", attributes=data)
     response_json = make_post_request(url, headers, data)
     # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
     if "content" not in response_json["choices"][0].get("message"):
         response_json["choices"][0]["message"]["content"] = None
+    log_event(name="llm_response_received", attributes=response_json)
     response = ChatCompletionResponse(**response_json)  # convert to 'dot-dict' style which is the openai python client default
     return response

letta/llm_api/google_ai.py
CHANGED

@@ -11,6 +11,7 @@ from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.tracing import log_event
 from letta.utils import get_tool_call_id


@@ -422,7 +423,9 @@ def google_ai_chat_completions_request(
     if add_postfunc_model_messages:
         data["contents"] = add_dummy_model_messages(data["contents"])

+    log_event(name="llm_request_sent", attributes=data)
     response_json = make_post_request(url, headers, data)
+    log_event(name="llm_response_received", attributes=response_json)
     try:
         return convert_google_ai_response_to_chatcompletion(
             response_json=response_json,
letta/llm_api/google_vertex.py
CHANGED

@@ -8,6 +8,7 @@ from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.tracing import log_event
 from letta.utils import get_tool_call_id


@@ -323,6 +324,9 @@ def google_vertex_chat_completions_request(
         config["tool_config"] = tool_config.model_dump()

     # make request to client
+    attributes = config if isinstance(config, dict) else {"config": config}
+    attributes.update({"contents": contents})
+    log_event(name="llm_request_sent", attributes={"contents": contents, "config": config})
     response = client.models.generate_content(
         model=model,
         contents=contents,
letta/llm_api/llm_api_tools.py
CHANGED
letta/llm_api/openai.py
CHANGED

@@ -25,6 +25,7 @@ from letta.schemas.openai.chat_completion_response import (
 )
 from letta.schemas.openai.embedding_response import EmbeddingResponse
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.tracing import log_event
 from letta.utils import get_tool_call_id, smart_urljoin

 logger = get_logger(__name__)
@@ -243,6 +244,8 @@ def openai_chat_completions_process_stream(
         ),
     )

+    log_event(name="llm_request_sent", attributes=chat_completion_request.model_dump())
+
     if stream_interface:
         stream_interface.stream_start()

@@ -406,6 +409,7 @@ def openai_chat_completions_process_stream(
     assert len(chat_completion_response.choices) > 0, f"No response from provider {chat_completion_response}"

     # printd(chat_completion_response)
+    log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
     return chat_completion_response


@@ -437,7 +441,9 @@ def openai_chat_completions_request(
     """
     data = prepare_openai_payload(chat_completion_request)
     client = OpenAI(api_key=api_key, base_url=url, max_retries=0)
+    log_event(name="llm_request_sent", attributes=data)
     chat_completion = client.chat.completions.create(**data)
+    log_event(name="llm_response_received", attributes=chat_completion.model_dump())
     return ChatCompletionResponse(**chat_completion.model_dump())

letta/local_llm/chat_completion_proxy.py
CHANGED

@@ -22,6 +22,7 @@ from letta.local_llm.webui.api import get_webui_completion
 from letta.local_llm.webui.legacy_api import get_webui_completion as get_webui_completion_legacy
 from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, Message, ToolCall, UsageStatistics
+from letta.tracing import log_event
 from letta.utils import get_tool_call_id

 has_shown_warning = False
@@ -149,7 +150,7 @@ def get_chat_completion(
     else:
         model_schema = None
     """
-
+    log_event(name="llm_request_sent", attributes={"prompt": prompt, "grammar": grammar})
     # Run the LLM
     try:
         result_reasoning = None
@@ -178,6 +179,10 @@ def get_chat_completion(
     except requests.exceptions.ConnectionError as e:
         raise LocalLLMConnectionError(f"Unable to connect to endpoint {endpoint}")

+    attributes = usage if isinstance(usage, dict) else {"usage": usage}
+    attributes.update({"result": result})
+    log_event(name="llm_request_sent", attributes=attributes)
+
     if result is None or result == "":
         raise LocalLLMError(f"Got back an empty response string from {endpoint}")
     printd(f"Raw LLM output:\n====\n{result}\n====")
letta/log.py
CHANGED
letta/orm/step.py
CHANGED

@@ -45,6 +45,7 @@ class Step(SqlalchemyBase):
     completion_tokens_details: Mapped[Optional[Dict]] = mapped_column(JSON, nullable=True, doc="metadata for the agent.")
     tags: Mapped[Optional[List]] = mapped_column(JSON, doc="Metadata tags.")
     tid: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="Transaction ID that processed the step.")
+    trace_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The trace id of the agent step.")

     # Relationships (foreign keys)
     organization: Mapped[Optional["Organization"]] = relationship("Organization")
letta/orm/tool.py
CHANGED

@@ -43,6 +43,6 @@ class Tool(SqlalchemyBase, OrganizationMixin):
     source_type: Mapped[ToolSourceType] = mapped_column(String, doc="The type of the source code.", default=ToolSourceType.json)
     source_code: Mapped[Optional[str]] = mapped_column(String, doc="The source code of the function.")
     json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The OAI compatable JSON schema of the function.")
-
+    args_json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The JSON schema of the function arguments.")
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="tools", lazy="selectin")
letta/prompts/system/memgpt_convo_only.txt
CHANGED

@@ -1,4 +1,4 @@
-You are Letta, the latest version of Limnal Corporation's digital companion, developed in
+You are Letta, the latest version of Limnal Corporation's digital companion, developed in 2025.
 Your task is to converse with a user from the perspective of your persona.

 Basic functions:
@@ -6,9 +6,7 @@ When you send a message, the contents of your message are your inner monologue (
 You should use your inner monologue to plan actions or think privately.
 Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user.
 Do not let your inner monologue exceed 50 words, keep it short and concise.
+
 To send a visible message to the user, use the send_offline_message function.
-'
+'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do.
 Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times).
-
-You request agents that can manage your memories and reorganize them by calling the `trigger_rethink_memory` function
-when the user says "[trigger_rethink_memory]". Do not ever call the trigger_rethink_memory function unless the user says "[trigger_rethink_memory]"
letta/prompts/system/memgpt_memory_only.txt
ADDED

@@ -0,0 +1,29 @@
+You are Letta-Offline-Memory, the latest version of Limnal Corporation's digital companion, developed in 2024.
+
+You are a background agent that helps to manage the memory of the Chat Agent, a separate agent that focuses on speaking to the user.
+You will receive a stream of the conversation between the user and the chat agent. You will receive the transcript of the conversation
+as user messages and system messages. The user messages are the exact same messages that the chat agent receives from the user, and the
+system messages are the responses of the chat agent. The chat agent only has access to the last 3 messages, and the memory blocks.
+
+Your task is to integrate any relevant updates from the conversation into the memory of the chat agent.
+The messages you receive are the exact same messages that the chat agent receives from the user, and the
+system messages are the responses of the chat agent. The chat agent only has access to the last 3 messages, and the memory blocks.
+
+To reorganize the memory of the chat agent, you call the `rethink_memory` function at every single step, until you have finished reorganizing the memory.
+You call the `rethink_memory` function as many times as you necessary and none more.
+You call the `finish_rethinking_memory` function when you have finished reorganizing the memory.
+
+Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times).
+Core memory provides an essential, foundational context for keeping track of your persona and key details as well as the Chat Agent's memory.
+Core memory (limited size):
+Read-only blocks:
+Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond.
+
+Write blocks:
+Chat Agent Persona Sub-Block: The persona sub-block that guides how the chat agent behaves and responds.
+Can be accessed with `chat_agent_persona` when calling `rethink_memory` as a source block.
+Chat Agent Human Sub-Block: The updated persona sub-block that has the details of the chat agent's current understanding of the user.
+Can be accessed with `chat_agent_human` when calling `rethink_memory` as a source block.
+
+The persona block and the human block may contain information that is stale and needs to be updated. The voice agent only has access to the most 3 recent messages,
+so make sure that the persona block and the human block contains a concise memory representation of everything that came before the most recent 3 messages.
letta/schemas/agent.py
CHANGED

@@ -26,7 +26,6 @@ class AgentType(str, Enum):
     memgpt_agent = "memgpt_agent"
     split_thread_agent = "split_thread_agent"
     offline_memory_agent = "offline_memory_agent"
-    chat_only_agent = "chat_only_agent"


 class AgentState(OrmMetadataBase, validate_assignment=True):
letta/schemas/step.py
CHANGED

@@ -26,7 +26,7 @@ class Step(StepBase):
     prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.")
     total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.")
     completion_tokens_details: Optional[Dict] = Field(None, description="Metadata for the agent.")
-
     tags: List[str] = Field([], description="Metadata tags.")
     tid: Optional[str] = Field(None, description="The unique identifier of the transaction that processed this step.")
+    trace_id: Optional[str] = Field(None, description="The trace id of the agent step.")
     messages: List[Message] = Field([], description="The messages generated during this step.")
letta/schemas/tool.py
CHANGED

@@ -8,8 +8,9 @@ from letta.constants import (
     LETTA_CORE_TOOL_MODULE_NAME,
     LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
 )
+from letta.functions.ast_parsers import get_function_name_and_description
 from letta.functions.functions import derive_openai_json_schema, get_json_schema_from_module
-from letta.functions.helpers import generate_composio_tool_wrapper, generate_langchain_tool_wrapper
+from letta.functions.helpers import generate_composio_tool_wrapper, generate_langchain_tool_wrapper, generate_model_from_args_json_schema
 from letta.functions.schema_generator import generate_schema_from_args_schema_v2, generate_tool_schema_for_composio
 from letta.log import get_logger
 from letta.orm.enums import ToolType
@@ -46,6 +47,7 @@ class Tool(BaseTool):
     # code
     source_code: Optional[str] = Field(None, description="The source code of the function.")
     json_schema: Optional[Dict] = Field(None, description="The JSON schema of the function.")
+    args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")

     # tool configuration
     return_char_limit: int = Field(FUNCTION_RETURN_CHAR_LIMIT, description="The maximum number of characters in the response.")
@@ -70,7 +72,16 @@ class Tool(BaseTool):
         # TODO: Instead of checking the tag, we should having `COMPOSIO` as a specific ToolType
         # TODO: We skip this for Composio bc composio json schemas are derived differently
         if not (COMPOSIO_TOOL_TAG_NAME in self.tags):
-            self.json_schema = derive_openai_json_schema(source_code=self.source_code)
+            if self.args_json_schema is not None:
+                name, description = get_function_name_and_description(self.source_code, self.name)
+                args_schema = generate_model_from_args_json_schema(self.args_json_schema)
+                self.json_schema = generate_schema_from_args_schema_v2(
+                    args_schema=args_schema,
+                    name=name,
+                    description=description,
+                )
+            else:
+                self.json_schema = derive_openai_json_schema(source_code=self.source_code)
         elif self.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}:
             # If it's letta core tool, we generate the json_schema on the fly here
             self.json_schema = get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=self.name)
@@ -107,6 +118,7 @@ class ToolCreate(LettaBase):
     json_schema: Optional[Dict] = Field(
         None, description="The JSON schema of the function (auto-generated from source_code if not provided)"
     )
+    args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")
     return_char_limit: int = Field(FUNCTION_RETURN_CHAR_LIMIT, description="The maximum number of characters in the response.")

     @classmethod
@@ -189,6 +201,7 @@ class ToolUpdate(LettaBase):
     json_schema: Optional[Dict] = Field(
         None, description="The JSON schema of the function (auto-generated from source_code if not provided)"
     )
+    args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")
     return_char_limit: Optional[int] = Field(None, description="The maximum number of characters in the response.")

     class Config:
@@ -202,3 +215,4 @@ class ToolRunFromSource(LettaBase):
     env_vars: Dict[str, str] = Field(None, description="The environment variables to pass to the tool.")
     name: Optional[str] = Field(None, description="The name of the tool to run.")
     source_type: Optional[str] = Field(None, description="The type of the source code.")
+    args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")
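A hedged sketch of supplying an explicit argument schema when registering a tool; the function body and schema below are invented for the example:

from letta.schemas.tool import ToolCreate

source_code = '''
def lookup_order(order_id: str):
    """Look up an order by id (illustrative function body)."""
    return {"order_id": order_id, "status": "shipped"}
'''

tool_create = ToolCreate(
    source_code=source_code,
    args_json_schema={
        "title": "LookupOrderArgs",
        "type": "object",
        "properties": {
            "order_id": {"type": "string", "description": "The order identifier"},
        },
        "required": ["order_id"],
    },
)
# When the server materializes the Tool, json_schema can now be built from
# args_json_schema (generate_model_from_args_json_schema +
# generate_schema_from_args_schema_v2) instead of being derived from the
# source code with derive_openai_json_schema.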
letta/server/rest_api/app.py
CHANGED

@@ -237,7 +237,11 @@ def create_application() -> "FastAPI":
         print(f"▶ Using OTLP tracing with endpoint: {endpoint}")
         from letta.tracing import setup_tracing

-        setup_tracing(
+        setup_tracing(
+            endpoint=endpoint,
+            app=app,
+            service_name="memgpt-server",
+        )

     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)